All of lore.kernel.org
 help / color / mirror / Atom feed
From: Nishanth Aravamudan <nacc@us.ibm.com>
To: npiggin@suse.de
Cc: linux-mm@kvack.org, kniht@us.ibm.com, andi@firstfloor.org,
	agl@us.ibm.com, abh@cray.com, joachim.deguara@amd.com,
	greg@kroah.com
Subject: [RFC][PATCH 1/2] hugetlb: present information in sysfs
Date: Wed, 28 May 2008 23:39:15 -0700	[thread overview]
Message-ID: <20080529063915.GC11357@us.ibm.com> (raw)
In-Reply-To: <20080525143452.841211000@nick.local0.net>

While the procfs presentation of the hstate counters has tried to be as
backwards compatible as possible, I do not believe trying to maintain
all of the information in the same files is a good long-term plan. This
particularly matters for architectures that can support many hugepage
sizes (sparc64 might be one). Even with the three potential pagesizes on
power (64k, 16m and 16g), I found the proc interface to be a little
awkward.

Instead, migrate the information to sysfs in a new directory,
/sys/kernel/hugepages. Underneath that directory there will be a
directory per-supported hugepage size, e.g.:

/sys/kernel/hugepages/hugepages-64
/sys/kernel/hugepages/hugepages-16384
/sys/kernel/hugepages/hugepages-16777216

corresponding to 64k, 16m and 16g respectively. Within each
hugepages-size directory there are a number of files, corresponding to
the tracked counters in the hstate, e.g.:

/sys/kernel/hugepages/hugepages-64/nr_hugepages
/sys/kernel/hugepages/hugepages-64/nr_overcommit_hugepages
/sys/kernel/hugepages/hugepages-64/free_hugepages
/sys/kernel/hugepages/hugepages-64/resv_hugepages
/sys/kernel/hugepages/hugepages-64/surplus_hugepages

Of these files, the first two are read-write and the latter three are
read-only. The size of the hugepage being manipulated is trivially
deducible from the enclosing directory and is always expressed in kB (to
match meminfo).

Signed-off-by: Nishanth Aravamudan <nacc@us.ibm.com>

---
Nick, I tested this patch and the following one at this point the
series, that is between patches 7 and 8. This does require a few compile
fixes/patch modifications in the later parts of the series. If we decide
that 2/2 is undesirable, there will be fewer of those and 1/2 could also
apply at the end, with less work. I can send you that diff, if you'd
prefer.

Greg, I didn't hear back from you on the last posting of this patch. Not
intended as a complaint, just an indication of why I didn't make any
changes relative to that version. Does this seem like a reasonable
patch as far as using the sysfs API? I realize a follow-on patch will be
needed to updated Documentation/ABI.

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 36624f1..3fe461d 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -169,6 +169,7 @@ struct hstate {
 	unsigned int nr_huge_pages_node[MAX_NUMNODES];
 	unsigned int free_huge_pages_node[MAX_NUMNODES];
 	unsigned int surplus_huge_pages_node[MAX_NUMNODES];
+	char name[32];
 };
 
 void __init hugetlb_add_hstate(unsigned order);
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 080f17a..da7a4aa 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -14,6 +14,7 @@
 #include <linux/mempolicy.h>
 #include <linux/cpuset.h>
 #include <linux/mutex.h>
+#include <linux/sysfs.h>
 
 #include <asm/page.h>
 #include <asm/pgtable.h>
@@ -578,67 +579,6 @@ static void __init report_hugepages(void)
 	}
 }
 
-static int __init hugetlb_init(void)
-{
-	BUILD_BUG_ON(HPAGE_SHIFT == 0);
-
-	if (!size_to_hstate(HPAGE_SIZE)) {
-		hugetlb_add_hstate(HUGETLB_PAGE_ORDER);
-		parsed_hstate->max_huge_pages = default_hstate_max_huge_pages;
-	}
-
-	hugetlb_init_hstates();
-
-	report_hugepages();
-
-	return 0;
-}
-module_init(hugetlb_init);
-
-/* Should be called on processing a hugepagesz=... option */
-void __init hugetlb_add_hstate(unsigned order)
-{
-	struct hstate *h;
-	if (size_to_hstate(PAGE_SIZE << order)) {
-		printk(KERN_WARNING "hugepagesz= specified twice, ignoring\n");
-		return;
-	}
-	BUG_ON(max_hstate >= HUGE_MAX_HSTATE);
-	BUG_ON(order == 0);
-	h = &hstates[max_hstate++];
-	h->order = order;
-	h->mask = ~((1ULL << (order + PAGE_SHIFT)) - 1);
-	hugetlb_init_one_hstate(h);
-	parsed_hstate = h;
-}
-
-static int __init hugetlb_setup(char *s)
-{
-	unsigned long *mhp;
-
-	if (!max_hstate)
-		mhp = &default_hstate_max_huge_pages;
-	else
-		mhp = &parsed_hstate->max_huge_pages;
-
-	if (sscanf(s, "%lu", mhp) <= 0)
-		*mhp = 0;
-
-	return 1;
-}
-__setup("hugepages=", hugetlb_setup);
-
-static unsigned int cpuset_mems_nr(unsigned int *array)
-{
-	int node;
-	unsigned int nr = 0;
-
-	for_each_node_mask(node, cpuset_current_mems_allowed)
-		nr += array[node];
-
-	return nr;
-}
-
 #ifdef CONFIG_SYSCTL
 #ifdef CONFIG_HIGHMEM
 static void try_to_free_low(struct hstate *h, unsigned long count)
@@ -740,6 +680,229 @@ out:
 	return ret;
 }
 
+#ifdef CONFIG_SYSFS
+#define HSTATE_ATTR_RO(_name) \
+	static struct kobj_attribute _name##_attr = __ATTR_RO(_name)
+
+#define HSTATE_ATTR(_name) \
+	static struct kobj_attribute _name##_attr = \
+		__ATTR(_name, 0644, _name##_show, _name##_store)
+
+static struct kobject *hugepages_kobj;
+static struct kobject *hstate_kobjs[HUGE_MAX_HSTATE];
+
+static struct hstate *kobj_to_hstate(struct kobject *kobj)
+{
+	int i;
+	for (i = 0; i < HUGE_MAX_HSTATE; i++)
+		if (hstate_kobjs[i] == kobj)
+			return &hstates[i];
+	BUG();
+	return NULL;
+}
+
+static ssize_t nr_hugepages_show(struct kobject *kobj,
+					struct kobj_attribute *attr, char *buf)
+{
+	struct hstate *h = kobj_to_hstate(kobj);
+	return sprintf(buf, "%lu\n", h->nr_huge_pages);
+}
+static ssize_t nr_hugepages_store(struct kobject *kobj,
+		struct kobj_attribute *attr, const char *buf, size_t count)
+{
+	int tmp, err;
+	unsigned long input;
+	struct hstate *h = kobj_to_hstate(kobj);
+
+	err = strict_strtoul(buf, 10, &input);
+	if (err)
+		return 0;
+
+	h->max_huge_pages = set_max_huge_pages(h, input, &tmp);
+	max_huge_pages[h - hstates] = h->max_huge_pages;
+
+	return count;
+}
+HSTATE_ATTR(nr_hugepages);
+
+static ssize_t nr_overcommit_hugepages_show(struct kobject *kobj,
+					struct kobj_attribute *attr, char *buf)
+{
+	struct hstate *h = kobj_to_hstate(kobj);
+	return sprintf(buf, "%lu\n", h->nr_overcommit_huge_pages);
+}
+static ssize_t nr_overcommit_hugepages_store(struct kobject *kobj,
+		struct kobj_attribute *attr, const char *buf, size_t count)
+{
+	int err;
+	unsigned long input;
+	struct hstate *h = kobj_to_hstate(kobj);
+
+	err = strict_strtoul(buf, 10, &input);
+	if (err)
+		return 0;
+
+	spin_lock(&hugetlb_lock);
+	h->nr_overcommit_huge_pages = input;
+	sysctl_overcommit_huge_pages[h - hstates] = h->nr_overcommit_huge_pages;
+	spin_unlock(&hugetlb_lock);
+
+	return count;
+}
+HSTATE_ATTR(nr_overcommit_hugepages);
+
+static ssize_t free_hugepages_show(struct kobject *kobj,
+					struct kobj_attribute *attr, char *buf)
+{
+	struct hstate *h = kobj_to_hstate(kobj);
+	return sprintf(buf, "%lu\n", h->free_huge_pages);
+}
+HSTATE_ATTR_RO(free_hugepages);
+
+static ssize_t resv_hugepages_show(struct kobject *kobj,
+					struct kobj_attribute *attr, char *buf)
+{
+	struct hstate *h = kobj_to_hstate(kobj);
+	return sprintf(buf, "%lu\n", h->resv_huge_pages);
+}
+HSTATE_ATTR_RO(resv_hugepages);
+
+static ssize_t surplus_hugepages_show(struct kobject *kobj,
+					struct kobj_attribute *attr, char *buf)
+{
+	struct hstate *h = kobj_to_hstate(kobj);
+	return sprintf(buf, "%lu\n", h->surplus_huge_pages);
+}
+HSTATE_ATTR_RO(surplus_hugepages);
+
+static struct attribute *hstate_attrs[] = {
+	&nr_hugepages_attr.attr,
+	&nr_overcommit_hugepages_attr.attr,
+	&free_hugepages_attr.attr,
+	&resv_hugepages_attr.attr,
+	&surplus_hugepages_attr.attr,
+	NULL,
+};
+
+static struct attribute_group hstate_attr_group = {
+	.attrs = hstate_attrs,
+};
+
+static int __init hugetlb_sysfs_add_hstate(struct hstate *h)
+{
+	int retval;
+
+	hstate_kobjs[h - hstates] = kobject_create_and_add(h->name,
+							hugepages_kobj);
+	if (!hstate_kobjs[h - hstates])
+		return -ENOMEM;
+
+	retval = sysfs_create_group(hstate_kobjs[h - hstates],
+							&hstate_attr_group);
+	if (retval)
+		kobject_put(hstate_kobjs[h - hstates]);
+
+	return retval;
+}
+
+static void __init hugetlb_sysfs_init(void)
+{
+	struct hstate *h;
+	int err;
+
+	hugepages_kobj = kobject_create_and_add("hugepages", kernel_kobj);
+	if (!hugepages_kobj)
+		return;
+
+	for_each_hstate(h) {
+		err = hugetlb_sysfs_add_hstate(h);
+		if (err)
+			printk(KERN_ERR "Hugetlb: Unable to add hstate %s",
+								h->name);
+	}
+}
+#else
+static void __init hugetlb_sysfs_init(void)
+{
+}
+#endif
+
+static int __init hugetlb_init(void)
+{
+	BUILD_BUG_ON(HPAGE_SHIFT == 0);
+
+	if (!size_to_hstate(HPAGE_SIZE)) {
+		hugetlb_add_hstate(HUGETLB_PAGE_ORDER);
+		parsed_hstate->max_huge_pages = default_hstate_max_huge_pages;
+	}
+
+	hugetlb_init_hstates();
+
+	report_hugepages();
+
+	hugetlb_sysfs_init();
+
+	return 0;
+}
+module_init(hugetlb_init);
+
+static void __exit hugetlb_exit(void)
+{
+	struct hstate *h;
+
+	for_each_hstate(h) {
+		kobject_put(hstate_kobjs[h - hstates]);
+	}
+
+	kobject_put(hugepages_kobj);
+}
+module_exit(hugetlb_exit);
+
+/* Should be called on processing a hugepagesz=... option */
+void __init hugetlb_add_hstate(unsigned order)
+{
+	struct hstate *h;
+	if (size_to_hstate(PAGE_SIZE << order)) {
+		printk(KERN_WARNING "hugepagesz= specified twice, ignoring\n");
+		return;
+	}
+	BUG_ON(max_hstate >= HUGE_MAX_HSTATE);
+	BUG_ON(order == 0);
+	h = &hstates[max_hstate++];
+	h->order = order;
+	h->mask = ~((1ULL << (order + PAGE_SHIFT)) - 1);
+	snprintf(h->name, 32, "hugepages-%lu", huge_page_size(h)/1024);
+	hugetlb_init_one_hstate(h);
+	parsed_hstate = h;
+}
+
+static int __init hugetlb_setup(char *s)
+{
+	unsigned long *mhp;
+
+	if (!max_hstate)
+		mhp = &default_hstate_max_huge_pages;
+	else
+		mhp = &parsed_hstate->max_huge_pages;
+
+	if (sscanf(s, "%lu", mhp) <= 0)
+		*mhp = 0;
+
+	return 1;
+}
+__setup("hugepages=", hugetlb_setup);
+
+static unsigned int cpuset_mems_nr(unsigned int *array)
+{
+	int node;
+	unsigned int nr = 0;
+
+	for_each_node_mask(node, cpuset_current_mems_allowed)
+		nr += array[node];
+
+	return nr;
+}
+
 int hugetlb_sysctl_handler(struct ctl_table *table, int write,
 			   struct file *file, void __user *buffer,
 			   size_t *length, loff_t *ppos)

-- 
Nishanth Aravamudan <nacc@us.ibm.com>
IBM Linux Technology Center

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

  parent reply	other threads:[~2008-05-29  6:41 UTC|newest]

Thread overview: 95+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-05-25 14:23 [patch 00/23] multi size, giant hugetlb support, 1GB for x86, 16GB for powerpc npiggin
2008-05-25 14:23 ` [patch 01/23] hugetlb: fix lockdep error npiggin
2008-05-27 16:30   ` Nishanth Aravamudan
2008-05-27 19:55   ` Adam Litke
2008-05-25 14:23 ` [patch 02/23] hugetlb: factor out huge_new_page npiggin
2008-05-27 16:31   ` Nishanth Aravamudan
2008-05-27 20:03   ` Adam Litke
2008-05-25 14:23 ` [patch 03/23] hugetlb: modular state npiggin
2008-05-27 16:44   ` Nishanth Aravamudan
2008-05-28  8:40     ` Nick Piggin
2008-05-27 20:38   ` Adam Litke
2008-05-28  9:13     ` Nick Piggin
2008-05-25 14:23 ` [patch 04/23] hugetlb: multiple hstates npiggin
2008-05-27 16:52   ` Nishanth Aravamudan
2008-05-27 20:43   ` Adam Litke
2008-05-25 14:23 ` [patch 05/23] hugetlb: multi hstate proc files npiggin
2008-05-29  5:07   ` Nishanth Aravamudan
2008-05-29  5:44     ` Nishanth Aravamudan
2008-05-29  6:30       ` Nishanth Aravamudan
2008-05-29  9:04     ` Nick Piggin
2008-05-25 14:23 ` [patch 06/23] hugetlbfs: per mount hstates npiggin
2008-05-27 16:58   ` Nishanth Aravamudan
2008-05-27 20:50   ` Adam Litke
2008-05-25 14:23 ` [patch 07/23] hugetlb: multi hstate sysctls npiggin
2008-05-27 21:00   ` Adam Litke
2008-05-28  9:59     ` Nick Piggin
2008-05-29  4:59   ` Nishanth Aravamudan
2008-05-29  5:36     ` Nishanth Aravamudan
2008-05-29  8:59     ` Nick Piggin
2008-05-29  6:39   ` Nishanth Aravamudan [this message]
2008-05-29  6:42     ` [RFC][PATCH 2/2] hugetlb: remove multi-valued proc files Nishanth Aravamudan
2008-05-30  3:51       ` Nick Piggin
2008-05-30  7:43         ` Nishanth Aravamudan
2008-05-30  2:58     ` [RFC][PATCH 1/2] hugetlb: present information in sysfs Greg KH
2008-05-30  3:37       ` Nick Piggin
2008-05-30  4:21         ` Greg KH
2008-05-30  4:28           ` Nick Piggin
2008-05-30  7:44           ` Nishanth Aravamudan
2008-05-30  7:41         ` Nishanth Aravamudan
2008-05-30 13:40         ` Adam Litke
2008-05-30  7:39       ` Nishanth Aravamudan
2008-05-25 14:23 ` [patch 08/23] hugetlb: abstract numa round robin selection npiggin
2008-05-27 17:01   ` Nishanth Aravamudan
2008-05-27 21:02   ` Adam Litke
2008-05-25 14:23 ` [patch 09/23] mm: introduce non panic alloc_bootmem npiggin
2008-05-25 14:23 ` [patch 10/23] mm: export prep_compound_page to mm npiggin
2008-05-27 21:05   ` Adam Litke
2008-05-25 14:23 ` [patch 11/23] hugetlb: support larger than MAX_ORDER npiggin
2008-05-27 21:23   ` Adam Litke
2008-05-28 10:22     ` Nick Piggin
2008-05-25 14:23 ` [patch 12/23] hugetlb: support boot allocate different sizes npiggin
2008-05-27 17:04   ` Nishanth Aravamudan
2008-05-27 21:28   ` Adam Litke
2008-05-28 10:57     ` Nick Piggin
2008-05-28 14:01       ` Nick Piggin
2008-05-28 14:35         ` Adam Litke
2008-05-25 14:23 ` [patch 13/23] hugetlb: printk cleanup npiggin
2008-05-27 17:05   ` Nishanth Aravamudan
2008-05-27 21:30   ` Adam Litke
2008-05-25 14:23 ` [patch 14/23] hugetlb: introduce huge_pud npiggin
2008-05-26 11:09   ` Hugh Dickins
2008-05-27  2:24     ` Nick Piggin
2008-05-25 14:23 ` [patch 15/23] x86: support GB hugepages on 64-bit npiggin
2008-05-27 21:35   ` Adam Litke
2008-05-25 14:23 ` [patch 16/23] x86: add hugepagesz option " npiggin
2008-05-25 14:23 ` [patch 17/23] hugetlb: do not always register default HPAGE_SIZE huge page size npiggin
2008-05-27 21:39   ` Adam Litke
2008-05-25 14:23 ` [patch 18/23] hugetlb: allow arch overried hugepage allocation npiggin
2008-05-27 21:41   ` Adam Litke
2008-05-25 14:23 ` [patch 19/23] powerpc: function to allocate gigantic hugepages npiggin
2008-05-27 21:44   ` Adam Litke
2008-05-25 14:23 ` [patch 20/23] powerpc: scan device tree for gigantic pages npiggin
2008-05-27 21:47   ` Adam Litke
2008-05-25 14:23 ` [patch 21/23] powerpc: define support for 16G hugepages npiggin
2008-05-25 14:23 ` [patch 22/23] fs: check for statfs overflow npiggin
2008-05-27 17:14   ` Nishanth Aravamudan
2008-05-27 17:19     ` Jon Tollefson
2008-05-28  9:02       ` Nick Piggin
2008-05-28  9:02         ` Nick Piggin
2008-05-29 23:56         ` Andreas Dilger
2008-05-29 23:56           ` Andreas Dilger
2008-05-30  0:12           ` Nishanth Aravamudan
2008-05-30  0:12             ` Nishanth Aravamudan
2008-05-30  1:14           ` Nick Piggin
2008-05-30  1:14             ` Nick Piggin
2008-06-02  3:16             ` Andreas Dilger
2008-06-02  3:16               ` Andreas Dilger
2008-06-03  3:27               ` Nick Piggin
2008-06-03  3:27                 ` Nick Piggin
2008-06-03 17:17                 ` Andreas Dilger
2008-06-03 17:17                   ` Andreas Dilger
2008-05-25 14:23 ` [patch 23/23] powerpc: support multiple hugepage sizes npiggin
2008-05-27 17:14   ` Nishanth Aravamudan
2008-05-28  8:49     ` Nick Piggin
2008-05-25 14:42 ` [patch 00/23] multi size, giant hugetlb support, 1GB for x86, 16GB for powerpc Nick Piggin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20080529063915.GC11357@us.ibm.com \
    --to=nacc@us.ibm.com \
    --cc=abh@cray.com \
    --cc=agl@us.ibm.com \
    --cc=andi@firstfloor.org \
    --cc=greg@kroah.com \
    --cc=joachim.deguara@amd.com \
    --cc=kniht@us.ibm.com \
    --cc=linux-mm@kvack.org \
    --cc=npiggin@suse.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.