From: npiggin@suse.de
To: akpm@linux-foundation.org
Cc: linux-mm@kvack.org
Subject: [patch 04/21] hugetlbfs: per mount huge page sizes
Date: Wed, 04 Jun 2008 21:29:43 +1000 [thread overview]
Message-ID: <20080604113111.521975017@amd.local0.net> (raw)
In-Reply-To: 20080604112939.789444496@amd.local0.net
[-- Attachment #1: hugetlbfs-per-mount-hstate.patch --]
[-- Type: text/plain, Size: 8108 bytes --]
Add the ability to configure the hugetlb hstate used on a per mount basis.
- Add a new pagesize= option to the hugetlbfs mount that allows setting
the page size
- This option causes the mount code to find the hstate corresponding to the
specified size, and sets up a pointer to the hstate in the mount's
superblock.
- Change the hstate accessors to use this information rather than the
global_hstate they were using (requires a slight change in mm/memory.c
so we don't NULL deref in the error-unmap path -- see comments).
[np: take hstate out of hugetlbfs inode and vma->vm_private_data]
Acked-by: Adam Litke <agl@us.ibm.com>
Acked-by: Nishanth Aravamudan <nacc@us.ibm.com>
Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Nick Piggin <npiggin@suse.de>
---
fs/hugetlbfs/inode.c | 48 ++++++++++++++++++++++++++++++++++++++----------
include/linux/hugetlb.h | 14 +++++++++-----
mm/hugetlb.c | 16 +++-------------
mm/memory.c | 18 ++++++++++++++++--
4 files changed, 66 insertions(+), 30 deletions(-)
Index: linux-2.6/include/linux/hugetlb.h
===================================================================
--- linux-2.6.orig/include/linux/hugetlb.h 2008-06-04 20:51:18.000000000 +1000
+++ linux-2.6/include/linux/hugetlb.h 2008-06-04 20:51:19.000000000 +1000
@@ -100,6 +100,7 @@ struct hugetlbfs_config {
umode_t mode;
long nr_blocks;
long nr_inodes;
+ struct hstate *hstate;
};
struct hugetlbfs_sb_info {
@@ -108,6 +109,7 @@ struct hugetlbfs_sb_info {
long max_inodes; /* inodes allowed */
long free_inodes; /* inodes free */
spinlock_t stat_lock;
+ struct hstate *hstate;
};
@@ -191,19 +193,21 @@ extern unsigned int default_hstate_idx;
#define default_hstate (hstates[default_hstate_idx])
-static inline struct hstate *hstate_vma(struct vm_area_struct *vma)
+static inline struct hstate *hstate_inode(struct inode *i)
{
- return &default_hstate;
+ struct hugetlbfs_sb_info *hsb;
+ hsb = HUGETLBFS_SB(i->i_sb);
+ return hsb->hstate;
}
static inline struct hstate *hstate_file(struct file *f)
{
- return &default_hstate;
+ return hstate_inode(f->f_dentry->d_inode);
}
-static inline struct hstate *hstate_inode(struct inode *i)
+static inline struct hstate *hstate_vma(struct vm_area_struct *vma)
{
- return &default_hstate;
+ return hstate_file(vma->vm_file);
}
static inline unsigned long huge_page_size(struct hstate *h)
Index: linux-2.6/fs/hugetlbfs/inode.c
===================================================================
--- linux-2.6.orig/fs/hugetlbfs/inode.c 2008-06-04 20:51:18.000000000 +1000
+++ linux-2.6/fs/hugetlbfs/inode.c 2008-06-04 20:51:19.000000000 +1000
@@ -53,6 +53,7 @@ int sysctl_hugetlb_shm_group;
enum {
Opt_size, Opt_nr_inodes,
Opt_mode, Opt_uid, Opt_gid,
+ Opt_pagesize,
Opt_err,
};
@@ -62,6 +63,7 @@ static match_table_t tokens = {
{Opt_mode, "mode=%o"},
{Opt_uid, "uid=%u"},
{Opt_gid, "gid=%u"},
+ {Opt_pagesize, "pagesize=%s"},
{Opt_err, NULL},
};
@@ -750,6 +752,8 @@ hugetlbfs_parse_options(char *options, s
char *p, *rest;
substring_t args[MAX_OPT_ARGS];
int option;
+ unsigned long long size = 0;
+ enum { NO_SIZE, SIZE_STD, SIZE_PERCENT } setsize = NO_SIZE;
if (!options)
return 0;
@@ -780,17 +784,13 @@ hugetlbfs_parse_options(char *options, s
break;
case Opt_size: {
- unsigned long long size;
/* memparse() will accept a K/M/G without a digit */
if (!isdigit(*args[0].from))
goto bad_val;
size = memparse(args[0].from, &rest);
- if (*rest == '%') {
- size <<= HPAGE_SHIFT;
- size *= max_huge_pages;
- do_div(size, 100);
- }
- pconfig->nr_blocks = (size >> HPAGE_SHIFT);
+ setsize = SIZE_STD;
+ if (*rest == '%')
+ setsize = SIZE_PERCENT;
break;
}
@@ -801,6 +801,19 @@ hugetlbfs_parse_options(char *options, s
pconfig->nr_inodes = memparse(args[0].from, &rest);
break;
+ case Opt_pagesize: {
+ unsigned long ps;
+ ps = memparse(args[0].from, &rest);
+ pconfig->hstate = size_to_hstate(ps);
+ if (!pconfig->hstate) {
+ printk(KERN_ERR
+ "hugetlbfs: Unsupported page size %lu MB\n",
+ ps >> 20);
+ return -EINVAL;
+ }
+ break;
+ }
+
default:
printk(KERN_ERR "hugetlbfs: Bad mount option: \"%s\"\n",
p);
@@ -808,6 +821,18 @@ hugetlbfs_parse_options(char *options, s
break;
}
}
+
+ /* Do size after hstate is set up */
+ if (setsize > NO_SIZE) {
+ struct hstate *h = pconfig->hstate;
+ if (setsize == SIZE_PERCENT) {
+ size <<= huge_page_shift(h);
+ size *= h->max_huge_pages;
+ do_div(size, 100);
+ }
+ pconfig->nr_blocks = (size >> huge_page_shift(h));
+ }
+
return 0;
bad_val:
@@ -832,6 +857,7 @@ hugetlbfs_fill_super(struct super_block
config.uid = current->fsuid;
config.gid = current->fsgid;
config.mode = 0755;
+ config.hstate = &default_hstate;
ret = hugetlbfs_parse_options(data, &config);
if (ret)
return ret;
@@ -840,14 +866,15 @@ hugetlbfs_fill_super(struct super_block
if (!sbinfo)
return -ENOMEM;
sb->s_fs_info = sbinfo;
+ sbinfo->hstate = config.hstate;
spin_lock_init(&sbinfo->stat_lock);
sbinfo->max_blocks = config.nr_blocks;
sbinfo->free_blocks = config.nr_blocks;
sbinfo->max_inodes = config.nr_inodes;
sbinfo->free_inodes = config.nr_inodes;
sb->s_maxbytes = MAX_LFS_FILESIZE;
- sb->s_blocksize = HPAGE_SIZE;
- sb->s_blocksize_bits = HPAGE_SHIFT;
+ sb->s_blocksize = huge_page_size(config.hstate);
+ sb->s_blocksize_bits = huge_page_shift(config.hstate);
sb->s_magic = HUGETLBFS_MAGIC;
sb->s_op = &hugetlbfs_ops;
sb->s_time_gran = 1;
Index: linux-2.6/mm/hugetlb.c
===================================================================
--- linux-2.6.orig/mm/hugetlb.c 2008-06-04 20:51:18.000000000 +1000
+++ linux-2.6/mm/hugetlb.c 2008-06-04 20:51:19.000000000 +1000
@@ -1334,19 +1334,9 @@ void __unmap_hugepage_range(struct vm_ar
void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
unsigned long end, struct page *ref_page)
{
- /*
- * It is undesirable to test vma->vm_file as it should be non-null
- * for valid hugetlb area. However, vm_file will be NULL in the error
- * cleanup path of do_mmap_pgoff. When hugetlbfs ->mmap method fails,
- * do_mmap_pgoff() nullifies vma->vm_file before calling this function
- * to clean up. Since no pte has actually been setup, it is safe to
- * do nothing in this case.
- */
- if (vma->vm_file) {
- spin_lock(&vma->vm_file->f_mapping->i_mmap_lock);
- __unmap_hugepage_range(vma, start, end, ref_page);
- spin_unlock(&vma->vm_file->f_mapping->i_mmap_lock);
- }
+ spin_lock(&vma->vm_file->f_mapping->i_mmap_lock);
+ __unmap_hugepage_range(vma, start, end, ref_page);
+ spin_unlock(&vma->vm_file->f_mapping->i_mmap_lock);
}
/*
Index: linux-2.6/mm/memory.c
===================================================================
--- linux-2.6.orig/mm/memory.c 2008-06-04 20:51:18.000000000 +1000
+++ linux-2.6/mm/memory.c 2008-06-04 20:51:19.000000000 +1000
@@ -902,9 +902,23 @@ unsigned long unmap_vmas(struct mmu_gath
}
if (unlikely(is_vm_hugetlb_page(vma))) {
- unmap_hugepage_range(vma, start, end, NULL);
- zap_work -= (end - start) /
+ /*
+ * It is undesirable to test vma->vm_file as it
+ * should be non-null for valid hugetlb area.
+ * However, vm_file will be NULL in the error
+ * cleanup path of do_mmap_pgoff. When
+ * hugetlbfs ->mmap method fails,
+ * do_mmap_pgoff() nullifies vma->vm_file
+ * before calling this function to clean up.
+ * Since no pte has actually been setup, it is
+ * safe to do nothing in this case.
+ */
+ if (vma->vm_file) {
+ unmap_hugepage_range(vma, start, end, NULL);
+ zap_work -= (end - start) /
pages_per_huge_page(hstate_vma(vma));
+ }
+
start = end;
} else
start = unmap_page_range(*tlbp, vma,
--
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
next prev parent reply other threads:[~2008-06-04 11:29 UTC|newest]
Thread overview: 51+ messages / expand[flat|nested] mbox.gz Atom feed top
2008-06-04 11:29 [patch 00/21] hugetlb patches resend npiggin
2008-06-04 11:29 ` [patch 01/21] hugetlb: factor out prep_new_huge_page npiggin
2008-06-04 11:29 ` [patch 02/21] hugetlb: modular state for hugetlb page size npiggin
2008-06-04 11:29 ` [patch 03/21] hugetlb: multiple hstates for multiple page sizes npiggin
2008-06-04 11:29 ` npiggin [this message]
2008-06-04 11:29 ` [patch 05/21] hugetlb: new sysfs interface npiggin
2008-06-08 18:59 ` Andrew Morton
2008-06-10 3:02 ` Nick Piggin
2008-06-12 1:11 ` Nishanth Aravamudan
2008-07-02 0:24 ` Nishanth Aravamudan
2008-06-20 15:18 ` Dave Hansen
2008-06-23 2:48 ` Nick Piggin
2008-06-23 3:31 ` Andrew Morton
2008-06-23 3:52 ` Nick Piggin
2008-06-04 11:29 ` [patch 06/21] hugetlb: abstract numa round robin selection npiggin
2008-06-04 11:29 ` [patch 07/21] mm: introduce non panic alloc_bootmem npiggin
2008-06-04 11:29 ` [patch 08/21] mm: export prep_compound_page to mm npiggin
2008-06-04 11:29 ` [patch 09/21] hugetlb: support larger than MAX_ORDER npiggin
2008-06-04 11:29 ` [patch 10/21] hugetlb: support boot allocate different sizes npiggin
2008-06-04 11:29 ` [patch 11/21] hugetlb: printk cleanup npiggin
2008-06-04 11:29 ` [patch 12/21] hugetlb: introduce pud_huge npiggin
2008-06-11 23:16 ` Andrew Morton
2008-06-12 0:45 ` Nick Piggin
2008-06-04 11:29 ` [patch 13/21] x86: support GB hugepages on 64-bit npiggin
2008-06-04 11:29 ` [patch 14/21] x86: add hugepagesz option " npiggin
2008-06-04 17:51 ` Randy Dunlap
2008-06-05 2:01 ` Nick Piggin
2008-06-04 11:29 ` [patch 15/21] hugetlb: override default huge page size npiggin
2008-06-09 10:41 ` Andrew Morton
2008-06-10 3:22 ` Nick Piggin
2008-06-04 11:29 ` [patch 16/21] hugetlb: allow arch overried hugepage allocation npiggin
2008-06-08 19:14 ` Andrew Morton
2008-06-10 3:26 ` Nick Piggin
2008-06-12 8:08 ` Andy Whitcroft
2008-06-04 11:29 ` [patch 17/21] powerpc: function to allocate gigantic hugepages npiggin
2008-06-04 11:29 ` [patch 18/21] powerpc: scan device tree for gigantic pages npiggin
2008-06-04 11:29 ` [patch 19/21] powerpc: define support for 16G hugepages npiggin
2008-06-08 19:05 ` Andrew Morton
2008-06-10 3:05 ` Nick Piggin
2008-06-04 11:29 ` [patch 20/21] fs: check for statfs overflow npiggin
2008-06-08 19:06 ` Andrew Morton
2008-06-10 3:12 ` Nick Piggin
2008-06-04 11:30 ` [patch 21/21] powerpc: support multiple hugepage sizes npiggin
2008-07-14 16:32 ` [patch] powerpc: hugetlb pgtable cache access cleanup Jon Tollefson
2008-07-14 16:32 ` Jon Tollefson
2008-07-14 22:56 ` Andrew Morton
2008-07-14 22:56 ` Andrew Morton
2008-07-15 22:49 ` [patch v2] " Jon Tollefson
2008-07-15 22:49 ` Jon Tollefson
2008-07-15 22:57 ` Andrew Morton
2008-07-15 22:57 ` Andrew Morton
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20080604113111.521975017@amd.local0.net \
--to=npiggin@suse.de \
--cc=akpm@linux-foundation.org \
--cc=linux-mm@kvack.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.