public inbox for linux-ia64@vger.kernel.org
 help / color / mirror / Atom feed
* HUGEPAGE SIZE a boottime option
@ 2004-02-20  1:07 Jack Steiner
  2004-02-20  2:35 ` David Mosberger
                   ` (13 more replies)
  0 siblings, 14 replies; 15+ messages in thread
From: Jack Steiner @ 2004-02-20  1:07 UTC (permalink / raw)
  To: linux-ia64


Here is a preliminary version of a patch that makes the size of
HUGEPAGES a boottime option. Only ia64-specific files are changed (except
for the Documentation file).

We have a number of customers using large pages. Unfortunately, 
the "optimum" size of a large page is application & configuration
dependent. Rather than having each customer recompile to specify their
own HUGEPAGESIZE, this patch makes the size a boottime option.


Does this patch look reasonable? If so, I will incorporate feedback,
finish testing it, update it to 2.6.3 & resubmit.

-------------------------------------------------------------------------

Based on 2.6.2-mm.

--- linux.base/Documentation/vm/hugetlbpage.txt	Tue Feb  3 21:43:11 2004
+++ linux/Documentation/vm/hugetlbpage.txt	Thu Feb 19 16:17:16 2004
@@ -63,6 +63,22 @@
 kernel to request huge pages early in the boot process (when the possibility
 of getting physical contiguous pages is still very high).
 
+Another option is to add the following boot command line option:
+
+	hugepages=xxxx
+
+The value xxxx specifies the number of hugepages that should be reserved
+at boot time.                                                          
+
+Some architectures (ia64) support an additional boot line option to specify
+the hugepage size at boot time. If this option is not specified, the hugepage 
+size defaults to the value specified in the .config file.
+
+	hugepagesz=xxxx                                                                                                            
+
+The value of xxxx is any valid pagesize (256k, ... 256m, etc).
+
+
 If the user applications are going to request hugepages using mmap system
 call, then it is required that system administrator mount a file system of
 type hugetlbfs:


--- linux.base/arch/ia64/kernel/ivt.S	Tue Feb  3 21:43:15 2004
+++ linux/arch/ia64/kernel/ivt.S	Thu Feb 19 16:10:01 2004
@@ -118,10 +118,8 @@
 #ifdef CONFIG_HUGETLB_PAGE
 	extr.u r26=r25,2,6
 	;;
-	cmp.eq p8,p0=HPAGE_SHIFT,r26
-	;;
+	UPDATE_HPAGE_REGISTERS(p8, r26, r22)	// if hugepage, set p8 & update pte index
 (p8)	dep r25=r18,r25,2,6
-(p8)	shr r22=r22,HPAGE_SHIFT-PAGE_SHIFT
 #endif
 	;;
 	cmp.eq p6,p7=5,r17			// is IFA pointing into to region 5?


--- linux.base/arch/ia64/kernel/patch.c	Tue Feb  3 21:44:04 2004
+++ linux/arch/ia64/kernel/patch.c	Thu Feb 19 15:39:02 2004
@@ -107,6 +107,24 @@
 	ia64_srlz_i();
 }
 
+/*
+ * Patch the vhpt_miss handler with the actual size of huge pages.
+ */
+void __init
+ia64_patch_hugepage(unsigned long addr, long hpageshift)
+{
+	s32 *offp = (s32 *) addr;
+	u64 ip;
+	
+	ip = (u64) offp + *offp;
+	ia64_patch(ip, 0x000fe000UL, hpageshift << 13);
+	ia64_patch(ip+2, 0xfc0fc000UL, ((64-hpageshift+PAGE_SHIFT-1) << 27) 
+		| ((hpageshift-PAGE_SHIFT) << 14));
+
+	ia64_sync_i();
+	ia64_srlz_i();
+}
+
 void
 ia64_patch_mckinley_e9 (unsigned long start, unsigned long end)
 {


--- linux.base/arch/ia64/kernel/vmlinux.lds.S	Tue Feb  3 21:44:27 2004
+++ linux/arch/ia64/kernel/vmlinux.lds.S	Thu Feb 19 09:01:31 2004
@@ -56,6 +56,13 @@
 	  __stop___ex_table = .;
 	}
 
+  .data.patch.hugepage : AT(ADDR(.data.patch.hugepage) - LOAD_OFFSET)
+	{
+	  __start___hugepage_patchlist = .;
+	  *(.data.patch.hugepage)
+	  __end___hugepage_patchlist = .;
+	}
+
   .data.patch.vtop : AT(ADDR(.data.patch.vtop) - LOAD_OFFSET)
 	{
 	  __start___vtop_patchlist = .;


--- linux.base/arch/ia64/mm/hugetlbpage.c	Tue Feb  3 21:43:49 2004
+++ linux/arch/ia64/mm/hugetlbpage.c	Thu Feb 19 12:25:51 2004
@@ -13,16 +13,20 @@
 #include <linux/smp_lock.h>
 #include <linux/slab.h>
 #include <linux/sysctl.h>
+#include <linux/module.h>
 #include <asm/mman.h>
 #include <asm/pgalloc.h>
+#include <asm/pal.h>
 #include <asm/tlb.h>
 #include <asm/tlbflush.h>
+#include <asm/sections.h>
 
 #define TASK_HPAGE_BASE (REGION_HPAGE << REGION_SHIFT)
 
 static long	htlbpagemem;
 int		htlbpage_max;
 static long	htlbzone_pages;
+int 		hpage_shift=HPAGE_SHIFT_DEFAULT;
 
 static struct list_head hugepage_freelists[MAX_NUMNODES];
 static spinlock_t htlbpage_lock = SPIN_LOCK_UNLOCKED;
@@ -520,6 +524,35 @@
 }
 __setup("hugepages=", hugetlb_setup);
 
+static int __init hugetlb_setup_sz(char *s)
+{
+	long long size;
+	u64 tr_pages;
+	long pageshift;
+	char *rest;
+	extern void ia64_patch_hugepage(unsigned long, long);
+
+	if (ia64_pal_vm_page_size(&tr_pages, 0) !=0)
+		return 1;
+	size = memparse(s, &rest);
+	if (*rest || (size & (size-1)))
+		goto bad;
+
+	pageshift = __ffs(size);
+	if (!(tr_pages & (1UL<<pageshift)))
+		goto bad;
+
+	hpage_shift = pageshift;
+	ia64_patch_hugepage((u64)__start___hugepage_patchlist, pageshift);
+	return 1;
+
+
+bad:
+	printk(KERN_WARNING "%s is not a valid huge page size\n", s);
+	return 1;
+}
+__setup("hugepagesz=", hugetlb_setup_sz);
+
 static int __init hugetlb_init(void)
 {
 	int i;
@@ -537,7 +570,7 @@
 		spin_unlock(&htlbpage_lock);
 	}
 	htlbpage_max = htlbpagemem = htlbzone_pages = i;
-	printk("Total HugeTLB memory allocated, %ld\n", htlbpagemem);
+	printk("Total HugeTLB memory allocated %ld pages, pagesize %ldkB\n", htlbpagemem, (1UL<<(hpage_shift-10)));
 	return 0;
 }
 module_init(hugetlb_init);


--- linux.base/include/asm-ia64/asmmacro.h	Tue Feb  3 21:43:19 2004
+++ linux/include/asm-ia64/asmmacro.h	Thu Feb 19 16:11:11 2004
@@ -64,6 +64,25 @@
 	.xdata4 ".data.patch.vtop", 1b-.
 
 /*
+ * If faulted pagesize is a HUGE page, shift pte index by
+ * the difference in base pagesize & hugepagesize.
+ * 	pr - predicate register to set TRUE if huge page
+ * 	ps - register that holds faulted pagesize
+ * 	idx - pte index
+ */
+	.section ".data.patch.hugepage", "a"	// declare section & section attributes
+	.previous
+
+#define UPDATE_HPAGE_REGISTERS(pr, ps, idx)	\
+[1:]{ .mmi;					\
+     cmp.eq pr,p0=HPAGE_SHIFT_DEFAULT,ps;;	\
+     nop.m 0;					\
+(pr) shr idx=idx,HPAGE_SHIFT_DEFAULT-PAGE_SHIFT;\
+};						\
+	.xdata4 ".data.patch.hugepage", 1b-.
+
+
+/*
  * For now, we always put in the McKinley E9 workaround.  On CPUs that don't need it,
  * we'll patch out the work-around bundles with NOPs, so their impact is minimal.
  */


--- linux.base/include/asm-ia64/page.h	Tue Feb  3 21:43:11 2004
+++ linux/include/asm-ia64/page.h	Wed Feb 18 16:21:58 2004
@@ -39,24 +39,25 @@
 #ifdef CONFIG_HUGETLB_PAGE
 
 # if defined(CONFIG_HUGETLB_PAGE_SIZE_4GB)
-#  define HPAGE_SHIFT	32
+#  define HPAGE_SHIFT_DEFAULT	32
 # elif defined(CONFIG_HUGETLB_PAGE_SIZE_1GB)
-#  define HPAGE_SHIFT	30
+#  define HPAGE_SHIFT_DEFAULT	30
 # elif defined(CONFIG_HUGETLB_PAGE_SIZE_256MB)
-#  define HPAGE_SHIFT	28
+#  define HPAGE_SHIFT_DEFAULT	28
 # elif defined(CONFIG_HUGETLB_PAGE_SIZE_64MB)
-#  define HPAGE_SHIFT	26
+#  define HPAGE_SHIFT_DEFAULT	26
 # elif defined(CONFIG_HUGETLB_PAGE_SIZE_16MB)
-#  define HPAGE_SHIFT	24
+#  define HPAGE_SHIFT_DEFAULT	24
 # elif defined(CONFIG_HUGETLB_PAGE_SIZE_4MB)
-#  define HPAGE_SHIFT	22
+#  define HPAGE_SHIFT_DEFAULT	22
 # elif defined(CONFIG_HUGETLB_PAGE_SIZE_1MB)
-#  define HPAGE_SHIFT	20
+#  define HPAGE_SHIFT_DEFAULT	20
 # elif defined(CONFIG_HUGETLB_PAGE_SIZE_256KB)
-#  define HPAGE_SHIFT	18
+#  define HPAGE_SHIFT_DEFAULT	18
 # else
 #  error Unsupported IA-64 HugeTLB Page Size!
 # endif
+#define HPAGE_SHIFT hpage_shift
 
 # define REGION_HPAGE	(4UL)	/* note: this is hardcoded in mmu_context.h:reload_context()!*/
 # define REGION_SHIFT	61
@@ -75,6 +76,7 @@
 
 extern void clear_page (void *page);
 extern void copy_page (void *to, void *from);
+extern int hpage_shift;
 
 /*
  * clear_user_page() and copy_user_page() can't be inline functions because


--- linux.base/include/asm-ia64/sections.h	Tue Feb  3 21:43:47 2004
+++ linux/include/asm-ia64/sections.h	Thu Feb 19 12:22:19 2004
@@ -10,6 +10,7 @@
 
 extern char __per_cpu_start[], __per_cpu_end[], __phys_per_cpu_start[];
 extern char __start___vtop_patchlist[], __end___vtop_patchlist[];
+extern char __start___hugepage_patchlist[], __end___hugepage_patchlist[];
 extern char __start___mckinley_e9_bundles[], __end___mckinley_e9_bundles[];
 extern char __start_gate_section[];
 extern char __start_gate_mckinley_e9_patchlist[], __end_gate_mckinley_e9_patchlist[];
-- 
Thanks

Jack Steiner (steiner@sgi.com)          651-683-5302
Principal Engineer                      SGI - Silicon Graphics, Inc.



^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: HUGEPAGE SIZE a boottime option
  2004-02-20  1:07 HUGEPAGE SIZE a boottime option Jack Steiner
@ 2004-02-20  2:35 ` David Mosberger
  2004-02-20  4:00 ` Chen, Kenneth W
                   ` (12 subsequent siblings)
  13 siblings, 0 replies; 15+ messages in thread
From: David Mosberger @ 2004-02-20  2:35 UTC (permalink / raw)
  To: linux-ia64

>>>>> On Thu, 19 Feb 2004 19:07:31 -0600, Jack Steiner <steiner@sgi.com> said:

  Jack> Here is a preliminary version of a patch that makes the size
  Jack> of HUGEPAGES a boottime option. Only ia64-specific files are
  Jack> changed (except for the Documentation file).

  Jack> We have a number of customers using large
  Jack> pages. Unfortunately, the "optimum" size of a large page is
  Jack> application & configuration dependent. Rather that having each
  Jack> customer recompile to specify their own HUGEPAGESIZE, this
  Jack> patch make the size a boottime option.

  Jack> Does this patch look reasonable? If so, I will incorporate feedback,
  Jack> finish testing it, update it to 2.6.3 & resubmit.

Looks neat to me.  But, man, do lose that trailing whitespace!
(especially in Kconfig---all 166 characters of it! ;-)

Rohit?

	--david

^ permalink raw reply	[flat|nested] 15+ messages in thread

* RE: HUGEPAGE SIZE a boottime option
  2004-02-20  1:07 HUGEPAGE SIZE a boottime option Jack Steiner
  2004-02-20  2:35 ` David Mosberger
@ 2004-02-20  4:00 ` Chen, Kenneth W
  2004-02-20 19:36 ` Seth, Rohit
                   ` (11 subsequent siblings)
  13 siblings, 0 replies; 15+ messages in thread
From: Chen, Kenneth W @ 2004-02-20  4:00 UTC (permalink / raw)
  To: linux-ia64

It is not functionally complete though.  alloc_fresh_huge_page(), hugetlb_free_pgtables(), and update_and_free_page() use #define constants that are indirectly derived from HPAGE_SHIFT.

You might have checked already, but does text replication work in this case?

I have more comments, will post later ...

- Ken


-----Original Message-----
From: linux-ia64-owner@vger.kernel.org
[mailto:linux-ia64-owner@vger.kernel.org]On Behalf Of Jack Steiner
Sent: Thursday, February 19, 2004 5:08 PM
To: linux-ia64@vger.kernel.org
Subject: HUGEPAGE SIZE a boottime option


Here is a preliminary version of a patch that makes the size of
HUGEPAGES a boottime option. Only ia64-specific files are changed (except
for the Documentation file).

We have a number of customers using large pages. Unfortunately, 
the "optimum" size of a large page is application & configuration
dependent. Rather that having each customer recompile to specify their
own HUGEPAGESIZE, this patch make the size a boottime option.


Does this patch look reasonable? If so, I will incorporate feedback,
finish testing it, update it to 2.6.3 & resubmit.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* RE: HUGEPAGE SIZE a boottime option
  2004-02-20  1:07 HUGEPAGE SIZE a boottime option Jack Steiner
  2004-02-20  2:35 ` David Mosberger
  2004-02-20  4:00 ` Chen, Kenneth W
@ 2004-02-20 19:36 ` Seth, Rohit
  2004-02-22  5:27 ` Chris Wedgwood
                   ` (10 subsequent siblings)
  13 siblings, 0 replies; 15+ messages in thread
From: Seth, Rohit @ 2004-02-20 19:36 UTC (permalink / raw)
  To: linux-ia64

Recently there have been a couple of feature extension requests for
hugetlb support.  A boot time option is one of them.  Run-time
modification of HUGE_PAGE_SIZE is another one.

Ken is currently in the process of providing the support for boot time
configuration for huge page size.  As long as there are only two
supported page sizes (PAGE_SIZE and HPAGE_SIZE), it is possible to
achieve the boot time configuration of HUGE_PAGE_SIZE without needing to
patch the kernel (and possibly better packing the vhpt fault handler
code).  We will ship the patch out to the list.

Thanks, rohit

_>-----Original Message-----
_>From: linux-ia64-owner@vger.kernel.org [mailto:linux-ia64-
_>owner@vger.kernel.org] On Behalf Of Chen, Kenneth W
_>Sent: Thursday, February 19, 2004 8:00 PM
_>To: Jack Steiner; linux-ia64@vger.kernel.org
_>Subject: RE: HUGEPAGE SIZE a boottime option
_>
_>It is not functionally complete though.  alloc_fresh_huge_page(),
_>hugetlb_free_pgtables(), and update_and_free_page has #define constant
_>that indirectly from HPAGE_SHIFT.
_>
_>You might checked already, text replication works in this case?
_>
_>I have more comments, will post later ...
_>
_>- Ken
_>
_>
_>-----Original Message-----
_>From: linux-ia64-owner@vger.kernel.org
_>[mailto:linux-ia64-owner@vger.kernel.org]On Behalf Of Jack Steiner
_>Sent: Thursday, February 19, 2004 5:08 PM
_>To: linux-ia64@vger.kernel.org
_>Subject: HUGEPAGE SIZE a boottime option
_>
_>
_>Here is a preliminary version of a patch that makes the size of
_>HUGEPAGES a boottime option. Only ia64-specific files are changed
(except
_>for the Documentation file).
_>
_>We have a number of customers using large pages. Unfortunately,
_>the "optimum" size of a large page is application & configuration
_>dependent. Rather that having each customer recompile to specify their
_>own HUGEPAGESIZE, this patch make the size a boottime option.
_>
_>
_>Does this patch look reasonable? If so, I will incorporate feedback,
_>finish testing it, update it to 2.6.3 & resubmit.
_>-
_>To unsubscribe from this list: send the line "unsubscribe linux-ia64"
in
_>the body of a message to majordomo@vger.kernel.org
_>More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: HUGEPAGE SIZE a boottime option
  2004-02-20  1:07 HUGEPAGE SIZE a boottime option Jack Steiner
                   ` (2 preceding siblings ...)
  2004-02-20 19:36 ` Seth, Rohit
@ 2004-02-22  5:27 ` Chris Wedgwood
  2004-02-22 23:08 ` Jack Steiner
                   ` (9 subsequent siblings)
  13 siblings, 0 replies; 15+ messages in thread
From: Chris Wedgwood @ 2004-02-22  5:27 UTC (permalink / raw)
  To: linux-ia64

On Fri, Feb 20, 2004 at 11:36:38AM -0800, Seth, Rohit wrote:

> Ken is currently in the process of providing the support for boot
> time configuration for huge page size.

What's wrong with Jack's patch?

> As long as there are only two supported page sizes (PAGE_SIZE and
> HPAGE_SIZE), it is possible to achieve the boot time configuration
> of HUGE_PAGE_SIZE without needing to patch the kernel (and possibly
> better packing the vhpt fault handler code).  We will ship the patch
> out to the list.

Jack's code doesn't have this limitation.  IMO it's a more sane way to
go.  Why should people be restricted to a limited set (two) of
hugepage sizes?  This seems especially important for vendor kernels
where people want to use the same kernel across a variety of different
machines.



^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: HUGEPAGE SIZE a boottime option
  2004-02-20  1:07 HUGEPAGE SIZE a boottime option Jack Steiner
                   ` (3 preceding siblings ...)
  2004-02-22  5:27 ` Chris Wedgwood
@ 2004-02-22 23:08 ` Jack Steiner
  2004-02-23 16:19 ` Chen, Kenneth W
                   ` (8 subsequent siblings)
  13 siblings, 0 replies; 15+ messages in thread
From: Jack Steiner @ 2004-02-22 23:08 UTC (permalink / raw)
  To: linux-ia64

On Thu, Feb 19, 2004 at 08:00:04PM -0800, Chen, Kenneth W wrote:
> It is not functionally complete though.  alloc_fresh_huge_page(), hugetlb_free_pgtables(), and update_and_free_page has #define constant that indirectly from HPAGE_SHIFT.
> 
> You might checked already, text replication works in this case?
> 

(I posted this earlier. However, our mail server has been messed up & I don't think
the mail got through. Excuse the duplicate if the other mail ever makes it....)



The patch passes preliminary testing.

I don't see any issues with #define constants indirectly using HPAGE_SHIFT. HPAGE_SHIFT
is now defined as:
        #define HPAGE_SHIFT hpage_shift
and
        extern int hpage_shift;         
        int hpage_shift=HPAGE_SHIFT_DEFAULT;


Indirect references should work ok.


> I have more comments, will post later ...
> 
> - Ken
> 
> 
> -----Original Message-----
> From: linux-ia64-owner@vger.kernel.org
> [mailto:linux-ia64-owner@vger.kernel.org]On Behalf Of Jack Steiner
> Sent: Thursday, February 19, 2004 5:08 PM
> To: linux-ia64@vger.kernel.org
> Subject: HUGEPAGE SIZE a boottime option
> 
> 
> Here is a preliminary version of a patch that makes the size of
> HUGEPAGES a boottime option. Only ia64-specific files are changed (except
> for the Documentation file).
> 
> We have a number of customers using large pages. Unfortunately, 
> the "optimum" size of a large page is application & configuration
> dependent. Rather that having each customer recompile to specify their
> own HUGEPAGESIZE, this patch make the size a boottime option.
> 
> 
> Does this patch look reasonable? If so, I will incorporate feedback,
> finish testing it, update it to 2.6.3 & resubmit.
> -
> To unsubscribe from this list: send the line "unsubscribe linux-ia64" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

-- 
Thanks

Jack Steiner (steiner@sgi.com)          651-683-5302
Principal Engineer                      SGI - Silicon Graphics, Inc.



^ permalink raw reply	[flat|nested] 15+ messages in thread

* RE: HUGEPAGE SIZE a boottime option
  2004-02-20  1:07 HUGEPAGE SIZE a boottime option Jack Steiner
                   ` (4 preceding siblings ...)
  2004-02-22 23:08 ` Jack Steiner
@ 2004-02-23 16:19 ` Chen, Kenneth W
  2004-02-23 16:26 ` Chen, Kenneth W
                   ` (7 subsequent siblings)
  13 siblings, 0 replies; 15+ messages in thread
From: Chen, Kenneth W @ 2004-02-23 16:19 UTC (permalink / raw)
  To: linux-ia64

On Fri, Feb 20, 2004 at 11:36:38AM -0800, Seth, Rohit wrote:

> As long as there are only two supported page sizes (PAGE_SIZE and
> HPAGE_SIZE), it is possible to achieve the boot time configuration
> of HUGE_PAGE_SIZE without needing to patch the kernel (and possibly
> better packing the vhpt fault handler code).


On Sat, February 21, 2004 at 9:28 PM, Chris Wedgwood wrote:

> Jack's code doesn't have this limitation.  IMO it's a more
> sane way to go.  Why should people be restricted to a limited
> set (two) of hugepage sizes?  This seems especially important
> for vendor kernels where people want to use the same kernel
> across a variety of different machines.


It's probably a misunderstanding.  We are talking about the run time
user page size, and you are probably referring to the huge page size that
the user wants to specify at boot time.  These are two totally
different things.

- Ken

^ permalink raw reply	[flat|nested] 15+ messages in thread

* RE: HUGEPAGE SIZE a boottime option
  2004-02-20  1:07 HUGEPAGE SIZE a boottime option Jack Steiner
                   ` (5 preceding siblings ...)
  2004-02-23 16:19 ` Chen, Kenneth W
@ 2004-02-23 16:26 ` Chen, Kenneth W
  2004-02-23 18:52 ` David Mosberger
                   ` (6 subsequent siblings)
  13 siblings, 0 replies; 15+ messages in thread
From: Chen, Kenneth W @ 2004-02-23 16:26 UTC (permalink / raw)
  To: linux-ia64

[-- Attachment #1: Type: text/plain, Size: 2917 bytes --]

sorry, missed this important hunk:
 # else
 #  error Unsupported IA-64 HugeTLB Page Size!
 # endif
+#define HPAGE_SHIFT hpage_shift

Here is a work-in-progress patch that includes more comments we have.

(1) The hugepagesz parameter should have its min/max checked.  It doesn't
make sense to configure a huge page size smaller than PAGE_SIZE, or
larger than what the page allocator allows (MAX_ORDER).

(2) We can avoid patching vhpt handler and still allow dynamic sizing.

(3) We remain unhappy with the penalty hit on reload_context().  Region
register 4 now has a dependency on loading the variable hpage_shift, which
could cost two to three hundred cycles in the worst case.  This variable
is next to ia64_ctx (which is heavily used), but there is no guarantee
that they sit in the same cache line.  I've tried prefetch() with
gcc-3.2.3, but it generates code that everyone can laugh at.

(4) If we have gone this far, it probably won't take that much more
to make it runtime configurable!


- Ken


-----Original Message-----
From: Jack Steiner [mailto:steiner@sgi.com]
Sent: Sunday, February 22, 2004 3:08 PM
To: Chen, Kenneth W
Cc: linux-ia64@vger.kernel.org
Subject: Re: HUGEPAGE SIZE a boottime option


On Thu, Feb 19, 2004 at 08:00:04PM -0800, Chen, Kenneth W wrote:
> It is not functionally complete though.  alloc_fresh_huge_page(),
> hugetlb_free_pgtables(), and update_and_free_page has #define
> constant that indirectly from HPAGE_SHIFT.
> 
> You might checked already, text replication works in this case?
> 

(I posted this earlier. However, our mail server has been messed up & I dont think
the mail got thru. Excuse the duplicate if the other mail ever makes it....)


The patch passes preliminary testing.

I dont see any issues with #define constants indirectly using HPAGE_SHIFT. HPAGE_SHIFT
is now defined as:
        #define HPAGE_SHIFT hpage_shift
and
        extern int hpage_shift;         
        int hpage_shift=HPAGE_SHIFT_DEFAULT;


Indirect references should work ok.


> -----Original Message-----
> From: linux-ia64-owner@vger.kernel.org
> [mailto:linux-ia64-owner@vger.kernel.org]On Behalf Of Jack Steiner
> Sent: Thursday, February 19, 2004 5:08 PM
> To: linux-ia64@vger.kernel.org
> Subject: HUGEPAGE SIZE a boottime option
> 
> 
> Here is a preliminary version of a patch that makes the size of
> HUGEPAGES a boottime option. Only ia64-specific files are changed (except
> for the Documentation file).
> 
> We have a number of customers using large pages. Unfortunately, 
> the "optimum" size of a large page is application & configuration
> dependent. Rather that having each customer recompile to specify their
> own HUGEPAGESIZE, this patch make the size a boottime option.
> 
> 
> Does this patch look reasonable? If so, I will incorporate feedback,
> finish testing it, update it to 2.6.3 & resubmit.

[-- Attachment #2: htlb_size.patch --]
[-- Type: application/octet-stream, Size: 3429 bytes --]

diff -Nur linux-2.6.3/arch/ia64/kernel/ivt.S linux-2.6.3.htlb/arch/ia64/kernel/ivt.S
--- linux-2.6.3/arch/ia64/kernel/ivt.S	2004-02-17 19:57:16.000000000 -0800
+++ linux-2.6.3.htlb/arch/ia64/kernel/ivt.S	2004-02-22 23:13:34.000000000 -0800
@@ -118,10 +118,11 @@
 #ifdef CONFIG_HUGETLB_PAGE
 	extr.u r26=r25,2,6
 	;;
-	cmp.eq p8,p0=HPAGE_SHIFT,r26
+	cmp.ne p8,p0=r18,r26
+	sub r27=r26,r18
 	;;
 (p8)	dep r25=r18,r25,2,6
-(p8)	shr r22=r22,HPAGE_SHIFT-PAGE_SHIFT
+(p8)	shr r22=r22,r27
 #endif
 	;;
 	cmp.eq p6,p7=5,r17			// is IFA pointing into to region 5?
diff -Nur linux-2.6.3/arch/ia64/mm/hugetlbpage.c linux-2.6.3.htlb/arch/ia64/mm/hugetlbpage.c
--- linux-2.6.3/arch/ia64/mm/hugetlbpage.c	2004-02-17 19:58:01.000000000 -0800
+++ linux-2.6.3.htlb/arch/ia64/mm/hugetlbpage.c	2004-02-22 22:58:58.000000000 -0800
@@ -23,6 +23,7 @@
 static long	htlbpagemem;
 int		htlbpage_max;
 static long	htlbzone_pages;
+unsigned int	hpage_shift=HPAGE_SHIFT_DEFAULT;
 
 static struct list_head hugepage_freelists[MAX_NUMNODES];
 static spinlock_t htlbpage_lock = SPIN_LOCK_UNLOCKED;
@@ -520,6 +521,30 @@
 }
 __setup("hugepages=", hugetlb_setup);
 
+static int __init hugetlb_setup_sz(char *str)
+{
+	u64 tr_pages;
+	unsigned long long size;
+
+	if (ia64_pal_vm_page_size(&tr_pages, NULL) != 0)
+		/*
+		 * shouldn't happen, but just in case.
+		 */
+		tr_pages = 0x15557000UL;
+
+	size = memparse(str, &str);
+	if (*str || (size & (size-1)) || !(tr_pages & size) ||
+		size <= PAGE_SIZE ||
+		size >= (1UL << PAGE_SHIFT << MAX_ORDER)) {
+		printk(KERN_WARNING "Invalid huge page size specified\n");
+		return 1;
+	}
+
+	hpage_shift = __ffs(size);
+	return 1;
+}
+__setup("hugepagesz=", hugetlb_setup_sz);
+
 static int __init hugetlb_init(void)
 {
 	int i;
diff -Nur linux-2.6.3/include/asm-ia64/page.h linux-2.6.3.htlb/include/asm-ia64/page.h
--- linux-2.6.3/include/asm-ia64/page.h	2004-02-17 19:57:16.000000000 -0800
+++ linux-2.6.3.htlb/include/asm-ia64/page.h	2004-02-22 17:26:18.000000000 -0800
@@ -37,26 +37,26 @@
 #define RGN_MAP_LIMIT	((1UL << (4*PAGE_SHIFT - 12)) - PAGE_SIZE)	/* per region addr limit */
 
 #ifdef CONFIG_HUGETLB_PAGE
-
 # if defined(CONFIG_HUGETLB_PAGE_SIZE_4GB)
-#  define HPAGE_SHIFT	32
+#  define HPAGE_SHIFT_DEFAULT	32
 # elif defined(CONFIG_HUGETLB_PAGE_SIZE_1GB)
-#  define HPAGE_SHIFT	30
+#  define HPAGE_SHIFT_DEFAULT	30
 # elif defined(CONFIG_HUGETLB_PAGE_SIZE_256MB)
-#  define HPAGE_SHIFT	28
+#  define HPAGE_SHIFT_DEFAULT	28
 # elif defined(CONFIG_HUGETLB_PAGE_SIZE_64MB)
-#  define HPAGE_SHIFT	26
+#  define HPAGE_SHIFT_DEFAULT	26
 # elif defined(CONFIG_HUGETLB_PAGE_SIZE_16MB)
-#  define HPAGE_SHIFT	24
+#  define HPAGE_SHIFT_DEFAULT	24
 # elif defined(CONFIG_HUGETLB_PAGE_SIZE_4MB)
-#  define HPAGE_SHIFT	22
+#  define HPAGE_SHIFT_DEFAULT	22
 # elif defined(CONFIG_HUGETLB_PAGE_SIZE_1MB)
-#  define HPAGE_SHIFT	20
+#  define HPAGE_SHIFT_DEFAULT	20
 # elif defined(CONFIG_HUGETLB_PAGE_SIZE_256KB)
-#  define HPAGE_SHIFT	18
+#  define HPAGE_SHIFT_DEFAULT	18
 # else
 #  error Unsupported IA-64 HugeTLB Page Size!
 # endif
+#define HPAGE_SHIFT hpage_shift
 
 # define REGION_HPAGE	(4UL)	/* note: this is hardcoded in mmu_context.h:reload_context()!*/
 # define REGION_SHIFT	61
@@ -140,6 +140,7 @@
 # define is_hugepage_only_range(addr, len)		\
 	 (REGION_NUMBER(addr) == REGION_HPAGE &&	\
 	  REGION_NUMBER((addr)+(len)) == REGION_HPAGE)
+extern unsigned int hpage_shift;
 #endif
 
 static __inline__ int

^ permalink raw reply	[flat|nested] 15+ messages in thread

* RE: HUGEPAGE SIZE a boottime option
  2004-02-20  1:07 HUGEPAGE SIZE a boottime option Jack Steiner
                   ` (6 preceding siblings ...)
  2004-02-23 16:26 ` Chen, Kenneth W
@ 2004-02-23 18:52 ` David Mosberger
  2004-02-23 18:58 ` Chen, Kenneth W
                   ` (5 subsequent siblings)
  13 siblings, 0 replies; 15+ messages in thread
From: David Mosberger @ 2004-02-23 18:52 UTC (permalink / raw)
  To: linux-ia64

>>>>> On Mon, 23 Feb 2004 08:26:32 -0800, "Chen, Kenneth W" <kenneth.w.chen@intel.com> said:

 >  -(p8)	shr r22=r22,HPAGE_SHIFT-PAGE_SHIFT
 >  +(p8)	shr r22=r22,r27

Nice.

Can't you get rid of CONFIG_HUGETLB_PAGE_SIZE entirely?  Just pick
some default value and let the users override it with a boot
command-line (or runtime) option.  No?

	--david

^ permalink raw reply	[flat|nested] 15+ messages in thread

* RE: HUGEPAGE SIZE a boottime option
  2004-02-20  1:07 HUGEPAGE SIZE a boottime option Jack Steiner
                   ` (7 preceding siblings ...)
  2004-02-23 18:52 ` David Mosberger
@ 2004-02-23 18:58 ` Chen, Kenneth W
  2004-02-24  4:05 ` Jack Steiner
                   ` (4 subsequent siblings)
  13 siblings, 0 replies; 15+ messages in thread
From: Chen, Kenneth W @ 2004-02-23 18:58 UTC (permalink / raw)
  To: linux-ia64

Sure, we can do that.  It would clean up a chunk of code as well :-)

- Ken


-----Original Message-----
From: David Mosberger [mailto:davidm@napali.hpl.hp.com] 
Sent: Monday, February 23, 2004 10:53 AM
To: Chen, Kenneth W
Cc: Jack Steiner; linux-ia64@vger.kernel.org
Subject: RE: HUGEPAGE SIZE a boottime option


>>>>> On Mon, 23 Feb 2004 08:26:32 -0800, "Chen, Kenneth W"
<kenneth.w.chen@intel.com> said:

 >  -(p8)	shr r22=r22,HPAGE_SHIFT-PAGE_SHIFT
 >  +(p8)	shr r22=r22,r27

Nice.

Can't you get rid of CONFIG_HUGETLB_PAGE_SIZE entirely?  Just pick
some default value and let the users override it with a boot
command-line (or runtime) option.  No?

	--david

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: HUGEPAGE SIZE a boottime option
  2004-02-20  1:07 HUGEPAGE SIZE a boottime option Jack Steiner
                   ` (8 preceding siblings ...)
  2004-02-23 18:58 ` Chen, Kenneth W
@ 2004-02-24  4:05 ` Jack Steiner
  2004-02-26  1:26 ` Chen, Kenneth W
                   ` (3 subsequent siblings)
  13 siblings, 0 replies; 15+ messages in thread
From: Jack Steiner @ 2004-02-24  4:05 UTC (permalink / raw)
  To: linux-ia64

On Mon, Feb 23, 2004 at 08:26:32AM -0800, Chen, Kenneth W wrote:
> sorry, missed this important hunk:
>  # else
>  #  error Unsupported IA-64 HugeTLB Page Size!
>  # endif
> +#define HPAGE_SHIFT hpage_shift
> 
> Here is a work-in-progress patch that includes more comments we have.


Looks reasonable to me....


> 
> (1) hugepagesz parameter should have min/max checked.  Doesn't make
> sense to config huge page size smaller than PAGE_SIZE, or config huge
> page size larger than what page allocator allows (MAX_ORDER).
> 
> (2) We can avoid patching vhpt handler and still allow dynamic sizing.
> 
> (3) we remain unhappy with penalty hit on reload_context().  Region
> register 4 now has a dependency on loading variable hpage_shift, which
> could have worst case two/three hundred cycles.  This variable is next
> to ia64_ctx (which is heavily used), but there is no guarantee that
> they sit in the same cache line.  I've tried prefetch() with
> gcc-3.2.3, but it generates code that everyone can laugh at.
> 
> (4) If we have gone this far, it probably won't take that much more
> to make it runtime configurable!
> 
> 
> - Ken
> 
> 
> -----Original Message-----
> From: Jack Steiner [mailto:steiner@sgi.com]
> Sent: Sunday, February 22, 2004 3:08 PM
> To: Chen, Kenneth W
> Cc: linux-ia64@vger.kernel.org
> Subject: Re: HUGEPAGE SIZE a boottime option
> 
> 
> On Thu, Feb 19, 2004 at 08:00:04PM -0800, Chen, Kenneth W wrote:
> > It is not functionally complete though.  alloc_fresh_huge_page(),
> > hugetlb_free_pgtables(), and update_and_free_page() use #define
> > constants that are indirectly derived from HPAGE_SHIFT.
> > 
> > You might have checked already: does text replication work in this case?
> > 
> 
> (I posted this earlier. However, our mail server has been messed up & I don't think
> the mail got through. Excuse the duplicate if the other mail ever makes it....)
> 
> 
> The patch passes preliminary testing.
> 
> I don't see any issues with #define constants indirectly using HPAGE_SHIFT. HPAGE_SHIFT
> is now defined as:
>         #define HPAGE_SHIFT hpage_shift
> and
>         extern int hpage_shift;         
>         int hpage_shift=HPAGE_SHIFT_DEFAULT;
> 
> 
> Indirect references should work ok.
> 
> 
> > -----Original Message-----
> > From: linux-ia64-owner@vger.kernel.org
> > [mailto:linux-ia64-owner@vger.kernel.org]On Behalf Of Jack Steiner
> > Sent: Thursday, February 19, 2004 5:08 PM
> > To: linux-ia64@vger.kernel.org
> > Subject: HUGEPAGE SIZE a boottime option
> > 
> > 
> > Here is a preliminary version of a patch that makes the size of
> > HUGEPAGES a boottime option. Only ia64-specific files are changed (except
> > for the Documentation file).
> > 
> > We have a number of customers using large pages. Unfortunately, 
> > the "optimum" size of a large page is application & configuration
> > dependent. Rather that having each customer recompile to specify their
> > own HUGEPAGESIZE, this patch make the size a boottime option.
> > 
> > 
> > Does this patch look reasonable? If so, I will incorporate feedback,
> > finish testing it, update it to 2.6.3 & resubmit.

Content-Description: htlb_size.patch
> [-- octet_filter file type: "'diff' output text" --]
> 
> [-- Statistics (lines words chars):  106 377 3429 /tmp/htlb_size.patch --]
> 
> diff -Nur linux-2.6.3/arch/ia64/kernel/ivt.S linux-2.6.3.htlb/arch/ia64/kernel/ivt.S
> --- linux-2.6.3/arch/ia64/kernel/ivt.S	2004-02-17 19:57:16.000000000 -0800
> +++ linux-2.6.3.htlb/arch/ia64/kernel/ivt.S	2004-02-22 23:13:34.000000000 -0800
> @@ -118,10 +118,11 @@
>  #ifdef CONFIG_HUGETLB_PAGE
>  	extr.u r26=r25,2,6
>  	;;
> -	cmp.eq p8,p0=HPAGE_SHIFT,r26
> +	cmp.ne p8,p0=r18,r26
> +	sub r27=r26,r18
>  	;;
>  (p8)	dep r25=r18,r25,2,6
> -(p8)	shr r22=r22,HPAGE_SHIFT-PAGE_SHIFT
> +(p8)	shr r22=r22,r27
>  #endif
>  	;;
>  	cmp.eq p6,p7=5,r17			// is IFA pointing into to region 5?
> diff -Nur linux-2.6.3/arch/ia64/mm/hugetlbpage.c linux-2.6.3.htlb/arch/ia64/mm/hugetlbpage.c
> --- linux-2.6.3/arch/ia64/mm/hugetlbpage.c	2004-02-17 19:58:01.000000000 -0800
> +++ linux-2.6.3.htlb/arch/ia64/mm/hugetlbpage.c	2004-02-22 22:58:58.000000000 -0800
> @@ -23,6 +23,7 @@
>  static long	htlbpagemem;
>  int		htlbpage_max;
>  static long	htlbzone_pages;
> +unsigned int	hpage_shift=HPAGE_SHIFT_DEFAULT;
>  
>  static struct list_head hugepage_freelists[MAX_NUMNODES];
>  static spinlock_t htlbpage_lock = SPIN_LOCK_UNLOCKED;
> @@ -520,6 +521,30 @@
>  }
>  __setup("hugepages=", hugetlb_setup);
>  
> +static int __init hugetlb_setup_sz(char *str)
> +{
> +	u64 tr_pages;
> +	unsigned long long size;
> +
> +	if (ia64_pal_vm_page_size(&tr_pages, NULL) != 0)
> +		/*
> +		 * shouldn't happen, but just in case.
> +		 */
> +		tr_pages = 0x15557000UL;
> +
> +	size = memparse(str, &str);
> +	if (*str || (size & (size-1)) || !(tr_pages & size) ||
> +		size <= PAGE_SIZE ||
> +		size >= (1UL << PAGE_SHIFT << MAX_ORDER)) {
> +		printk(KERN_WARNING "Invalid huge page size specified\n");
> +		return 1;
> +	}
> +
> +	hpage_shift = __ffs(size);
> +	return 1;
> +}
> +__setup("hugepagesz=", hugetlb_setup_sz);
> +
>  static int __init hugetlb_init(void)
>  {
>  	int i;
> diff -Nur linux-2.6.3/include/asm-ia64/page.h linux-2.6.3.htlb/include/asm-ia64/page.h
> --- linux-2.6.3/include/asm-ia64/page.h	2004-02-17 19:57:16.000000000 -0800
> +++ linux-2.6.3.htlb/include/asm-ia64/page.h	2004-02-22 17:26:18.000000000 -0800
> @@ -37,26 +37,26 @@
>  #define RGN_MAP_LIMIT	((1UL << (4*PAGE_SHIFT - 12)) - PAGE_SIZE)	/* per region addr limit */
>  
>  #ifdef CONFIG_HUGETLB_PAGE
> -
>  # if defined(CONFIG_HUGETLB_PAGE_SIZE_4GB)
> -#  define HPAGE_SHIFT	32
> +#  define HPAGE_SHIFT_DEFAULT	32
>  # elif defined(CONFIG_HUGETLB_PAGE_SIZE_1GB)
> -#  define HPAGE_SHIFT	30
> +#  define HPAGE_SHIFT_DEFAULT	30
>  # elif defined(CONFIG_HUGETLB_PAGE_SIZE_256MB)
> -#  define HPAGE_SHIFT	28
> +#  define HPAGE_SHIFT_DEFAULT	28
>  # elif defined(CONFIG_HUGETLB_PAGE_SIZE_64MB)
> -#  define HPAGE_SHIFT	26
> +#  define HPAGE_SHIFT_DEFAULT	26
>  # elif defined(CONFIG_HUGETLB_PAGE_SIZE_16MB)
> -#  define HPAGE_SHIFT	24
> +#  define HPAGE_SHIFT_DEFAULT	24
>  # elif defined(CONFIG_HUGETLB_PAGE_SIZE_4MB)
> -#  define HPAGE_SHIFT	22
> +#  define HPAGE_SHIFT_DEFAULT	22
>  # elif defined(CONFIG_HUGETLB_PAGE_SIZE_1MB)
> -#  define HPAGE_SHIFT	20
> +#  define HPAGE_SHIFT_DEFAULT	20
>  # elif defined(CONFIG_HUGETLB_PAGE_SIZE_256KB)
> -#  define HPAGE_SHIFT	18
> +#  define HPAGE_SHIFT_DEFAULT	18
>  # else
>  #  error Unsupported IA-64 HugeTLB Page Size!
>  # endif
> +#define HPAGE_SHIFT hpage_shift
>  
>  # define REGION_HPAGE	(4UL)	/* note: this is hardcoded in mmu_context.h:reload_context()!*/
>  # define REGION_SHIFT	61
> @@ -140,6 +140,7 @@
>  # define is_hugepage_only_range(addr, len)		\
>  	 (REGION_NUMBER(addr) == REGION_HPAGE &&	\
>  	  REGION_NUMBER((addr)+(len)) == REGION_HPAGE)
> +extern unsigned int hpage_shift;
>  #endif
>  
>  static __inline__ int


-- 
Thanks

Jack Steiner (steiner@sgi.com)          651-683-5302
Principal Engineer                      SGI - Silicon Graphics, Inc.



^ permalink raw reply	[flat|nested] 15+ messages in thread

* RE: HUGEPAGE SIZE a boottime option
  2004-02-20  1:07 HUGEPAGE SIZE a boottime option Jack Steiner
                   ` (9 preceding siblings ...)
  2004-02-24  4:05 ` Jack Steiner
@ 2004-02-26  1:26 ` Chen, Kenneth W
  2004-02-26  2:09 ` Chen, Kenneth W
                   ` (2 subsequent siblings)
  13 siblings, 0 replies; 15+ messages in thread
From: Chen, Kenneth W @ 2004-02-26  1:26 UTC (permalink / raw)
  To: linux-ia64

[-- Attachment #1: Type: text/plain, Size: 277 bytes --]

We believe we have resolved all the remaining issues; all critical
speed paths have been taken care of, i.e., the vhpt handler and context
switch.  There should be no performance penalty with this dynamic
hugetlb page size feature.

David, this is our final tested patch.

- Ken

[-- Attachment #2: htlb_size.patch --]
[-- Type: application/octet-stream, Size: 6424 bytes --]

diff -Nurp linux-2.6.3/arch/ia64/Kconfig linux-2.6.3.htlb/arch/ia64/Kconfig
--- linux-2.6.3/arch/ia64/Kconfig	2004-02-25 17:17:57.000000000 -0800
+++ linux-2.6.3.htlb/arch/ia64/Kconfig	2004-02-25 17:19:05.000000000 -0800
@@ -282,39 +282,6 @@ config FORCE_MAX_ZONEORDER
 	int
 	default "18"
 
-choice
-	prompt "Huge TLB page size"
-	depends on HUGETLB_PAGE
-	default HUGETLB_PAGE_SIZE_16MB
-
-config HUGETLB_PAGE_SIZE_4GB
-	depends on MCKINLEY
-	bool "4GB"
-
-config HUGETLB_PAGE_SIZE_1GB
-	depends on MCKINLEY
-	bool "1GB"
-
-config HUGETLB_PAGE_SIZE_256MB
-	bool "256MB"
-
-config HUGETLB_PAGE_SIZE_64MB
-	bool "64MB"
-
-config HUGETLB_PAGE_SIZE_16MB
-	bool "16MB"
-
-config HUGETLB_PAGE_SIZE_4MB
-	bool "4MB"
-
-config HUGETLB_PAGE_SIZE_1MB
-	bool "1MB"
-
-config HUGETLB_PAGE_SIZE_256KB
-	bool "256KB"
-
-endchoice
-
 config IA64_PAL_IDLE
 	bool "Use PAL_HALT_LIGHT in idle loop"
 	help
diff -Nurp linux-2.6.3/arch/ia64/kernel/ivt.S linux-2.6.3.htlb/arch/ia64/kernel/ivt.S
--- linux-2.6.3/arch/ia64/kernel/ivt.S	2004-02-25 17:17:57.000000000 -0800
+++ linux-2.6.3.htlb/arch/ia64/kernel/ivt.S	2004-02-25 17:19:05.000000000 -0800
@@ -118,10 +118,11 @@ ENTRY(vhpt_miss)
 #ifdef CONFIG_HUGETLB_PAGE
 	extr.u r26=r25,2,6
 	;;
-	cmp.eq p8,p0=HPAGE_SHIFT,r26
+	cmp.ne p8,p0=r18,r26
+	sub r27=r26,r18
 	;;
 (p8)	dep r25=r18,r25,2,6
-(p8)	shr r22=r22,HPAGE_SHIFT-PAGE_SHIFT
+(p8)	shr r22=r22,r27
 #endif
 	;;
 	cmp.eq p6,p7=5,r17			// is IFA pointing into to region 5?
diff -Nurp linux-2.6.3/arch/ia64/mm/hugetlbpage.c linux-2.6.3.htlb/arch/ia64/mm/hugetlbpage.c
--- linux-2.6.3/arch/ia64/mm/hugetlbpage.c	2004-02-25 17:17:57.000000000 -0800
+++ linux-2.6.3.htlb/arch/ia64/mm/hugetlbpage.c	2004-02-25 17:19:05.000000000 -0800
@@ -1,7 +1,11 @@
 /*
  * IA-64 Huge TLB Page Support for Kernel.
  *
- * Copyright (C) 2002, Rohit Seth <rohit.seth@intel.com>
+ * Copyright (C) 2002-2004 Rohit Seth <rohit.seth@intel.com>
+ * Copyright (C) 2003-2004 Ken Chen <kenneth.w.chen@intel.com>
+ *
+ * Sep, 2003: add numa support
+ * Feb, 2004: dynamic hugetlb page size via command line
  */
 
 #include <linux/config.h>
@@ -23,6 +27,7 @@
 static long	htlbpagemem;
 int		htlbpage_max;
 static long	htlbzone_pages;
+unsigned int	hpage_shift=HPAGE_SHIFT_DEFAULT;
 
 static struct list_head hugepage_freelists[MAX_NUMNODES];
 static spinlock_t htlbpage_lock = SPIN_LOCK_UNLOCKED;
@@ -520,6 +525,35 @@ static int __init hugetlb_setup(char *s)
 }
 __setup("hugepages=", hugetlb_setup);
 
+static int __init hugetlb_setup_sz(char *str)
+{
+	u64 tr_pages;
+	unsigned long long size;
+
+	if (ia64_pal_vm_page_size(&tr_pages, NULL) != 0)
+		/*
+		 * shouldn't happen, but just in case.
+		 */
+		tr_pages = 0x15557000UL;
+
+	size = memparse(str, &str);
+	if (*str || (size & (size-1)) || !(tr_pages & size) ||
+		size <= PAGE_SIZE ||
+		size >= (1UL << PAGE_SHIFT << MAX_ORDER)) {
+		printk(KERN_WARNING "Invalid huge page size specified\n");
+		return 1;
+	}
+
+	hpage_shift = __ffs(size);
+	/*
+	 * boot cpu already executed ia64_mmu_init, and has HPAGE_SHIFT_DEFAULT
+	 * override here with new page shift.
+	 */
+	ia64_set_rr(0x8000000000000000, hpage_shift << 2);
+	return 1;
+}
+__setup("hugepagesz=", hugetlb_setup_sz);
+
 static int __init hugetlb_init(void)
 {
 	int i;
@@ -540,7 +574,7 @@ static int __init hugetlb_init(void)
 	printk("Total HugeTLB memory allocated, %ld\n", htlbpagemem);
 	return 0;
 }
-module_init(hugetlb_init);
+__initcall(hugetlb_init);
 
 int hugetlb_report_meminfo(char *buf)
 {
diff -Nurp linux-2.6.3/arch/ia64/mm/init.c linux-2.6.3.htlb/arch/ia64/mm/init.c
--- linux-2.6.3/arch/ia64/mm/init.c	2004-02-25 17:17:57.000000000 -0800
+++ linux-2.6.3.htlb/arch/ia64/mm/init.c	2004-02-25 17:19:05.000000000 -0800
@@ -342,6 +342,10 @@ ia64_mmu_init (void *my_cpu_data)
 
 	ia64_tlb_init();
 
+#ifdef	CONFIG_HUGETLB_PAGE
+	ia64_set_rr(0x8000000000000000, hpage_shift << 2);
+#endif
+
 #ifdef	CONFIG_IA64_MCA
 	cpu = smp_processor_id();
 
diff -Nurp linux-2.6.3/include/asm-ia64/mmu_context.h linux-2.6.3.htlb/include/asm-ia64/mmu_context.h
--- linux-2.6.3/include/asm-ia64/mmu_context.h	2004-02-25 17:18:04.000000000 -0800
+++ linux-2.6.3.htlb/include/asm-ia64/mmu_context.h	2004-02-25 17:19:05.000000000 -0800
@@ -140,8 +140,9 @@ reload_context (mm_context_t context)
 {
 	unsigned long rid;
 	unsigned long rid_incr = 0;
-	unsigned long rr0, rr1, rr2, rr3, rr4;
+	unsigned long rr0, rr1, rr2, rr3, rr4, old_rr4;
 
+	old_rr4 = ia64_get_rr(0x8000000000000000);
 	rid = context << 3;	/* make space for encoding the region number */
 	rid_incr = 1 << 8;
 
@@ -152,7 +153,7 @@ reload_context (mm_context_t context)
 	rr3 = rr0 + 3*rid_incr;
 	rr4 = rr0 + 4*rid_incr;
 #ifdef  CONFIG_HUGETLB_PAGE
-	rr4 = (rr4 & (~(0xfcUL))) | (HPAGE_SHIFT << 2);
+	rr4 = (rr4 & (~(0xfcUL))) | (old_rr4 & 0xfc);
 #endif
 
 	ia64_set_rr(0x0000000000000000, rr0);
diff -Nurp linux-2.6.3/include/asm-ia64/page.h linux-2.6.3.htlb/include/asm-ia64/page.h
--- linux-2.6.3/include/asm-ia64/page.h	2004-02-25 17:18:04.000000000 -0800
+++ linux-2.6.3.htlb/include/asm-ia64/page.h	2004-02-25 17:19:05.000000000 -0800
@@ -37,27 +37,8 @@
 #define RGN_MAP_LIMIT	((1UL << (4*PAGE_SHIFT - 12)) - PAGE_SIZE)	/* per region addr limit */
 
 #ifdef CONFIG_HUGETLB_PAGE
-
-# if defined(CONFIG_HUGETLB_PAGE_SIZE_4GB)
-#  define HPAGE_SHIFT	32
-# elif defined(CONFIG_HUGETLB_PAGE_SIZE_1GB)
-#  define HPAGE_SHIFT	30
-# elif defined(CONFIG_HUGETLB_PAGE_SIZE_256MB)
-#  define HPAGE_SHIFT	28
-# elif defined(CONFIG_HUGETLB_PAGE_SIZE_64MB)
-#  define HPAGE_SHIFT	26
-# elif defined(CONFIG_HUGETLB_PAGE_SIZE_16MB)
-#  define HPAGE_SHIFT	24
-# elif defined(CONFIG_HUGETLB_PAGE_SIZE_4MB)
-#  define HPAGE_SHIFT	22
-# elif defined(CONFIG_HUGETLB_PAGE_SIZE_1MB)
-#  define HPAGE_SHIFT	20
-# elif defined(CONFIG_HUGETLB_PAGE_SIZE_256KB)
-#  define HPAGE_SHIFT	18
-# else
-#  error Unsupported IA-64 HugeTLB Page Size!
-# endif
-
+# define HPAGE_SHIFT hpage_shift
+# define HPAGE_SHIFT_DEFAULT	28	/* check ia64 SDM for architecture supported size */
 # define REGION_HPAGE	(4UL)	/* note: this is hardcoded in mmu_context.h:reload_context()!*/
 # define REGION_SHIFT	61
 # define HPAGE_SIZE	(__IA64_UL_CONST(1) << HPAGE_SHIFT)
@@ -140,6 +121,7 @@ typedef union ia64_va {
 # define is_hugepage_only_range(addr, len)		\
 	 (REGION_NUMBER(addr) == REGION_HPAGE &&	\
 	  REGION_NUMBER((addr)+(len)) == REGION_HPAGE)
+extern unsigned int hpage_shift;
 #endif
 
 static __inline__ int

^ permalink raw reply	[flat|nested] 15+ messages in thread

* RE: HUGEPAGE SIZE a boottime option
  2004-02-20  1:07 HUGEPAGE SIZE a boottime option Jack Steiner
                   ` (10 preceding siblings ...)
  2004-02-26  1:26 ` Chen, Kenneth W
@ 2004-02-26  2:09 ` Chen, Kenneth W
  2004-02-26  5:18 ` David Mosberger
  2004-02-26 20:31 ` Chen, Kenneth W
  13 siblings, 0 replies; 15+ messages in thread
From: Chen, Kenneth W @ 2004-02-26  2:09 UTC (permalink / raw)
  To: linux-ia64

My apologies for the oversight: we would like to thank Jack
Steiner for his initiative on this feature and his initial cool
working patch.

- Ken


-----Original Message-----
From: Chen, Kenneth W 
Sent: Wednesday, February 25, 2004 5:26 PM
To: linux-ia64@vger.kernel.org
Cc: 'David Mosberger'
Subject: RE: HUGEPAGE SIZE a boottime option


We believe we have resolved all the remaining issues; all critical
speed paths have been taken care of, i.e., the vhpt handler and context
switch.  There should be no performance penalty with this dynamic
hugetlb page size feature.

David, this is our final tested patch.

- Ken

^ permalink raw reply	[flat|nested] 15+ messages in thread

* RE: HUGEPAGE SIZE a boottime option
  2004-02-20  1:07 HUGEPAGE SIZE a boottime option Jack Steiner
                   ` (11 preceding siblings ...)
  2004-02-26  2:09 ` Chen, Kenneth W
@ 2004-02-26  5:18 ` David Mosberger
  2004-02-26 20:31 ` Chen, Kenneth W
  13 siblings, 0 replies; 15+ messages in thread
From: David Mosberger @ 2004-02-26  5:18 UTC (permalink / raw)
  To: linux-ia64

>>>>> On Wed, 25 Feb 2004 17:26:25 -0800, "Chen, Kenneth W" <kenneth.w.chen@intel.com> said:

  Ken> We believe we have resolved all the remaining issues; all critical
  Ken> speed paths have been taken care of, i.e., the vhpt handler and
  Ken> context switch.  There should be no performance penalty with
  Ken> this dynamic hugetlb page size feature.

What about huge-page page-faults?  Before, HPAGE_SHIFT, HPAGE_SIZE,
and HPAGE_MASK all were constant, now they have to be fetched from
memory/recalculated each time.  I suppose it's rare enough that it's
not worth worrying about but did you verify that there is at least not
anything really gross going on (like someone doing a modulo operation
against HPAGE_SIZE)?

How about replacing 0x8000000000000000 with (REGION_HPAGE <<
REGION_SHIFT) or a manifest constant defined to this value (perhaps
HUGETLB_REGION_ADDR)?

Why does the region register get initialized both in mm/init.c and
hugetlb_setup_sz()?  Couldn't this be done once in hugetlb_init()?

Other than that, the patch looks good to me.

	--david


^ permalink raw reply	[flat|nested] 15+ messages in thread

* RE: HUGEPAGE SIZE a boottime option
  2004-02-20  1:07 HUGEPAGE SIZE a boottime option Jack Steiner
                   ` (12 preceding siblings ...)
  2004-02-26  5:18 ` David Mosberger
@ 2004-02-26 20:31 ` Chen, Kenneth W
  13 siblings, 0 replies; 15+ messages in thread
From: Chen, Kenneth W @ 2004-02-26 20:31 UTC (permalink / raw)
  To: linux-ia64

> What about huge-page page-faults?  Before, HPAGE_SHIFT, HPAGE_SIZE,
> and HPAGE_MASK all were constant, now they have to be fetched from
> memory/recalculated each time. I suppose it's rare enough that it's
> not worth worrying about

Yeah, the penalty is paid at setup time, not runtime.


> did you verify that there is at least not anything really gross
> going on (like someone doing a modulo operation against HPAGE_SIZE)?

Just checked, doesn't look like any misuse for HPAGE_SIZE.


> How about replacing 0x8000000000000000 with (REGION_HPAGE <<
> REGION_SHIFT) or a manifest constant defined to this value (perhaps
> HUGETLB_REGION_ADDR)?

There is a similar TASK_HPAGE_BASE defined in hugetlbpage.c, I have
moved it into page.h and updated the patch.  Can't change the 0x800...
in reload_context because it would then need #if CONFIG_HUGETLB_PAGE.


> Why does the region register get initialized both in mm/init.c and
> hugetlb_setup_sz()?  Couldn't this be done once in hugetlb_init()?

The hpage_shift is stored in rr4.ps on each CPU, so on context
switch, we get the current hpage_shift from rr4 instead of a memory
reference.  Since initialization needs to be done on all CPUs,
mm/init.c is the logical place.  However, by the time the BP executes
hugetlb_setup_sz, it has already loaded rr4.ps with the compile-time
default (the APs aren't online yet).  In case we need to change the
size, rr4 on the BP needs to be re-initialized.  Once all setup code
finishes, the APs go online and they load rr4.ps with the correct
value.  This should also cover hot-plug CPUs pretty nicely.

(patch attached)
(p.s. I'm trying something new with my mailer to include the patch
      in the message body.  If my mailer screws up, please
      let me know.  I will resend.)


diff -Nurp linux-2.6.3/arch/ia64/Kconfig linux-2.6.3.htlb/arch/ia64/Kconfig
--- linux-2.6.3/arch/ia64/Kconfig	2004-02-17 19:59:33.000000000 -0800
+++ linux-2.6.3.htlb/arch/ia64/Kconfig	2004-02-26 12:18:51.000000000 -0800
@@ -282,39 +282,6 @@ config FORCE_MAX_ZONEORDER
 	int
 	default "18"
 
-choice
-	prompt "Huge TLB page size"
-	depends on HUGETLB_PAGE
-	default HUGETLB_PAGE_SIZE_16MB
-
-config HUGETLB_PAGE_SIZE_4GB
-	depends on MCKINLEY
-	bool "4GB"
-
-config HUGETLB_PAGE_SIZE_1GB
-	depends on MCKINLEY
-	bool "1GB"
-
-config HUGETLB_PAGE_SIZE_256MB
-	bool "256MB"
-
-config HUGETLB_PAGE_SIZE_64MB
-	bool "64MB"
-
-config HUGETLB_PAGE_SIZE_16MB
-	bool "16MB"
-
-config HUGETLB_PAGE_SIZE_4MB
-	bool "4MB"
-
-config HUGETLB_PAGE_SIZE_1MB
-	bool "1MB"
-
-config HUGETLB_PAGE_SIZE_256KB
-	bool "256KB"
-
-endchoice
-
 config IA64_PAL_IDLE
 	bool "Use PAL_HALT_LIGHT in idle loop"
 	help
diff -Nurp linux-2.6.3/arch/ia64/kernel/ivt.S linux-2.6.3.htlb/arch/ia64/kernel/ivt.S
--- linux-2.6.3/arch/ia64/kernel/ivt.S	2004-02-17 19:57:16.000000000 -0800
+++ linux-2.6.3.htlb/arch/ia64/kernel/ivt.S	2004-02-26 12:18:51.000000000 -0800
@@ -118,10 +118,11 @@ ENTRY(vhpt_miss)
 #ifdef CONFIG_HUGETLB_PAGE
 	extr.u r26=r25,2,6
 	;;
-	cmp.eq p8,p0=HPAGE_SHIFT,r26
+	cmp.ne p8,p0=r18,r26
+	sub r27=r26,r18
 	;;
 (p8)	dep r25=r18,r25,2,6
-(p8)	shr r22=r22,HPAGE_SHIFT-PAGE_SHIFT
+(p8)	shr r22=r22,r27
 #endif
 	;;
 	cmp.eq p6,p7=5,r17			// is IFA pointing into to region 5?
diff -Nurp linux-2.6.3/arch/ia64/mm/hugetlbpage.c linux-2.6.3.htlb/arch/ia64/mm/hugetlbpage.c
--- linux-2.6.3/arch/ia64/mm/hugetlbpage.c	2004-02-17 19:58:01.000000000 -0800
+++ linux-2.6.3.htlb/arch/ia64/mm/hugetlbpage.c	2004-02-26 12:18:51.000000000 -0800
@@ -1,7 +1,11 @@
 /*
  * IA-64 Huge TLB Page Support for Kernel.
  *
- * Copyright (C) 2002, Rohit Seth <rohit.seth@intel.com>
+ * Copyright (C) 2002-2004 Rohit Seth <rohit.seth@intel.com>
+ * Copyright (C) 2003-2004 Ken Chen <kenneth.w.chen@intel.com>
+ *
+ * Sep, 2003: add numa support
+ * Feb, 2004: dynamic hugetlb page size via boot parameter
  */
 
 #include <linux/config.h>
@@ -18,11 +22,10 @@
 #include <asm/tlb.h>
 #include <asm/tlbflush.h>
 
-#define TASK_HPAGE_BASE (REGION_HPAGE << REGION_SHIFT)
-
 static long	htlbpagemem;
 int		htlbpage_max;
 static long	htlbzone_pages;
+unsigned int	hpage_shift=HPAGE_SHIFT_DEFAULT;
 
 static struct list_head hugepage_freelists[MAX_NUMNODES];
 static spinlock_t htlbpage_lock = SPIN_LOCK_UNLOCKED;
@@ -407,7 +410,7 @@ unsigned long hugetlb_get_unmapped_area(
 		return -EINVAL;
 	/* This code assumes that REGION_HPAGE != 0. */
 	if ((REGION_NUMBER(addr) != REGION_HPAGE) || (addr & (HPAGE_SIZE - 1)))
-		addr = TASK_HPAGE_BASE;
+		addr = HPAGE_REGION_BASE;
 	else
 		addr = ALIGN(addr, HPAGE_SIZE);
 	for (vmm = find_vma(current->mm, addr); ; vmm = vmm->vm_next) {
@@ -520,6 +523,35 @@ static int __init hugetlb_setup(char *s)
 }
 __setup("hugepages=", hugetlb_setup);
 
+static int __init hugetlb_setup_sz(char *str)
+{
+	u64 tr_pages;
+	unsigned long long size;
+
+	if (ia64_pal_vm_page_size(&tr_pages, NULL) != 0)
+		/*
+		 * shouldn't happen, but just in case.
+		 */
+		tr_pages = 0x15557000UL;
+
+	size = memparse(str, &str);
+	if (*str || (size & (size-1)) || !(tr_pages & size) ||
+		size <= PAGE_SIZE ||
+		size >= (1UL << PAGE_SHIFT << MAX_ORDER)) {
+		printk(KERN_WARNING "Invalid huge page size specified\n");
+		return 1;
+	}
+
+	hpage_shift = __ffs(size);
+	/*
+	 * boot cpu already executed ia64_mmu_init, and has HPAGE_SHIFT_DEFAULT
+	 * override here with new page shift.
+	 */
+	ia64_set_rr(HPAGE_REGION_BASE, hpage_shift << 2);
+	return 1;
+}
+__setup("hugepagesz=", hugetlb_setup_sz);
+
 static int __init hugetlb_init(void)
 {
 	int i;
@@ -540,7 +572,7 @@ static int __init hugetlb_init(void)
 	printk("Total HugeTLB memory allocated, %ld\n", htlbpagemem);
 	return 0;
 }
-module_init(hugetlb_init);
+__initcall(hugetlb_init);
 
 int hugetlb_report_meminfo(char *buf)
 {
diff -Nurp linux-2.6.3/arch/ia64/mm/init.c linux-2.6.3.htlb/arch/ia64/mm/init.c
--- linux-2.6.3/arch/ia64/mm/init.c	2004-02-17 19:57:39.000000000 -0800
+++ linux-2.6.3.htlb/arch/ia64/mm/init.c	2004-02-26 12:18:51.000000000 -0800
@@ -342,6 +342,10 @@ ia64_mmu_init (void *my_cpu_data)
 
 	ia64_tlb_init();
 
+#ifdef	CONFIG_HUGETLB_PAGE
+	ia64_set_rr(HPAGE_REGION_BASE, HPAGE_SHIFT << 2);
+#endif
+
 #ifdef	CONFIG_IA64_MCA
 	cpu = smp_processor_id();
 
diff -Nurp linux-2.6.3/include/asm-ia64/mmu_context.h linux-2.6.3.htlb/include/asm-ia64/mmu_context.h
--- linux-2.6.3/include/asm-ia64/mmu_context.h	2004-02-17 19:57:16.000000000 -0800
+++ linux-2.6.3.htlb/include/asm-ia64/mmu_context.h	2004-02-26 12:18:51.000000000 -0800
@@ -140,8 +140,9 @@ reload_context (mm_context_t context)
 {
 	unsigned long rid;
 	unsigned long rid_incr = 0;
-	unsigned long rr0, rr1, rr2, rr3, rr4;
+	unsigned long rr0, rr1, rr2, rr3, rr4, old_rr4;
 
+	old_rr4 = ia64_get_rr(0x8000000000000000);
 	rid = context << 3;	/* make space for encoding the region number */
 	rid_incr = 1 << 8;
 
@@ -152,7 +153,7 @@ reload_context (mm_context_t context)
 	rr3 = rr0 + 3*rid_incr;
 	rr4 = rr0 + 4*rid_incr;
 #ifdef  CONFIG_HUGETLB_PAGE
-	rr4 = (rr4 & (~(0xfcUL))) | (HPAGE_SHIFT << 2);
+	rr4 = (rr4 & (~(0xfcUL))) | (old_rr4 & 0xfc);
 #endif
 
 	ia64_set_rr(0x0000000000000000, rr0);
diff -Nurp linux-2.6.3/include/asm-ia64/page.h linux-2.6.3.htlb/include/asm-ia64/page.h
--- linux-2.6.3/include/asm-ia64/page.h	2004-02-17 19:57:16.000000000 -0800
+++ linux-2.6.3.htlb/include/asm-ia64/page.h	2004-02-26 12:18:51.000000000 -0800
@@ -37,31 +37,14 @@
 #define RGN_MAP_LIMIT	((1UL << (4*PAGE_SHIFT - 12)) - PAGE_SIZE)	/* per region addr limit */
 
 #ifdef CONFIG_HUGETLB_PAGE
+# define REGION_HPAGE		(4UL)	/* note: this is hardcoded in reload_context()!*/
+# define REGION_SHIFT		61
+# define HPAGE_REGION_BASE	(REGION_HPAGE << REGION_SHIFT)
+# define HPAGE_SHIFT		hpage_shift
+# define HPAGE_SHIFT_DEFAULT	28	/* check ia64 SDM for architecture supported size */
+# define HPAGE_SIZE		(__IA64_UL_CONST(1) << HPAGE_SHIFT)
+# define HPAGE_MASK		(~(HPAGE_SIZE - 1))
 
-# if defined(CONFIG_HUGETLB_PAGE_SIZE_4GB)
-#  define HPAGE_SHIFT	32
-# elif defined(CONFIG_HUGETLB_PAGE_SIZE_1GB)
-#  define HPAGE_SHIFT	30
-# elif defined(CONFIG_HUGETLB_PAGE_SIZE_256MB)
-#  define HPAGE_SHIFT	28
-# elif defined(CONFIG_HUGETLB_PAGE_SIZE_64MB)
-#  define HPAGE_SHIFT	26
-# elif defined(CONFIG_HUGETLB_PAGE_SIZE_16MB)
-#  define HPAGE_SHIFT	24
-# elif defined(CONFIG_HUGETLB_PAGE_SIZE_4MB)
-#  define HPAGE_SHIFT	22
-# elif defined(CONFIG_HUGETLB_PAGE_SIZE_1MB)
-#  define HPAGE_SHIFT	20
-# elif defined(CONFIG_HUGETLB_PAGE_SIZE_256KB)
-#  define HPAGE_SHIFT	18
-# else
-#  error Unsupported IA-64 HugeTLB Page Size!
-# endif
-
-# define REGION_HPAGE	(4UL)	/* note: this is hardcoded in mmu_context.h:reload_context()!*/
-# define REGION_SHIFT	61
-# define HPAGE_SIZE	(__IA64_UL_CONST(1) << HPAGE_SHIFT)
-# define HPAGE_MASK	(~(HPAGE_SIZE - 1))
 # define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
 # define ARCH_HAS_HUGEPAGE_ONLY_RANGE
 #endif /* CONFIG_HUGETLB_PAGE */
@@ -140,6 +123,7 @@ typedef union ia64_va {
 # define is_hugepage_only_range(addr, len)		\
 	 (REGION_NUMBER(addr) == REGION_HPAGE &&	\
 	  REGION_NUMBER((addr)+(len)) == REGION_HPAGE)
+extern unsigned int hpage_shift;
 #endif
 
 static __inline__ int

^ permalink raw reply	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2004-02-26 20:31 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2004-02-20  1:07 HUGEPAGE SIZE a boottime option Jack Steiner
2004-02-20  2:35 ` David Mosberger
2004-02-20  4:00 ` Chen, Kenneth W
2004-02-20 19:36 ` Seth, Rohit
2004-02-22  5:27 ` Chris Wedgwood
2004-02-22 23:08 ` Jack Steiner
2004-02-23 16:19 ` Chen, Kenneth W
2004-02-23 16:26 ` Chen, Kenneth W
2004-02-23 18:52 ` David Mosberger
2004-02-23 18:58 ` Chen, Kenneth W
2004-02-24  4:05 ` Jack Steiner
2004-02-26  1:26 ` Chen, Kenneth W
2004-02-26  2:09 ` Chen, Kenneth W
2004-02-26  5:18 ` David Mosberger
2004-02-26 20:31 ` Chen, Kenneth W

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox