* [RFC PATCH] Support for big page sizes on 44x (Updated) @ 2008-10-16 2:22 Ilya Yanok 2008-10-16 2:22 ` [PATCH 1/2] powerpc: add 16K/64K pages support for the 44x PPC32 architectures Ilya Yanok ` (3 more replies) 0 siblings, 4 replies; 35+ messages in thread From: Ilya Yanok @ 2008-10-16 2:22 UTC (permalink / raw) To: linuxppc-dev; +Cc: yanok, dzu, pvr, wd These patches add support for selecting page size on PPC 44x. The first one adds support for 16K/64K pages, while the second one adds support for 256K pages along with some hacks. However, there are still a number of problems: 1. We can't use the default PKMAP_BASE definition with 64KB/256KB pages, so we change it. Not sure that it's optimal. Also, the redefined PKMAP_BASE is not aligned on (1<<PMD_SHIFT); we don't know if that is really bad. 2. With 16KB/64KB/256KB pages, WARN_ON(!pmd_none(*pmd)) is triggered inside the dma_alloc_init() function. Not sure if it is really bad. 3. With 256KB pages, ENTRIES_PER_PAGEPAGE in mm/shmem.c becomes zero. 4. We use the asm-offsets mechanism to make PTE_SHIFT/PMD_SHIFT available in assembler, but we don't really need the power of asm-offsets here. Maybe it would be more convenient to just take these defines out of #ifndef __ASSEMBLY__? But this would change asm-generic... We would appreciate any comments. Regards, Ilya. ^ permalink raw reply [flat|nested] 35+ messages in thread
* [PATCH 1/2] powerpc: add 16K/64K pages support for the 44x PPC32 architectures. 2008-10-16 2:22 [RFC PATCH] Support for big page sizes on 44x (Updated) Ilya Yanok @ 2008-10-16 2:22 ` Ilya Yanok 2008-10-17 15:54 ` prodyut hazarika ` (2 more replies) 2008-10-16 2:22 ` [PATCH 2/2] powerpc: support for 256K pages on PPC 44x Ilya Yanok ` (2 subsequent siblings) 3 siblings, 3 replies; 35+ messages in thread From: Ilya Yanok @ 2008-10-16 2:22 UTC (permalink / raw) To: linuxppc-dev; +Cc: yanok, dzu, pvr, wd This patch adds support for page sizes bigger than 4K (16K/64K) on PPC 44x. Signed-off-by: Yuri Tikhonov <yur@emcraft.com> Signed-off-by: Vladimir Panfilov <pvr@emcraft.com> Signed-off-by: Ilya Yanok <yanok@emcraft.com> --- arch/powerpc/Kconfig | 26 ++++++++++++++++++++------ arch/powerpc/include/asm/highmem.h | 8 +++++++- arch/powerpc/include/asm/mmu-44x.h | 18 ++++++++++++++++++ arch/powerpc/include/asm/page.h | 13 ++++++++----- arch/powerpc/include/asm/pgtable.h | 3 +++ arch/powerpc/kernel/asm-offsets.c | 4 ++++ arch/powerpc/kernel/head_44x.S | 22 +++++++++++++--------- arch/powerpc/kernel/misc_32.S | 12 ++++++------ arch/powerpc/mm/pgtable_32.c | 9 ++------- arch/powerpc/platforms/Kconfig.cputype | 2 +- 10 files changed, 82 insertions(+), 35 deletions(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 587da5e..9627cfd 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -402,16 +402,30 @@ config PPC_HAS_HASH_64K depends on PPC64 default n -config PPC_64K_PAGES - bool "64k page size" - depends on PPC64 - select PPC_HAS_HASH_64K +choice + prompt "Page size" + default PPC_4K_PAGES help - This option changes the kernel logical page size to 64k. On machines + The PAGE_SIZE definition. Increasing the page size may + improve the system performance in some dedicated cases like software + RAID with accelerated calculations. 
In PPC64 case on machines without processor support for 64k pages, the kernel will simulate them by loading each individual 4k page on demand transparently, while on hardware with such support, it will be used to map normal application pages. + If unsure, set it to 4 KB. + +config PPC_4K_PAGES + bool "4k page size" + +config PPC_16K_PAGES + bool "16k page size" if 44x + +config PPC_64K_PAGES + bool "64k page size" if 44x || PPC64 + select PPC_HAS_HASH_64K if PPC64 + +endchoice config FORCE_MAX_ZONEORDER int "Maximum zone order" @@ -435,7 +449,7 @@ config FORCE_MAX_ZONEORDER config PPC_SUBPAGE_PROT bool "Support setting protections for 4k subpages" - depends on PPC_64K_PAGES + depends on PPC64 && PPC_64K_PAGES help This option adds support for a system call to allow user programs to set access permissions (read/write, readonly, or no access) diff --git a/arch/powerpc/include/asm/highmem.h b/arch/powerpc/include/asm/highmem.h index 5d99b64..dc1132c 100644 --- a/arch/powerpc/include/asm/highmem.h +++ b/arch/powerpc/include/asm/highmem.h @@ -38,9 +38,15 @@ extern pte_t *pkmap_page_table; * easily, subsequent pte tables have to be allocated in one physical * chunk of RAM. 
*/ +#if defined(CONFIG_PPC_64K_PAGES) && !defined(CONFIG_PPC64) +#define PKMAP_ORDER (27 - PAGE_SHIFT) +#define LAST_PKMAP (1 << PKMAP_ORDER) +#define PKMAP_BASE (FIXADDR_START - PAGE_SIZE*(LAST_PKMAP + 1)) +#else #define LAST_PKMAP (1 << PTE_SHIFT) -#define LAST_PKMAP_MASK (LAST_PKMAP-1) #define PKMAP_BASE ((FIXADDR_START - PAGE_SIZE*(LAST_PKMAP + 1)) & PMD_MASK) +#endif +#define LAST_PKMAP_MASK (LAST_PKMAP-1) #define PKMAP_NR(virt) ((virt-PKMAP_BASE) >> PAGE_SHIFT) #define PKMAP_ADDR(nr) (PKMAP_BASE + ((nr) << PAGE_SHIFT)) diff --git a/arch/powerpc/include/asm/mmu-44x.h b/arch/powerpc/include/asm/mmu-44x.h index a825524..2ca18e8 100644 --- a/arch/powerpc/include/asm/mmu-44x.h +++ b/arch/powerpc/include/asm/mmu-44x.h @@ -4,6 +4,8 @@ * PPC440 support */ +#include <asm/page.h> + #define PPC44x_MMUCR_TID 0x000000ff #define PPC44x_MMUCR_STS 0x00010000 @@ -73,4 +75,20 @@ typedef struct { /* Size of the TLBs used for pinning in lowmem */ #define PPC_PIN_SIZE (1 << 28) /* 256M */ +#if (PAGE_SHIFT == 12) +#define PPC44x_TLBE_SIZE PPC44x_TLB_4K +#elif (PAGE_SHIFT == 14) +#define PPC44x_TLBE_SIZE PPC44x_TLB_16K +#elif (PAGE_SHIFT == 16) +#define PPC44x_TLBE_SIZE PPC44x_TLB_64K +#else +#error "Unsupported PAGE_SIZE" +#endif + +#define PPC44x_PGD_OFF_SHIFT (32 - PMD_SHIFT + 2) +#define PPC44x_PGD_OFF_MASK (PMD_SHIFT - 2) +#define PPC44x_PTE_ADD_SHIFT (32 - PMD_SHIFT + PTE_SHIFT + 3) +#define PPC44x_PTE_ADD_MASK (32 - 3 - PTE_SHIFT) +#define PPC44x_RPN_MASK (31 - PAGE_SHIFT) + #endif /* _ASM_POWERPC_MMU_44X_H_ */ diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h index e088545..537d5b1 100644 --- a/arch/powerpc/include/asm/page.h +++ b/arch/powerpc/include/asm/page.h @@ -15,12 +15,15 @@ #include <asm/types.h> /* - * On PPC32 page size is 4K. For PPC64 we support either 4K or 64K software + * On regular PPC32 page size is 4K (but we support 4K/16K/64K pages + * on PPC44x). For PPC64 we support either 4K or 64K software * page size. 
When using 64K pages however, whether we are really supporting * 64K pages in HW or not is irrelevant to those definitions. */ -#ifdef CONFIG_PPC_64K_PAGES +#if defined(CONFIG_PPC_64K_PAGES) #define PAGE_SHIFT 16 +#elif defined(CONFIG_PPC_16K_PAGES) +#define PAGE_SHIFT 14 #else #define PAGE_SHIFT 12 #endif @@ -140,7 +143,7 @@ typedef struct { pte_basic_t pte; } pte_t; /* 64k pages additionally define a bigger "real PTE" type that gathers * the "second half" part of the PTE for pseudo 64k pages */ -#ifdef CONFIG_PPC_64K_PAGES +#if defined(CONFIG_PPC_64K_PAGES) && defined(CONFIG_PPC64) typedef struct { pte_t pte; unsigned long hidx; } real_pte_t; #else typedef struct { pte_t pte; } real_pte_t; @@ -180,10 +183,10 @@ typedef pte_basic_t pte_t; #define pte_val(x) (x) #define __pte(x) (x) -#ifdef CONFIG_PPC_64K_PAGES +#if defined(CONFIG_PPC_64K_PAGES) && defined(CONFIG_PPC64) typedef struct { pte_t pte; unsigned long hidx; } real_pte_t; #else -typedef unsigned long real_pte_t; +typedef pte_t real_pte_t; #endif diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h index dbb8ca1..0d447fb 100644 --- a/arch/powerpc/include/asm/pgtable.h +++ b/arch/powerpc/include/asm/pgtable.h @@ -39,6 +39,9 @@ extern void paging_init(void); #include <asm-generic/pgtable.h> +#define PGD_T_LOG2 (__builtin_ffs(sizeof(pgd_t)) - 1) +#define PMD_T_LOG2 (__builtin_ffs(sizeof(pmd_t)) - 1) +#define PTE_T_LOG2 (__builtin_ffs(sizeof(pte_t)) - 1) /* * This gets called at the end of handling a page fault, when diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 92768d3..98b8bb6 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -375,6 +375,10 @@ int main(void) DEFINE(VCPU_FAULT_DEAR, offsetof(struct kvm_vcpu, arch.fault_dear)); DEFINE(VCPU_FAULT_ESR, offsetof(struct kvm_vcpu, arch.fault_esr)); #endif +#ifdef CONFIG_44x + DEFINE(PMD_SHIFT, PMD_SHIFT); + DEFINE(PTE_SHIFT, PTE_SHIFT); +#endif return 
0; } diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S index f3a1ea9..6525124 100644 --- a/arch/powerpc/kernel/head_44x.S +++ b/arch/powerpc/kernel/head_44x.S @@ -391,12 +391,14 @@ interrupt_base: rlwimi r13,r12,10,30,30 /* Load the PTE */ - rlwinm r12, r10, 13, 19, 29 /* Compute pgdir/pmd offset */ + /* Compute pgdir/pmd offset */ + rlwinm r12, r10, PPC44x_PGD_OFF_SHIFT, PPC44x_PGD_OFF_MASK, 29 lwzx r11, r12, r11 /* Get pgd/pmd entry */ rlwinm. r12, r11, 0, 0, 20 /* Extract pt base address */ beq 2f /* Bail if no table */ - rlwimi r12, r10, 23, 20, 28 /* Compute pte address */ + /* Compute pte address */ + rlwimi r12, r10, PPC44x_PTE_ADD_SHIFT, PPC44x_PTE_ADD_MASK, 28 lwz r11, 0(r12) /* Get high word of pte entry */ lwz r12, 4(r12) /* Get low word of pte entry */ @@ -485,12 +487,14 @@ tlb_44x_patch_hwater_D: /* Make up the required permissions */ li r13,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_HWEXEC - rlwinm r12, r10, 13, 19, 29 /* Compute pgdir/pmd offset */ + /* Compute pgdir/pmd offset */ + rlwinm r12, r10, PPC44x_PGD_OFF_SHIFT, PPC44x_PGD_OFF_MASK, 29 lwzx r11, r12, r11 /* Get pgd/pmd entry */ rlwinm. r12, r11, 0, 0, 20 /* Extract pt base address */ beq 2f /* Bail if no table */ - rlwimi r12, r10, 23, 20, 28 /* Compute pte address */ + /* Compute pte address */ + rlwimi r12, r10, PPC44x_PTE_ADD_SHIFT, PPC44x_PTE_ADD_MASK, 28 lwz r11, 0(r12) /* Get high word of pte entry */ lwz r12, 4(r12) /* Get low word of pte entry */ @@ -554,15 +558,15 @@ tlb_44x_patch_hwater_I: */ finish_tlb_load: /* Combine RPN & ERPN an write WS 0 */ - rlwimi r11,r12,0,0,19 + rlwimi r11,r12,0,0,PPC44x_RPN_MASK tlbwe r11,r13,PPC44x_TLB_XLAT /* * Create WS1. This is the faulting address (EPN), * page size, and valid flag. 
*/ - li r11,PPC44x_TLB_VALID | PPC44x_TLB_4K - rlwimi r10,r11,0,20,31 /* Insert valid and page size*/ + li r11,PPC44x_TLB_VALID | PPC44x_TLBE_SIZE + rlwimi r10,r11,0,PPC44x_PTE_ADD_MASK,31/* Insert valid and page size*/ tlbwe r10,r13,PPC44x_TLB_PAGEID /* Write PAGEID */ /* And WS 2 */ @@ -634,12 +638,12 @@ _GLOBAL(set_context) * goes at the beginning of the data segment, which is page-aligned. */ .data - .align 12 + .align PAGE_SHIFT .globl sdata sdata: .globl empty_zero_page empty_zero_page: - .space 4096 + .space PAGE_SIZE /* * To support >32-bit physical addresses, we use an 8KB pgdir. diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S index 7a6dfbc..0110fcd 100644 --- a/arch/powerpc/kernel/misc_32.S +++ b/arch/powerpc/kernel/misc_32.S @@ -589,8 +589,8 @@ _GLOBAL(__flush_dcache_icache) BEGIN_FTR_SECTION blr END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) - rlwinm r3,r3,0,0,19 /* Get page base address */ - li r4,4096/L1_CACHE_BYTES /* Number of lines in a page */ + rlwinm r3,r3,0,0,PPC44x_RPN_MASK /* Get page base address */ + li r4,PAGE_SIZE/L1_CACHE_BYTES /* Number of lines in a page */ mtctr r4 mr r6,r3 0: dcbst 0,r3 /* Write line to ram */ @@ -630,8 +630,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) rlwinm r0,r10,0,28,26 /* clear DR */ mtmsr r0 isync - rlwinm r3,r3,0,0,19 /* Get page base address */ - li r4,4096/L1_CACHE_BYTES /* Number of lines in a page */ + rlwinm r3,r3,0,0,PPC44x_RPN_MASK /* Get page base address */ + li r4,PAGE_SIZE/L1_CACHE_BYTES /* Number of lines in a page */ mtctr r4 mr r6,r3 0: dcbst 0,r3 /* Write line to ram */ @@ -655,7 +655,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) * void clear_pages(void *page, int order) ; */ _GLOBAL(clear_pages) - li r0,4096/L1_CACHE_BYTES + li r0,PAGE_SIZE/L1_CACHE_BYTES slw r0,r0,r4 mtctr r0 #ifdef CONFIG_8xx @@ -713,7 +713,7 @@ _GLOBAL(copy_page) dcbt r5,r4 li r11,L1_CACHE_BYTES+4 #endif /* MAX_COPY_PREFETCH */ - li r0,4096/L1_CACHE_BYTES - MAX_COPY_PREFETCH + li 
r0,PAGE_SIZE/L1_CACHE_BYTES - MAX_COPY_PREFETCH crclr 4*cr0+eq 2: mtctr r0 diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index 2001abd..4eed001 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -72,12 +72,7 @@ extern unsigned long p_mapped_by_tlbcam(unsigned long pa); #define p_mapped_by_tlbcam(x) (0UL) #endif /* HAVE_TLBCAM */ -#ifdef CONFIG_PTE_64BIT -/* 44x uses an 8kB pgdir because it has 8-byte Linux PTEs. */ -#define PGDIR_ORDER 1 -#else -#define PGDIR_ORDER 0 -#endif +#define PGDIR_ORDER max(32 + PGD_T_LOG2 - PGDIR_SHIFT - PAGE_SHIFT, 0) pgd_t *pgd_alloc(struct mm_struct *mm) { @@ -400,7 +395,7 @@ void kernel_map_pages(struct page *page, int numpages, int enable) #endif /* CONFIG_DEBUG_PAGEALLOC */ static int fixmaps; -unsigned long FIXADDR_TOP = 0xfffff000; +unsigned long FIXADDR_TOP = (-PAGE_SIZE); EXPORT_SYMBOL(FIXADDR_TOP); void __set_fixmap (enum fixed_addresses idx, phys_addr_t phys, pgprot_t flags) diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 7f65127..a1386a4 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -202,7 +202,7 @@ config PPC_STD_MMU_32 config PPC_MM_SLICES bool - default y if HUGETLB_PAGE || PPC_64K_PAGES + default y if HUGETLB_PAGE || (PPC64 && PPC_64K_PAGES) default n config VIRT_CPU_ACCOUNTING -- 1.5.6.1 ^ permalink raw reply related [flat|nested] 35+ messages in thread
* Re: [PATCH 1/2] powerpc: add 16K/64K pages support for the 44x PPC32 architectures. 2008-10-16 2:22 ` [PATCH 1/2] powerpc: add 16K/64K pages support for the 44x PPC32 architectures Ilya Yanok @ 2008-10-17 15:54 ` prodyut hazarika 2008-10-18 12:58 ` Josh Boyer 2008-10-22 14:28 ` Christian Ehrhardt 2008-11-10 15:09 ` [1/2] " Milton Miller 2 siblings, 1 reply; 35+ messages in thread From: prodyut hazarika @ 2008-10-17 15:54 UTC (permalink / raw) To: Ilya Yanok; +Cc: linuxppc-dev, pvr, dzu, wd On Wed, Oct 15, 2008 at 7:22 PM, Ilya Yanok <yanok@emcraft.com> wrote: > This patch adds support for page sizes bigger than 4K (16K/64K) on > PPC 44x. > This patch looks good to me. Seems that all the review comments have been incorporated. Josh, it would be great if this patch is pulled into the mainline kernel. I have seen significant performance improvement with RAID0/5 by using 64K pages. ^ permalink raw reply [flat|nested] 35+ messages in thread
* Re: [PATCH 1/2] powerpc: add 16K/64K pages support for the 44x PPC32 architectures. 2008-10-17 15:54 ` prodyut hazarika @ 2008-10-18 12:58 ` Josh Boyer 2008-10-18 20:36 ` prodyut hazarika 0 siblings, 1 reply; 35+ messages in thread From: Josh Boyer @ 2008-10-18 12:58 UTC (permalink / raw) To: prodyut hazarika; +Cc: linuxppc-dev, dzu, pvr, Ilya Yanok, wd On Fri, 17 Oct 2008 08:54:52 -0700 "prodyut hazarika" <prodyuth@gmail.com> wrote: > On Wed, Oct 15, 2008 at 7:22 PM, Ilya Yanok <yanok@emcraft.com> wrote: > > This patch adds support for page sizes bigger than 4K (16K/64K) on > > PPC 44x. > > > > This patch looks good to me. Seems that all the review comments have > been incorporated. > > Josh, it would be great if this patch is pulled into the mainline > kernel. I have seen significant performance improvement with RAID0/5 > by using 64K pages. It helps if you CC the person you're writing to :). Anyway, I looked over it briefly and agree it looks pretty good. A bit late for 2.6.28, but I'll do a more thorough review and get it in for 2.6.29. josh ^ permalink raw reply [flat|nested] 35+ messages in thread
* Re: [PATCH 1/2] powerpc: add 16K/64K pages support for the 44x PPC32 architectures. 2008-10-18 12:58 ` Josh Boyer @ 2008-10-18 20:36 ` prodyut hazarika 0 siblings, 0 replies; 35+ messages in thread From: prodyut hazarika @ 2008-10-18 20:36 UTC (permalink / raw) To: Josh Boyer; +Cc: linuxppc-dev, dzu, pvr, Ilya Yanok, wd > It helps if you CC the person you're writing too :). Thanks Josh for pointing this out :-) I will be careful in future. > Anyway, I looked over it briefly and agree it looks pretty good. A bit > late for 2.6.28, but I'll do a more thorough review and get it in for > 2.6.29. > Great. Look forward to seeing this on the mainline kernel. ^ permalink raw reply [flat|nested] 35+ messages in thread
* Re: [PATCH 1/2] powerpc: add 16K/64K pages support for the 44x PPC32 architectures. 2008-10-16 2:22 ` [PATCH 1/2] powerpc: add 16K/64K pages support for the 44x PPC32 architectures Ilya Yanok 2008-10-17 15:54 ` prodyut hazarika @ 2008-10-22 14:28 ` Christian Ehrhardt 2008-10-22 17:54 ` Christian Ehrhardt 2008-10-31 23:23 ` Hollis Blanchard 2008-11-10 15:09 ` [1/2] " Milton Miller 2 siblings, 2 replies; 35+ messages in thread From: Christian Ehrhardt @ 2008-10-22 14:28 UTC (permalink / raw) To: Ilya Yanok; +Cc: linuxppc-dev, Hollis Blanchard, pvr, dzu, Wolfgang Denk Hi Ilya, I just tried your patch on my 440 board because it would help us in our environment. Unfortunately I ran into a bug on early boot (mark_bootmem). A log can be found in this mail; this is the bug when running with 64k page size. I tried this with and without your 2/2 256k patch and also with page size configured to 16k; the error is the same in all cases. I used an earlier version of your patch in the past and it worked fine. Applying this old patch causes the same problem. Therefore I expect that some other code was changed that breaks with page size != 4k. I did not check that in detail yet, but I would be happy for any hint I could get to fix this. => bootm ## Booting kernel from Legacy Image at 04000000 ... Image Name: Linux-2.6.27-dirty Image Type: PowerPC Linux Kernel Image (gzip compressed) Data Size: 1512203 Bytes = 1.4 MB Load Address: 00400000 Entry Point: 00400458 Verifying Checksum ... OK Uncompressing Kernel Image ... 
OK CPU clock-frequency <- 0x27bc86a4 (667MHz) CPU timebase-frequency <- 0x27bc86a4 (667MHz) /plb: clock-frequency <- 9ef21a9 (167MHz) /plb/opb: clock-frequency <- 4f790d4 (83MHz) /plb/opb/ebc: clock-frequency <- 34fb5e3 (56MHz) /plb/opb/serial@ef600300: clock-frequency <- a8c000 (11MHz) /plb/opb/serial@ef600400: clock-frequency <- a8c000 (11MHz) /plb/opb/serial@ef600500: clock-frequency <- 42ecac (4MHz) /plb/opb/serial@ef600600: clock-frequency <- 42ecac (4MHz) Memory <- <0x0 0x0 0xffff000> (255MB) ethernet0: local-mac-address <- 00:10:ec:00:e2:3e ethernet1: local-mac-address <- 00:10:ec:80:e2:3e zImage starting: loaded at 0x00400000 (sp: 0x0fe3c820) Allocating 0x3c54dc bytes for kernel ... gunzipping (0x00000000 <- 0x0040e000:0x007a2428)...done 0x380a90 bytes Linux/PowerPC load: console=ttyS0,115200 ip=dhcp nfsroot=192.168.1.2:/home/paelzer/ubuntu_ppc.8.04 root=/dev/nfs rw Finalizing device tree... flat tree at 0x40bed8 Using PowerPC 44x Platform machine description Linux version 2.6.27-dirty (paelzer@HelionPrime) (gcc version 4.2.3) #5 Wed Oct 22 15:15:40 CEST 2008 console [udbg0] enabled ------------[ cut here ]------------ Kernel BUG at c02be6cc [verbose debug info unavailable] Oops: Exception in kernel mode, sig: 5 [#1] PowerPC 44x Platform NIP: c02be6cc LR: c02ba4e4 CTR: 00000000 REGS: c0351eb0 TRAP: 0700 Not tainted (2.6.27-dirty) MSR: 00021000 <ME> CR: 22004022 XER: 0000005f TASK = c03204a8[0] 'swapper' THREAD: c0350000 GPR00: c02d0a1c c0351f60 c03204a8 00000fff 00001000 00000001 00000000 00000000 GPR08: e0000000 00000000 ffffffff c02d0a14 22000024 00000000 0ffa6800 0ffbf000 GPR16: c02ed838 bfe8f45c 00000000 00000000 0ffa7500 0fe3cb20 00000001 c02d0a1c GPR24: 00000000 00000001 00001000 00000fff c0390000 00000fff c039d1d0 c02d0a08 NIP [c02be6cc] mark_bootmem+0xe0/0x124 LR [c02ba4e4] do_init_bootmem+0x134/0x168 Call Trace: [c0351f60] [c02be6a4] mark_bootmem+0xb8/0x124 (unreliable) [c0351f90] [c02ba4e4] do_init_bootmem+0x134/0x168 [c0351fb0] [c02b8e00] 
setup_arch+0x13c/0x1b8 [c0351fc0] [c02b066c] start_kernel+0x94/0x2ac [c0351ff0] [c00001e8] skpinv+0x190/0x1cc Instruction dump: 7f07c378 4bfffe15 7c7e1b78 4192000c 2f830000 409e0024 7f9ae000 419e0050 817f0014 83bf0004 3bebffec 4bffff68 <0fe00000> 48000000 7f63db78 7fa4eb78 ---[ end trace 31fd0ba7d8756001 ]--- Kernel panic - not syncing: Attempted to kill the idle task! Rebooting in 180 seconds.. Ilya Yanok wrote: > This patch adds support for page sizes bigger than 4K (16K/64K) on > PPC 44x. > > Signed-off-by: Yuri Tikhonov <yur@emcraft.com> > Signed-off-by: Vladimir Panfilov <pvr@emcraft.com> > Signed-off-by: Ilya Yanok <yanok@emcraft.com> > --- > arch/powerpc/Kconfig | 26 ++++++++++++++++++++------ > arch/powerpc/include/asm/highmem.h | 8 +++++++- > arch/powerpc/include/asm/mmu-44x.h | 18 ++++++++++++++++++ > arch/powerpc/include/asm/page.h | 13 ++++++++----- > arch/powerpc/include/asm/pgtable.h | 3 +++ > arch/powerpc/kernel/asm-offsets.c | 4 ++++ > arch/powerpc/kernel/head_44x.S | 22 +++++++++++++--------- > arch/powerpc/kernel/misc_32.S | 12 ++++++------ > arch/powerpc/mm/pgtable_32.c | 9 ++------- > arch/powerpc/platforms/Kconfig.cputype | 2 +- > 10 files changed, 82 insertions(+), 35 deletions(-) > > diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig > index 587da5e..9627cfd 100644 > --- a/arch/powerpc/Kconfig > +++ b/arch/powerpc/Kconfig > @@ -402,16 +402,30 @@ config PPC_HAS_HASH_64K > depends on PPC64 > default n > > -config PPC_64K_PAGES > - bool "64k page size" > - depends on PPC64 > - select PPC_HAS_HASH_64K > +choice > + prompt "Page size" > + default PPC_4K_PAGES > help > - This option changes the kernel logical page size to 64k. On machines > + The PAGE_SIZE definition. Increasing the page size may > + improve the system performance in some dedicated cases like software > + RAID with accelerated calculations. 
In PPC64 case on machines > without processor support for 64k pages, the kernel will simulate > them by loading each individual 4k page on demand transparently, > while on hardware with such support, it will be used to map > normal application pages. > + If unsure, set it to 4 KB. > + > +config PPC_4K_PAGES > + bool "4k page size" > + > +config PPC_16K_PAGES > + bool "16k page size" if 44x > + > +config PPC_64K_PAGES > + bool "64k page size" if 44x || PPC64 > + select PPC_HAS_HASH_64K if PPC64 > + > +endchoice > > config FORCE_MAX_ZONEORDER > int "Maximum zone order" > @@ -435,7 +449,7 @@ config FORCE_MAX_ZONEORDER > > config PPC_SUBPAGE_PROT > bool "Support setting protections for 4k subpages" > - depends on PPC_64K_PAGES > + depends on PPC64 && PPC_64K_PAGES > help > This option adds support for a system call to allow user programs > to set access permissions (read/write, readonly, or no access) > diff --git a/arch/powerpc/include/asm/highmem.h b/arch/powerpc/include/asm/highmem.h > index 5d99b64..dc1132c 100644 > --- a/arch/powerpc/include/asm/highmem.h > +++ b/arch/powerpc/include/asm/highmem.h > @@ -38,9 +38,15 @@ extern pte_t *pkmap_page_table; > * easily, subsequent pte tables have to be allocated in one physical > * chunk of RAM. 
> */ > +#if defined(CONFIG_PPC_64K_PAGES) && !defined(CONFIG_PPC64) > +#define PKMAP_ORDER (27 - PAGE_SHIFT) > +#define LAST_PKMAP (1 << PKMAP_ORDER) > +#define PKMAP_BASE (FIXADDR_START - PAGE_SIZE*(LAST_PKMAP + 1)) > +#else > #define LAST_PKMAP (1 << PTE_SHIFT) > -#define LAST_PKMAP_MASK (LAST_PKMAP-1) > #define PKMAP_BASE ((FIXADDR_START - PAGE_SIZE*(LAST_PKMAP + 1)) & PMD_MASK) > +#endif > +#define LAST_PKMAP_MASK (LAST_PKMAP-1) > #define PKMAP_NR(virt) ((virt-PKMAP_BASE) >> PAGE_SHIFT) > #define PKMAP_ADDR(nr) (PKMAP_BASE + ((nr) << PAGE_SHIFT)) > > diff --git a/arch/powerpc/include/asm/mmu-44x.h b/arch/powerpc/include/asm/mmu-44x.h > index a825524..2ca18e8 100644 > --- a/arch/powerpc/include/asm/mmu-44x.h > +++ b/arch/powerpc/include/asm/mmu-44x.h > @@ -4,6 +4,8 @@ > * PPC440 support > */ > > +#include <asm/page.h> > + > #define PPC44x_MMUCR_TID 0x000000ff > #define PPC44x_MMUCR_STS 0x00010000 > > @@ -73,4 +75,20 @@ typedef struct { > /* Size of the TLBs used for pinning in lowmem */ > #define PPC_PIN_SIZE (1 << 28) /* 256M */ > > +#if (PAGE_SHIFT == 12) > +#define PPC44x_TLBE_SIZE PPC44x_TLB_4K > +#elif (PAGE_SHIFT == 14) > +#define PPC44x_TLBE_SIZE PPC44x_TLB_16K > +#elif (PAGE_SHIFT == 16) > +#define PPC44x_TLBE_SIZE PPC44x_TLB_64K > +#else > +#error "Unsupported PAGE_SIZE" > +#endif > + > +#define PPC44x_PGD_OFF_SHIFT (32 - PMD_SHIFT + 2) > +#define PPC44x_PGD_OFF_MASK (PMD_SHIFT - 2) > +#define PPC44x_PTE_ADD_SHIFT (32 - PMD_SHIFT + PTE_SHIFT + 3) > +#define PPC44x_PTE_ADD_MASK (32 - 3 - PTE_SHIFT) > +#define PPC44x_RPN_MASK (31 - PAGE_SHIFT) > + > #endif /* _ASM_POWERPC_MMU_44X_H_ */ > diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h > index e088545..537d5b1 100644 > --- a/arch/powerpc/include/asm/page.h > +++ b/arch/powerpc/include/asm/page.h > @@ -15,12 +15,15 @@ > #include <asm/types.h> > > /* > - * On PPC32 page size is 4K. 
For PPC64 we support either 4K or 64K software > + * On regular PPC32 page size is 4K (but we support 4K/16K/64K pages > + * on PPC44x). For PPC64 we support either 4K or 64K software > * page size. When using 64K pages however, whether we are really supporting > * 64K pages in HW or not is irrelevant to those definitions. > */ > -#ifdef CONFIG_PPC_64K_PAGES > +#if defined(CONFIG_PPC_64K_PAGES) > #define PAGE_SHIFT 16 > +#elif defined(CONFIG_PPC_16K_PAGES) > +#define PAGE_SHIFT 14 > #else > #define PAGE_SHIFT 12 > #endif > @@ -140,7 +143,7 @@ typedef struct { pte_basic_t pte; } pte_t; > /* 64k pages additionally define a bigger "real PTE" type that gathers > * the "second half" part of the PTE for pseudo 64k pages > */ > -#ifdef CONFIG_PPC_64K_PAGES > +#if defined(CONFIG_PPC_64K_PAGES) && defined(CONFIG_PPC64) > typedef struct { pte_t pte; unsigned long hidx; } real_pte_t; > #else > typedef struct { pte_t pte; } real_pte_t; > @@ -180,10 +183,10 @@ typedef pte_basic_t pte_t; > #define pte_val(x) (x) > #define __pte(x) (x) > > -#ifdef CONFIG_PPC_64K_PAGES > +#if defined(CONFIG_PPC_64K_PAGES) && defined(CONFIG_PPC64) > typedef struct { pte_t pte; unsigned long hidx; } real_pte_t; > #else > -typedef unsigned long real_pte_t; > +typedef pte_t real_pte_t; > #endif > > > diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h > index dbb8ca1..0d447fb 100644 > --- a/arch/powerpc/include/asm/pgtable.h > +++ b/arch/powerpc/include/asm/pgtable.h > @@ -39,6 +39,9 @@ extern void paging_init(void); > > #include <asm-generic/pgtable.h> > > +#define PGD_T_LOG2 (__builtin_ffs(sizeof(pgd_t)) - 1) > +#define PMD_T_LOG2 (__builtin_ffs(sizeof(pmd_t)) - 1) > +#define PTE_T_LOG2 (__builtin_ffs(sizeof(pte_t)) - 1) > > /* > * This gets called at the end of handling a page fault, when > diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c > index 92768d3..98b8bb6 100644 > --- a/arch/powerpc/kernel/asm-offsets.c > +++ 
b/arch/powerpc/kernel/asm-offsets.c > @@ -375,6 +375,10 @@ int main(void) > DEFINE(VCPU_FAULT_DEAR, offsetof(struct kvm_vcpu, arch.fault_dear)); > DEFINE(VCPU_FAULT_ESR, offsetof(struct kvm_vcpu, arch.fault_esr)); > #endif > +#ifdef CONFIG_44x > + DEFINE(PMD_SHIFT, PMD_SHIFT); > + DEFINE(PTE_SHIFT, PTE_SHIFT); > +#endif > > return 0; > } > diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S > index f3a1ea9..6525124 100644 > --- a/arch/powerpc/kernel/head_44x.S > +++ b/arch/powerpc/kernel/head_44x.S > @@ -391,12 +391,14 @@ interrupt_base: > rlwimi r13,r12,10,30,30 > > /* Load the PTE */ > - rlwinm r12, r10, 13, 19, 29 /* Compute pgdir/pmd offset */ > + /* Compute pgdir/pmd offset */ > + rlwinm r12, r10, PPC44x_PGD_OFF_SHIFT, PPC44x_PGD_OFF_MASK, 29 > lwzx r11, r12, r11 /* Get pgd/pmd entry */ > rlwinm. r12, r11, 0, 0, 20 /* Extract pt base address */ > beq 2f /* Bail if no table */ > > - rlwimi r12, r10, 23, 20, 28 /* Compute pte address */ > + /* Compute pte address */ > + rlwimi r12, r10, PPC44x_PTE_ADD_SHIFT, PPC44x_PTE_ADD_MASK, 28 > lwz r11, 0(r12) /* Get high word of pte entry */ > lwz r12, 4(r12) /* Get low word of pte entry */ > > @@ -485,12 +487,14 @@ tlb_44x_patch_hwater_D: > /* Make up the required permissions */ > li r13,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_HWEXEC > > - rlwinm r12, r10, 13, 19, 29 /* Compute pgdir/pmd offset */ > + /* Compute pgdir/pmd offset */ > + rlwinm r12, r10, PPC44x_PGD_OFF_SHIFT, PPC44x_PGD_OFF_MASK, 29 > lwzx r11, r12, r11 /* Get pgd/pmd entry */ > rlwinm. 
r12, r11, 0, 0, 20 /* Extract pt base address */ > beq 2f /* Bail if no table */ > > - rlwimi r12, r10, 23, 20, 28 /* Compute pte address */ > + /* Compute pte address */ > + rlwimi r12, r10, PPC44x_PTE_ADD_SHIFT, PPC44x_PTE_ADD_MASK, 28 > lwz r11, 0(r12) /* Get high word of pte entry */ > lwz r12, 4(r12) /* Get low word of pte entry */ > > @@ -554,15 +558,15 @@ tlb_44x_patch_hwater_I: > */ > finish_tlb_load: > /* Combine RPN & ERPN an write WS 0 */ > - rlwimi r11,r12,0,0,19 > + rlwimi r11,r12,0,0,PPC44x_RPN_MASK > tlbwe r11,r13,PPC44x_TLB_XLAT > > /* > * Create WS1. This is the faulting address (EPN), > * page size, and valid flag. > */ > - li r11,PPC44x_TLB_VALID | PPC44x_TLB_4K > - rlwimi r10,r11,0,20,31 /* Insert valid and page size*/ > + li r11,PPC44x_TLB_VALID | PPC44x_TLBE_SIZE > + rlwimi r10,r11,0,PPC44x_PTE_ADD_MASK,31/* Insert valid and page size*/ > tlbwe r10,r13,PPC44x_TLB_PAGEID /* Write PAGEID */ > > /* And WS 2 */ > @@ -634,12 +638,12 @@ _GLOBAL(set_context) > * goes at the beginning of the data segment, which is page-aligned. > */ > .data > - .align 12 > + .align PAGE_SHIFT > .globl sdata > sdata: > .globl empty_zero_page > empty_zero_page: > - .space 4096 > + .space PAGE_SIZE > > /* > * To support >32-bit physical addresses, we use an 8KB pgdir. 
> diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S > index 7a6dfbc..0110fcd 100644 > --- a/arch/powerpc/kernel/misc_32.S > +++ b/arch/powerpc/kernel/misc_32.S > @@ -589,8 +589,8 @@ _GLOBAL(__flush_dcache_icache) > BEGIN_FTR_SECTION > blr > END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) > - rlwinm r3,r3,0,0,19 /* Get page base address */ > - li r4,4096/L1_CACHE_BYTES /* Number of lines in a page */ > + rlwinm r3,r3,0,0,PPC44x_RPN_MASK /* Get page base address */ > + li r4,PAGE_SIZE/L1_CACHE_BYTES /* Number of lines in a page */ > mtctr r4 > mr r6,r3 > 0: dcbst 0,r3 /* Write line to ram */ > @@ -630,8 +630,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) > rlwinm r0,r10,0,28,26 /* clear DR */ > mtmsr r0 > isync > - rlwinm r3,r3,0,0,19 /* Get page base address */ > - li r4,4096/L1_CACHE_BYTES /* Number of lines in a page */ > + rlwinm r3,r3,0,0,PPC44x_RPN_MASK /* Get page base address */ > + li r4,PAGE_SIZE/L1_CACHE_BYTES /* Number of lines in a page */ > mtctr r4 > mr r6,r3 > 0: dcbst 0,r3 /* Write line to ram */ > @@ -655,7 +655,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) > * void clear_pages(void *page, int order) ; > */ > _GLOBAL(clear_pages) > - li r0,4096/L1_CACHE_BYTES > + li r0,PAGE_SIZE/L1_CACHE_BYTES > slw r0,r0,r4 > mtctr r0 > #ifdef CONFIG_8xx > @@ -713,7 +713,7 @@ _GLOBAL(copy_page) > dcbt r5,r4 > li r11,L1_CACHE_BYTES+4 > #endif /* MAX_COPY_PREFETCH */ > - li r0,4096/L1_CACHE_BYTES - MAX_COPY_PREFETCH > + li r0,PAGE_SIZE/L1_CACHE_BYTES - MAX_COPY_PREFETCH > crclr 4*cr0+eq > 2: > mtctr r0 > diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c > index 2001abd..4eed001 100644 > --- a/arch/powerpc/mm/pgtable_32.c > +++ b/arch/powerpc/mm/pgtable_32.c > @@ -72,12 +72,7 @@ extern unsigned long p_mapped_by_tlbcam(unsigned long pa); > #define p_mapped_by_tlbcam(x) (0UL) > #endif /* HAVE_TLBCAM */ > > -#ifdef CONFIG_PTE_64BIT > -/* 44x uses an 8kB pgdir because it has 8-byte Linux PTEs. 
*/ > -#define PGDIR_ORDER 1 > -#else > -#define PGDIR_ORDER 0 > -#endif > +#define PGDIR_ORDER max(32 + PGD_T_LOG2 - PGDIR_SHIFT - PAGE_SHIFT, 0) > > pgd_t *pgd_alloc(struct mm_struct *mm) > { > @@ -400,7 +395,7 @@ void kernel_map_pages(struct page *page, int numpages, int enable) > #endif /* CONFIG_DEBUG_PAGEALLOC */ > > static int fixmaps; > -unsigned long FIXADDR_TOP = 0xfffff000; > +unsigned long FIXADDR_TOP = (-PAGE_SIZE); > EXPORT_SYMBOL(FIXADDR_TOP); > > void __set_fixmap (enum fixed_addresses idx, phys_addr_t phys, pgprot_t flags) > diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype > index 7f65127..a1386a4 100644 > --- a/arch/powerpc/platforms/Kconfig.cputype > +++ b/arch/powerpc/platforms/Kconfig.cputype > @@ -202,7 +202,7 @@ config PPC_STD_MMU_32 > > config PPC_MM_SLICES > bool > - default y if HUGETLB_PAGE || PPC_64K_PAGES > + default y if HUGETLB_PAGE || (PPC64 && PPC_64K_PAGES) > default n > > config VIRT_CPU_ACCOUNTING > -- Grüsse / regards, Christian Ehrhardt IBM Linux Technology Center, Open Virtualization ^ permalink raw reply [flat|nested] 35+ messages in thread
* Re: [PATCH 1/2] powerpc: add 16K/64K pages support for the 44x PPC32 architectures. 2008-10-22 14:28 ` Christian Ehrhardt @ 2008-10-22 17:54 ` Christian Ehrhardt 2008-10-31 23:23 ` Hollis Blanchard 1 sibling, 0 replies; 35+ messages in thread From: Christian Ehrhardt @ 2008-10-22 17:54 UTC (permalink / raw) To: Ilya Yanok; +Cc: linuxppc-dev, Hollis Blanchard, pvr, dzu, Wolfgang Denk Ilya, here the snippet you asked for with CONFIG_DEBUG_BUGVERBOSE enabled and bootmem_debug set. ## Booting kernel from Legacy Image at 04000000 ... Image Name: Linux-2.6.27-dirty Image Type: PowerPC Linux Kernel Image (gzip compressed) Data Size: 1521505 Bytes = 1.5 MB Load Address: 00400000 Entry Point: 00400458 Verifying Checksum ... OK Uncompressing Kernel Image ... OK CPU clock-frequency <- 0x27bc86a4 (667MHz) CPU timebase-frequency <- 0x27bc86a4 (667MHz) /plb: clock-frequency <- 9ef21a9 (167MHz) /plb/opb: clock-frequency <- 4f790d4 (83MHz) /plb/opb/ebc: clock-frequency <- 34fb5e3 (56MHz) /plb/opb/serial@ef600300: clock-frequency <- a8c000 (11MHz) /plb/opb/serial@ef600400: clock-frequency <- a8c000 (11MHz) /plb/opb/serial@ef600500: clock-frequency <- 42ecac (4MHz) /plb/opb/serial@ef600600: clock-frequency <- 42ecac (4MHz) Memory <- <0x0 0x0 0xffff000> (255MB) ethernet0: local-mac-address <- 00:10:ec:00:e2:3e ethernet1: local-mac-address <- 00:10:ec:80:e2:3e zImage starting: loaded at 0x00400000 (sp: 0x0fe3c820) Allocating 0x3d54dc bytes for kernel ... gunzipping (0x00000000 <- 0x0040e000:0x007b24a4)...done 0x390af8 bytes Linux/PowerPC load: console=ttyS0,115200 ip=dhcp nfsroot=192.168.1.2:/home/paelzer/ubuntu_ppc.8.04 root=/dev/nfs rw bootmem_debug Finalizing device tree... 
flat tree at 0x40bed8 Using PowerPC 44x Platform machine description Linux version 2.6.27-dirty (paelzer@HelionPrime) (gcc version 4.2.3) #12 Wed Oct 22 19:40:49 CEST 2008 console [udbg0] enabled bootmem::init_bootmem_core nid=0 start=0 map=ffd end=fff mapsize=200 bootmem::mark_bootmem_node nid=0 start=0 end=fff reserve=0 flags=0 bootmem::__free nid=0 start=0 end=fff bootmem::mark_bootmem_node nid=0 start=0 end=3e reserve=1 flags=0 bootmem::__reserve nid=0 start=0 end=3e flags=0 bootmem::mark_bootmem_node nid=0 start=40 end=41 reserve=1 flags=0 bootmem::__reserve nid=0 start=40 end=41 flags=0 bootmem::mark_bootmem_node nid=0 start=ffd end=fff reserve=1 flags=0 bootmem::__reserve nid=0 start=ffd end=fff flags=0 ------------[ cut here ]------------ kernel BUG at mm/bootmem.c:320! Oops: Exception in kernel mode, sig: 5 [#1] PowerPC 44x Platform NIP: c02ce838 LR: c02ca4e4 CTR: c000dcf8 REGS: c0361eb0 TRAP: 0700 Not tainted (2.6.27-dirty) MSR: 00021000 <ME> CR: 22004022 XER: 0000005f TASK = c03304a8[0] 'swapper' THREAD: c0360000 GPR00: c02e0c98 c0361f60 c03304a8 00000fff 00001000 00000001 00000000 00004000 GPR08: e0000000 00000000 ffffffff c02e0c90 22000024 00000000 0ffa6800 0ffbf000 GPR16: 100c0000 00000000 100c0000 00000000 0ffa7500 0fe3cb20 00000001 c02e0c98 GPR24: 00000000 00000001 00001000 00000fff c03a0000 00000fff c03ad1e0 c02e0c84 NIP [c02ce838] mark_bootmem+0xe0/0x124 LR [c02ca4e4] do_init_bootmem+0x134/0x168 Call Trace: [c0361f60] [c02ce810] mark_bootmem+0xb8/0x124 (unreliable) [c0361f90] [c02ca4e4] do_init_bootmem+0x134/0x168 [c0361fb0] [c02c8e00] setup_arch+0x13c/0x1b8 [c0361fc0] [c02c066c] start_kernel+0x94/0x2ac [c0361ff0] [c00001e8] skpinv+0x190/0x1cc Instruction dump: 7f07c378 4bfffe15 7c7e1b78 4192000c 2f830000 409e0024 7f9ae000 419e0050 817f0014 83bf0004 3bebffec 4bffff68 <0fe00000> 48000000 7f63db78 7fa4eb78 ---[ end trace 31fd0ba7d8756001 ]--- Kernel panic - not syncing: Attempted to kill the idle task! Rebooting in 180 seconds.. 
Christian Ehrhardt wrote: > Hi Ilya, > I just tried your patch on my 440 board because it would help us in > our environment. > Unfortunately I run into a bug on early boot (mark_bootmem). > > A log can be found in this mail, this is the bug when running with 64k > page size. > I tried this with and without your 2/2 265k patch and also with page > size configured to 16k, the error is the same in all cases. > > I used an earlier version of your patch in the past and it worked > fine. Applying this old patch causes the same problem. > Therefore I expect that there was some other code changed that breaks > with page size != 4k. > > I did not check that in detail yet, but I would be happy for every > hint I could get to fix this. > > => bootm > ## Booting kernel from Legacy Image at 04000000 ... > Image Name: Linux-2.6.27-dirty > Image Type: PowerPC Linux Kernel Image (gzip compressed) > Data Size: 1512203 Bytes = 1.4 MB > Load Address: 00400000 > Entry Point: 00400458 > Verifying Checksum ... OK > Uncompressing Kernel Image ... OK > CPU clock-frequency <- 0x27bc86a4 (667MHz) > CPU timebase-frequency <- 0x27bc86a4 (667MHz) > /plb: clock-frequency <- 9ef21a9 (167MHz) > /plb/opb: clock-frequency <- 4f790d4 (83MHz) > /plb/opb/ebc: clock-frequency <- 34fb5e3 (56MHz) > /plb/opb/serial@ef600300: clock-frequency <- a8c000 (11MHz) > /plb/opb/serial@ef600400: clock-frequency <- a8c000 (11MHz) > /plb/opb/serial@ef600500: clock-frequency <- 42ecac (4MHz) > /plb/opb/serial@ef600600: clock-frequency <- 42ecac (4MHz) > Memory <- <0x0 0x0 0xffff000> (255MB) > ethernet0: local-mac-address <- 00:10:ec:00:e2:3e > ethernet1: local-mac-address <- 00:10:ec:80:e2:3e > > zImage starting: loaded at 0x00400000 (sp: 0x0fe3c820) > Allocating 0x3c54dc bytes for kernel ... > gunzipping (0x00000000 <- 0x0040e000:0x007a2428)...done 0x380a90 bytes > > Linux/PowerPC load: console=ttyS0,115200 ip=dhcp > nfsroot=192.168.1.2:/home/paelzer/ubuntu_ppc.8.04 root=/dev/nfs rw > Finalizing device tree... 
flat tree at 0x40bed8 > Using PowerPC 44x Platform machine description > Linux version 2.6.27-dirty (paelzer@HelionPrime) (gcc version 4.2.3) > #5 Wed Oct 22 15:15:40 CEST 2008 > console [udbg0] enabled > ------------[ cut here ]------------ > Kernel BUG at c02be6cc [verbose debug info unavailable] > Oops: Exception in kernel mode, sig: 5 [#1] > PowerPC 44x Platform > NIP: c02be6cc LR: c02ba4e4 CTR: 00000000 > REGS: c0351eb0 TRAP: 0700 Not tainted (2.6.27-dirty) > MSR: 00021000 <ME> CR: 22004022 XER: 0000005f > TASK = c03204a8[0] 'swapper' THREAD: c0350000 > GPR00: c02d0a1c c0351f60 c03204a8 00000fff 00001000 00000001 00000000 > 00000000 > GPR08: e0000000 00000000 ffffffff c02d0a14 22000024 00000000 0ffa6800 > 0ffbf000 > GPR16: c02ed838 bfe8f45c 00000000 00000000 0ffa7500 0fe3cb20 00000001 > c02d0a1c > GPR24: 00000000 00000001 00001000 00000fff c0390000 00000fff c039d1d0 > c02d0a08 > NIP [c02be6cc] mark_bootmem+0xe0/0x124 > LR [c02ba4e4] do_init_bootmem+0x134/0x168 > Call Trace: > [c0351f60] [c02be6a4] mark_bootmem+0xb8/0x124 (unreliable) > [c0351f90] [c02ba4e4] do_init_bootmem+0x134/0x168 > [c0351fb0] [c02b8e00] setup_arch+0x13c/0x1b8 > [c0351fc0] [c02b066c] start_kernel+0x94/0x2ac > [c0351ff0] [c00001e8] skpinv+0x190/0x1cc > Instruction dump: > 7f07c378 4bfffe15 7c7e1b78 4192000c 2f830000 409e0024 7f9ae000 419e0050 > 817f0014 83bf0004 3bebffec 4bffff68 <0fe00000> 48000000 7f63db78 7fa4eb78 > ---[ end trace 31fd0ba7d8756001 ]--- > Kernel panic - not syncing: Attempted to kill the idle task! > Rebooting in 180 seconds.. > > > Ilya Yanok wrote: >> This patch adds support for page sizes bigger than 4K (16K/64K) on >> PPC 44x. 
>> >> Signed-off-by: Yuri Tikhonov <yur@emcraft.com> >> Signed-off-by: Vladimir Panfilov <pvr@emcraft.com> >> Signed-off-by: Ilya Yanok <yanok@emcraft.com> >> --- >> arch/powerpc/Kconfig | 26 >> ++++++++++++++++++++------ >> arch/powerpc/include/asm/highmem.h | 8 +++++++- >> arch/powerpc/include/asm/mmu-44x.h | 18 ++++++++++++++++++ >> arch/powerpc/include/asm/page.h | 13 ++++++++----- >> arch/powerpc/include/asm/pgtable.h | 3 +++ >> arch/powerpc/kernel/asm-offsets.c | 4 ++++ >> arch/powerpc/kernel/head_44x.S | 22 +++++++++++++--------- >> arch/powerpc/kernel/misc_32.S | 12 ++++++------ >> arch/powerpc/mm/pgtable_32.c | 9 ++------- >> arch/powerpc/platforms/Kconfig.cputype | 2 +- >> 10 files changed, 82 insertions(+), 35 deletions(-) >> >> diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig >> index 587da5e..9627cfd 100644 >> --- a/arch/powerpc/Kconfig >> +++ b/arch/powerpc/Kconfig >> @@ -402,16 +402,30 @@ config PPC_HAS_HASH_64K >> depends on PPC64 >> default n >> >> -config PPC_64K_PAGES >> - bool "64k page size" >> - depends on PPC64 >> - select PPC_HAS_HASH_64K >> +choice >> + prompt "Page size" >> + default PPC_4K_PAGES >> help >> - This option changes the kernel logical page size to 64k. On >> machines >> + The PAGE_SIZE definition. Increasing the page size may >> + improve the system performance in some dedicated cases like >> software >> + RAID with accelerated calculations. In PPC64 case on machines >> without processor support for 64k pages, the kernel will simulate >> them by loading each individual 4k page on demand transparently, >> while on hardware with such support, it will be used to map >> normal application pages. >> + If unsure, set it to 4 KB. 
>> + >> +config PPC_4K_PAGES >> + bool "4k page size" >> + >> +config PPC_16K_PAGES >> + bool "16k page size" if 44x >> + >> +config PPC_64K_PAGES >> + bool "64k page size" if 44x || PPC64 >> + select PPC_HAS_HASH_64K if PPC64 >> + >> +endchoice >> >> config FORCE_MAX_ZONEORDER >> int "Maximum zone order" >> @@ -435,7 +449,7 @@ config FORCE_MAX_ZONEORDER >> >> config PPC_SUBPAGE_PROT >> bool "Support setting protections for 4k subpages" >> - depends on PPC_64K_PAGES >> + depends on PPC64 && PPC_64K_PAGES >> help >> This option adds support for a system call to allow user programs >> to set access permissions (read/write, readonly, or no access) >> diff --git a/arch/powerpc/include/asm/highmem.h >> b/arch/powerpc/include/asm/highmem.h >> index 5d99b64..dc1132c 100644 >> --- a/arch/powerpc/include/asm/highmem.h >> +++ b/arch/powerpc/include/asm/highmem.h >> @@ -38,9 +38,15 @@ extern pte_t *pkmap_page_table; >> * easily, subsequent pte tables have to be allocated in one physical >> * chunk of RAM. 
>> */ >> +#if defined(CONFIG_PPC_64K_PAGES) && !defined(CONFIG_PPC64) >> +#define PKMAP_ORDER (27 - PAGE_SHIFT) >> +#define LAST_PKMAP (1 << PKMAP_ORDER) >> +#define PKMAP_BASE (FIXADDR_START - PAGE_SIZE*(LAST_PKMAP + 1)) >> +#else >> #define LAST_PKMAP (1 << PTE_SHIFT) >> -#define LAST_PKMAP_MASK (LAST_PKMAP-1) >> #define PKMAP_BASE ((FIXADDR_START - PAGE_SIZE*(LAST_PKMAP + 1)) >> & PMD_MASK) >> +#endif >> +#define LAST_PKMAP_MASK (LAST_PKMAP-1) >> #define PKMAP_NR(virt) ((virt-PKMAP_BASE) >> PAGE_SHIFT) >> #define PKMAP_ADDR(nr) (PKMAP_BASE + ((nr) << PAGE_SHIFT)) >> >> diff --git a/arch/powerpc/include/asm/mmu-44x.h >> b/arch/powerpc/include/asm/mmu-44x.h >> index a825524..2ca18e8 100644 >> --- a/arch/powerpc/include/asm/mmu-44x.h >> +++ b/arch/powerpc/include/asm/mmu-44x.h >> @@ -4,6 +4,8 @@ >> * PPC440 support >> */ >> >> +#include <asm/page.h> >> + >> #define PPC44x_MMUCR_TID 0x000000ff >> #define PPC44x_MMUCR_STS 0x00010000 >> >> @@ -73,4 +75,20 @@ typedef struct { >> /* Size of the TLBs used for pinning in lowmem */ >> #define PPC_PIN_SIZE (1 << 28) /* 256M */ >> >> +#if (PAGE_SHIFT == 12) >> +#define PPC44x_TLBE_SIZE PPC44x_TLB_4K >> +#elif (PAGE_SHIFT == 14) >> +#define PPC44x_TLBE_SIZE PPC44x_TLB_16K >> +#elif (PAGE_SHIFT == 16) >> +#define PPC44x_TLBE_SIZE PPC44x_TLB_64K >> +#else >> +#error "Unsupported PAGE_SIZE" >> +#endif >> + >> +#define PPC44x_PGD_OFF_SHIFT (32 - PMD_SHIFT + 2) >> +#define PPC44x_PGD_OFF_MASK (PMD_SHIFT - 2) >> +#define PPC44x_PTE_ADD_SHIFT (32 - PMD_SHIFT + PTE_SHIFT + 3) >> +#define PPC44x_PTE_ADD_MASK (32 - 3 - PTE_SHIFT) >> +#define PPC44x_RPN_MASK (31 - PAGE_SHIFT) >> + >> #endif /* _ASM_POWERPC_MMU_44X_H_ */ >> diff --git a/arch/powerpc/include/asm/page.h >> b/arch/powerpc/include/asm/page.h >> index e088545..537d5b1 100644 >> --- a/arch/powerpc/include/asm/page.h >> +++ b/arch/powerpc/include/asm/page.h >> @@ -15,12 +15,15 @@ >> #include <asm/types.h> >> >> /* >> - * On PPC32 page size is 4K. 
For PPC64 we support either 4K or 64K >> software >> + * On regular PPC32 page size is 4K (but we support 4K/16K/64K pages >> + * on PPC44x). For PPC64 we support either 4K or 64K software >> * page size. When using 64K pages however, whether we are really >> supporting >> * 64K pages in HW or not is irrelevant to those definitions. >> */ >> -#ifdef CONFIG_PPC_64K_PAGES >> +#if defined(CONFIG_PPC_64K_PAGES) >> #define PAGE_SHIFT 16 >> +#elif defined(CONFIG_PPC_16K_PAGES) >> +#define PAGE_SHIFT 14 >> #else >> #define PAGE_SHIFT 12 >> #endif >> @@ -140,7 +143,7 @@ typedef struct { pte_basic_t pte; } pte_t; >> /* 64k pages additionally define a bigger "real PTE" type that gathers >> * the "second half" part of the PTE for pseudo 64k pages >> */ >> -#ifdef CONFIG_PPC_64K_PAGES >> +#if defined(CONFIG_PPC_64K_PAGES) && defined(CONFIG_PPC64) >> typedef struct { pte_t pte; unsigned long hidx; } real_pte_t; >> #else >> typedef struct { pte_t pte; } real_pte_t; >> @@ -180,10 +183,10 @@ typedef pte_basic_t pte_t; >> #define pte_val(x) (x) >> #define __pte(x) (x) >> >> -#ifdef CONFIG_PPC_64K_PAGES >> +#if defined(CONFIG_PPC_64K_PAGES) && defined(CONFIG_PPC64) >> typedef struct { pte_t pte; unsigned long hidx; } real_pte_t; >> #else >> -typedef unsigned long real_pte_t; >> +typedef pte_t real_pte_t; >> #endif >> >> >> diff --git a/arch/powerpc/include/asm/pgtable.h >> b/arch/powerpc/include/asm/pgtable.h >> index dbb8ca1..0d447fb 100644 >> --- a/arch/powerpc/include/asm/pgtable.h >> +++ b/arch/powerpc/include/asm/pgtable.h >> @@ -39,6 +39,9 @@ extern void paging_init(void); >> >> #include <asm-generic/pgtable.h> >> >> +#define PGD_T_LOG2 (__builtin_ffs(sizeof(pgd_t)) - 1) >> +#define PMD_T_LOG2 (__builtin_ffs(sizeof(pmd_t)) - 1) >> +#define PTE_T_LOG2 (__builtin_ffs(sizeof(pte_t)) - 1) >> >> /* >> * This gets called at the end of handling a page fault, when >> diff --git a/arch/powerpc/kernel/asm-offsets.c >> b/arch/powerpc/kernel/asm-offsets.c >> index 92768d3..98b8bb6 100644 
>> --- a/arch/powerpc/kernel/asm-offsets.c >> +++ b/arch/powerpc/kernel/asm-offsets.c >> @@ -375,6 +375,10 @@ int main(void) >> DEFINE(VCPU_FAULT_DEAR, offsetof(struct kvm_vcpu, >> arch.fault_dear)); >> DEFINE(VCPU_FAULT_ESR, offsetof(struct kvm_vcpu, arch.fault_esr)); >> #endif >> +#ifdef CONFIG_44x >> + DEFINE(PMD_SHIFT, PMD_SHIFT); >> + DEFINE(PTE_SHIFT, PTE_SHIFT); >> +#endif >> >> return 0; >> } >> diff --git a/arch/powerpc/kernel/head_44x.S >> b/arch/powerpc/kernel/head_44x.S >> index f3a1ea9..6525124 100644 >> --- a/arch/powerpc/kernel/head_44x.S >> +++ b/arch/powerpc/kernel/head_44x.S >> @@ -391,12 +391,14 @@ interrupt_base: >> rlwimi r13,r12,10,30,30 >> >> /* Load the PTE */ >> - rlwinm r12, r10, 13, 19, 29 /* Compute pgdir/pmd offset */ >> + /* Compute pgdir/pmd offset */ >> + rlwinm r12, r10, PPC44x_PGD_OFF_SHIFT, PPC44x_PGD_OFF_MASK, 29 >> lwzx r11, r12, r11 /* Get pgd/pmd entry */ >> rlwinm. r12, r11, 0, 0, 20 /* Extract pt base address */ >> beq 2f /* Bail if no table */ >> >> - rlwimi r12, r10, 23, 20, 28 /* Compute pte address */ >> + /* Compute pte address */ >> + rlwimi r12, r10, PPC44x_PTE_ADD_SHIFT, PPC44x_PTE_ADD_MASK, 28 >> lwz r11, 0(r12) /* Get high word of pte entry */ >> lwz r12, 4(r12) /* Get low word of pte entry */ >> >> @@ -485,12 +487,14 @@ tlb_44x_patch_hwater_D: >> /* Make up the required permissions */ >> li r13,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_HWEXEC >> >> - rlwinm r12, r10, 13, 19, 29 /* Compute pgdir/pmd offset */ >> + /* Compute pgdir/pmd offset */ >> + rlwinm r12, r10, PPC44x_PGD_OFF_SHIFT, PPC44x_PGD_OFF_MASK, 29 >> lwzx r11, r12, r11 /* Get pgd/pmd entry */ >> rlwinm. 
r12, r11, 0, 0, 20 /* Extract pt base address */ >> beq 2f /* Bail if no table */ >> >> - rlwimi r12, r10, 23, 20, 28 /* Compute pte address */ >> + /* Compute pte address */ >> + rlwimi r12, r10, PPC44x_PTE_ADD_SHIFT, PPC44x_PTE_ADD_MASK, 28 >> lwz r11, 0(r12) /* Get high word of pte entry */ >> lwz r12, 4(r12) /* Get low word of pte entry */ >> >> @@ -554,15 +558,15 @@ tlb_44x_patch_hwater_I: >> */ >> finish_tlb_load: >> /* Combine RPN & ERPN an write WS 0 */ >> - rlwimi r11,r12,0,0,19 >> + rlwimi r11,r12,0,0,PPC44x_RPN_MASK >> tlbwe r11,r13,PPC44x_TLB_XLAT >> >> /* >> * Create WS1. This is the faulting address (EPN), >> * page size, and valid flag. >> */ >> - li r11,PPC44x_TLB_VALID | PPC44x_TLB_4K >> - rlwimi r10,r11,0,20,31 /* Insert valid and page >> size*/ >> + li r11,PPC44x_TLB_VALID | PPC44x_TLBE_SIZE >> + rlwimi r10,r11,0,PPC44x_PTE_ADD_MASK,31/* Insert valid and >> page size*/ >> tlbwe r10,r13,PPC44x_TLB_PAGEID /* Write PAGEID */ >> >> /* And WS 2 */ >> @@ -634,12 +638,12 @@ _GLOBAL(set_context) >> * goes at the beginning of the data segment, which is page-aligned. >> */ >> .data >> - .align 12 >> + .align PAGE_SHIFT >> .globl sdata >> sdata: >> .globl empty_zero_page >> empty_zero_page: >> - .space 4096 >> + .space PAGE_SIZE >> >> /* >> * To support >32-bit physical addresses, we use an 8KB pgdir. 
>> diff --git a/arch/powerpc/kernel/misc_32.S >> b/arch/powerpc/kernel/misc_32.S >> index 7a6dfbc..0110fcd 100644 >> --- a/arch/powerpc/kernel/misc_32.S >> +++ b/arch/powerpc/kernel/misc_32.S >> @@ -589,8 +589,8 @@ _GLOBAL(__flush_dcache_icache) >> BEGIN_FTR_SECTION >> blr >> END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) >> - rlwinm r3,r3,0,0,19 /* Get page base address */ >> - li r4,4096/L1_CACHE_BYTES /* Number of lines in a page */ >> + rlwinm r3,r3,0,0,PPC44x_RPN_MASK /* Get page base address */ >> + li r4,PAGE_SIZE/L1_CACHE_BYTES /* Number of lines in a >> page */ >> mtctr r4 >> mr r6,r3 >> 0: dcbst 0,r3 /* Write line to ram */ >> @@ -630,8 +630,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) >> rlwinm r0,r10,0,28,26 /* clear DR */ >> mtmsr r0 >> isync >> - rlwinm r3,r3,0,0,19 /* Get page base address */ >> - li r4,4096/L1_CACHE_BYTES /* Number of lines in a page */ >> + rlwinm r3,r3,0,0,PPC44x_RPN_MASK /* Get page base address */ >> + li r4,PAGE_SIZE/L1_CACHE_BYTES /* Number of lines in a >> page */ >> mtctr r4 >> mr r6,r3 >> 0: dcbst 0,r3 /* Write line to ram */ >> @@ -655,7 +655,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) >> * void clear_pages(void *page, int order) ; >> */ >> _GLOBAL(clear_pages) >> - li r0,4096/L1_CACHE_BYTES >> + li r0,PAGE_SIZE/L1_CACHE_BYTES >> slw r0,r0,r4 >> mtctr r0 >> #ifdef CONFIG_8xx >> @@ -713,7 +713,7 @@ _GLOBAL(copy_page) >> dcbt r5,r4 >> li r11,L1_CACHE_BYTES+4 >> #endif /* MAX_COPY_PREFETCH */ >> - li r0,4096/L1_CACHE_BYTES - MAX_COPY_PREFETCH >> + li r0,PAGE_SIZE/L1_CACHE_BYTES - MAX_COPY_PREFETCH >> crclr 4*cr0+eq >> 2: >> mtctr r0 >> diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c >> index 2001abd..4eed001 100644 >> --- a/arch/powerpc/mm/pgtable_32.c >> +++ b/arch/powerpc/mm/pgtable_32.c >> @@ -72,12 +72,7 @@ extern unsigned long p_mapped_by_tlbcam(unsigned >> long pa); >> #define p_mapped_by_tlbcam(x) (0UL) >> #endif /* HAVE_TLBCAM */ >> >> -#ifdef CONFIG_PTE_64BIT >> -/* 44x uses an 
8kB pgdir because it has 8-byte Linux PTEs. */ >> -#define PGDIR_ORDER 1 >> -#else >> -#define PGDIR_ORDER 0 >> -#endif >> +#define PGDIR_ORDER max(32 + PGD_T_LOG2 - PGDIR_SHIFT - >> PAGE_SHIFT, 0) >> >> pgd_t *pgd_alloc(struct mm_struct *mm) >> { >> @@ -400,7 +395,7 @@ void kernel_map_pages(struct page *page, int >> numpages, int enable) >> #endif /* CONFIG_DEBUG_PAGEALLOC */ >> >> static int fixmaps; >> -unsigned long FIXADDR_TOP = 0xfffff000; >> +unsigned long FIXADDR_TOP = (-PAGE_SIZE); >> EXPORT_SYMBOL(FIXADDR_TOP); >> >> void __set_fixmap (enum fixed_addresses idx, phys_addr_t phys, >> pgprot_t flags) >> diff --git a/arch/powerpc/platforms/Kconfig.cputype >> b/arch/powerpc/platforms/Kconfig.cputype >> index 7f65127..a1386a4 100644 >> --- a/arch/powerpc/platforms/Kconfig.cputype >> +++ b/arch/powerpc/platforms/Kconfig.cputype >> @@ -202,7 +202,7 @@ config PPC_STD_MMU_32 >> >> config PPC_MM_SLICES >> bool >> - default y if HUGETLB_PAGE || PPC_64K_PAGES >> + default y if HUGETLB_PAGE || (PPC64 && PPC_64K_PAGES) >> default n >> >> config VIRT_CPU_ACCOUNTING >> > > -- Grüsse / regards, Christian Ehrhardt IBM Linux Technology Center, Open Virtualization ^ permalink raw reply [flat|nested] 35+ messages in thread
* Re: [PATCH 1/2] powerpc: add 16K/64K pages support for the 44x PPC32 architectures. 2008-10-22 14:28 ` Christian Ehrhardt 2008-10-22 17:54 ` Christian Ehrhardt @ 2008-10-31 23:23 ` Hollis Blanchard 2008-11-01 11:30 ` Josh Boyer 2008-11-11 13:19 ` Josh Boyer 1 sibling, 2 replies; 35+ messages in thread From: Hollis Blanchard @ 2008-10-31 23:23 UTC (permalink / raw) To: Christian Ehrhardt Cc: Hollis Blanchard, dzu, linuxppc-dev, pvr, Ilya Yanok, Wolfgang Denk On Wed, Oct 22, 2008 at 9:28 AM, Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com> wrote: > Hi Ilya, > I just tried your patch on my 440 board because it would help us in our > environment. > Unfortunately I run into a bug on early boot (mark_bootmem). > > A log can be found in this mail, this is the bug when running with 64k page > size. > I tried this with and without your 2/2 265k patch and also with page size > configured to 16k, the error is the same in all cases. > > I used an earlier version of your patch in the past and it worked fine. > Applying this old patch causes the same problem. > Therefore I expect that there was some other code changed that breaks with > page size != 4k. This patch seems to solve the problem for me, but I have to run and haven't yet worked out if it's the right fix. diff --git a/mm/bootmem.c b/mm/bootmem.c --- a/mm/bootmem.c +++ b/mm/bootmem.c @@ -300,7 +300,7 @@ static int __init mark_bootmem(unsigned unsigned long max; if (pos < bdata->node_min_pfn || - pos >= bdata->node_low_pfn) { + pos > bdata->node_low_pfn) { BUG_ON(pos != start); continue; } @@ -399,7 +399,7 @@ int __init reserve_bootmem(unsigned long unsigned long start, end; start = PFN_DOWN(addr); - end = PFN_UP(addr + size); + end = PFN_DOWN(addr + size); return mark_bootmem(start, end, 1, flags); } Looks like the breakage may have been accidentally introduced by Johannes Weiner <hannes@saeurebad.de> on Jul 24 (post 2.6.26). FWIW, the boards Christian and I are hitting the problem on are Sequoias with 256MB of RAM. 
cuImage is reporting only 0xffff000 bytes of RAM though, which may be exacerbating the situation. -Hollis ^ permalink raw reply [flat|nested] 35+ messages in thread
* Re: [PATCH 1/2] powerpc: add 16K/64K pages support for the 44x PPC32 architectures. 2008-10-31 23:23 ` Hollis Blanchard @ 2008-11-01 11:30 ` Josh Boyer 2008-11-01 21:55 ` Benjamin Herrenschmidt 2008-11-11 13:19 ` Josh Boyer 1 sibling, 1 reply; 35+ messages in thread From: Josh Boyer @ 2008-11-01 11:30 UTC (permalink / raw) To: Hollis Blanchard Cc: dzu, Ilya Yanok, linuxppc-dev, Hollis Blanchard, pvr, Wolfgang Denk On Fri, Oct 31, 2008 at 06:23:28PM -0500, Hollis Blanchard wrote: >On Wed, Oct 22, 2008 at 9:28 AM, Christian Ehrhardt ><ehrhardt@linux.vnet.ibm.com> wrote: >> Hi Ilya, >> I just tried your patch on my 440 board because it would help us in our >> environment. >> Unfortunately I run into a bug on early boot (mark_bootmem). >> >> A log can be found in this mail, this is the bug when running with 64k page >> size. >> I tried this with and without your 2/2 265k patch and also with page size >> configured to 16k, the error is the same in all cases. >> >> I used an earlier version of your patch in the past and it worked fine. >> Applying this old patch causes the same problem. >> Therefore I expect that there was some other code changed that breaks with >> page size != 4k. > >This patch seems to solve the problem for me, but I have to run and >haven't yet worked out if it's the right fix. > >diff --git a/mm/bootmem.c b/mm/bootmem.c >--- a/mm/bootmem.c >+++ b/mm/bootmem.c >@@ -300,7 +300,7 @@ static int __init mark_bootmem(unsigned > unsigned long max; > > if (pos < bdata->node_min_pfn || >- pos >= bdata->node_low_pfn) { >+ pos > bdata->node_low_pfn) { > BUG_ON(pos != start); > continue; > } >@@ -399,7 +399,7 @@ int __init reserve_bootmem(unsigned long > unsigned long start, end; > > start = PFN_DOWN(addr); >- end = PFN_UP(addr + size); >+ end = PFN_DOWN(addr + size); > > return mark_bootmem(start, end, 1, flags); > } > >Looks like the breakage may have been accidentally introduced by >Johannes Weiner <hannes@saeurebad.de> on Jul 24 (post 2.6.26). 
> >FWIW, the boards Christian and I are hitting the problem on are >Sequoias with 256MB of RAM. cuImage is reporting only 0xffff000 bytes >of RAM though, which may be exacerbating the situation. That is on purpose. The chip has an erratum that causes badness if you use the last XX bytes of DRAM. I forget exactly what XX is, but we just remove the last page. josh ^ permalink raw reply [flat|nested] 35+ messages in thread
* Re: [PATCH 1/2] powerpc: add 16K/64K pages support for the 44x PPC32 architectures. 2008-11-01 11:30 ` Josh Boyer @ 2008-11-01 21:55 ` Benjamin Herrenschmidt 2008-11-02 13:41 ` Josh Boyer 0 siblings, 1 reply; 35+ messages in thread From: Benjamin Herrenschmidt @ 2008-11-01 21:55 UTC (permalink / raw) To: Josh Boyer Cc: Wolfgang Denk, Ilya Yanok, Hollis Blanchard, linuxppc-dev, pvr, dzu, Hollis Blanchard On Sat, 2008-11-01 at 07:30 -0400, Josh Boyer wrote: > > That is on purpose. The chip has an errata that causes badness if > you use the last XX bytes of DRAM. I forget exactly what XX is, but > we just remove the last page. Doing that from the device-tree is very hairy though... you end up with information in there that isn't aligned etc... oh well. Ben. ^ permalink raw reply [flat|nested] 35+ messages in thread
* Re: [PATCH 1/2] powerpc: add 16K/64K pages support for the 44x PPC32 architectures. 2008-11-01 21:55 ` Benjamin Herrenschmidt @ 2008-11-02 13:41 ` Josh Boyer 2008-11-02 21:33 ` Benjamin Herrenschmidt 0 siblings, 1 reply; 35+ messages in thread From: Josh Boyer @ 2008-11-02 13:41 UTC (permalink / raw) To: Benjamin Herrenschmidt Cc: Wolfgang Denk, Ilya Yanok, Hollis Blanchard, linuxppc-dev, pvr, dzu, Hollis Blanchard On Sun, Nov 02, 2008 at 08:55:02AM +1100, Benjamin Herrenschmidt wrote: >On Sat, 2008-11-01 at 07:30 -0400, Josh Boyer wrote: >> >> That is on purpose. The chip has an errata that causes badness if >> you use the last XX bytes of DRAM. I forget exactly what XX is, but >> we just remove the last page. > >Doing that from the device-tree is very hairy tho... you end up with >informations in there that aren't aligned etc... oh well. What? -ENOTVERBOSEENOUGH. I don't see how this is really different from U-Boot just passing in a smaller memory size in the old arch/ppc world. (And I think U-Boot will actually fixup the device tree in a similar manner itself these days.) So if there are problems with this, please do tell. josh ^ permalink raw reply [flat|nested] 35+ messages in thread
* Re: [PATCH 1/2] powerpc: add 16K/64K pages support for the 44x PPC32 architectures. 2008-11-02 13:41 ` Josh Boyer @ 2008-11-02 21:33 ` Benjamin Herrenschmidt 2008-11-03 0:33 ` Josh Boyer 0 siblings, 1 reply; 35+ messages in thread From: Benjamin Herrenschmidt @ 2008-11-02 21:33 UTC (permalink / raw) To: Josh Boyer Cc: Wolfgang Denk, Ilya Yanok, Hollis Blanchard, linuxppc-dev, pvr, dzu, Hollis Blanchard On Sun, 2008-11-02 at 08:41 -0500, Josh Boyer wrote: > On Sun, Nov 02, 2008 at 08:55:02AM +1100, Benjamin Herrenschmidt wrote: > >On Sat, 2008-11-01 at 07:30 -0400, Josh Boyer wrote: > >> > >> That is on purpose. The chip has an errata that causes badness if > >> you use the last XX bytes of DRAM. I forget exactly what XX is, but > >> we just remove the last page. > > > >Doing that from the device-tree is very hairy tho... you end up with > >informations in there that aren't aligned etc... oh well. > > What? -ENOTVERBOSEENOUGH. > > I don't see how this is really different from U-Boot just passing in > a smaller memory size in the old arch/ppc world. (And I think U-Boot > will actually fixup the device tree in a similar manner itself these > days.) So if there are problems with this, please do tell. Is it cropping the memory nodes or using the reserve map ? Ben. ^ permalink raw reply [flat|nested] 35+ messages in thread
* Re: [PATCH 1/2] powerpc: add 16K/64K pages support for the 44x PPC32 architectures. 2008-11-02 21:33 ` Benjamin Herrenschmidt @ 2008-11-03 0:33 ` Josh Boyer 2008-11-03 0:43 ` Benjamin Herrenschmidt 0 siblings, 1 reply; 35+ messages in thread From: Josh Boyer @ 2008-11-03 0:33 UTC (permalink / raw) To: benh Cc: Wolfgang Denk, Ilya Yanok, Hollis Blanchard, linuxppc-dev, pvr, dzu, Hollis Blanchard On Mon, 03 Nov 2008 08:33:16 +1100 Benjamin Herrenschmidt <benh@kernel.crashing.org> wrote: > On Sun, 2008-11-02 at 08:41 -0500, Josh Boyer wrote: > > On Sun, Nov 02, 2008 at 08:55:02AM +1100, Benjamin Herrenschmidt wrote: > > >On Sat, 2008-11-01 at 07:30 -0400, Josh Boyer wrote: > > >> > > >> That is on purpose. The chip has an errata that causes badness if > > >> you use the last XX bytes of DRAM. I forget exactly what XX is, but > > >> we just remove the last page. > > > > > >Doing that from the device-tree is very hairy tho... you end up with > > >informations in there that aren't aligned etc... oh well. > > > > What? -ENOTVERBOSEENOUGH. > > > > I don't see how this is really different from U-Boot just passing in > > a smaller memory size in the old arch/ppc world. (And I think U-Boot > > will actually fixup the device tree in a similar manner itself these > > days.) So if there are problems with this, please do tell. > > Is it cropping the memory nodes or using the reserve map ? Cropping the size of the memory node. That was simplest to do from the cuboot wrapper at the time. If marking it reserved via a reserve map is more elegant and correct, we could do that. But I will still like to know what about the other way is hairy please. josh ^ permalink raw reply [flat|nested] 35+ messages in thread
* Re: [PATCH 1/2] powerpc: add 16K/64K pages support for the 44x PPC32 architectures. 2008-11-03 0:33 ` Josh Boyer @ 2008-11-03 0:43 ` Benjamin Herrenschmidt 2008-11-03 11:26 ` Josh Boyer 2008-11-03 19:55 ` Hollis Blanchard 0 siblings, 2 replies; 35+ messages in thread From: Benjamin Herrenschmidt @ 2008-11-03 0:43 UTC (permalink / raw) To: Josh Boyer Cc: Wolfgang Denk, Ilya Yanok, Hollis Blanchard, linuxppc-dev, pvr, dzu, Hollis Blanchard > Cropping the size of the memory node. That was simplest to do from the > cuboot wrapper at the time. If marking it reserved via a reserve map > is more elegant and correct, we could do that. > > But I will still like to know what about the other way is hairy please. I don't like it :-) Bad feeling ... don't like having a memory node entry that isn't aligned to some large power of two typically. Cheers, Ben. ^ permalink raw reply [flat|nested] 35+ messages in thread
* Re: [PATCH 1/2] powerpc: add 16K/64K pages support for the 44x PPC32 architectures. 2008-11-03 0:43 ` Benjamin Herrenschmidt @ 2008-11-03 11:26 ` Josh Boyer 2008-11-03 20:17 ` Benjamin Herrenschmidt 2008-11-03 19:55 ` Hollis Blanchard 1 sibling, 1 reply; 35+ messages in thread From: Josh Boyer @ 2008-11-03 11:26 UTC (permalink / raw) To: benh Cc: Wolfgang Denk, Ilya Yanok, Hollis Blanchard, linuxppc-dev, pvr, dzu, Hollis Blanchard On Mon, 03 Nov 2008 11:43:54 +1100 Benjamin Herrenschmidt <benh@kernel.crashing.org> wrote: > > > Cropping the size of the memory node. That was simplest to do from the > > cuboot wrapper at the time. If marking it reserved via a reserve map > > is more elegant and correct, we could do that. > > > > But I will still like to know what about the other way is hairy please. > > I don't like it :-) Bad feeling ... don't like having a memory > node entry that isn't aligned to some large power of two typically. Erm, ok. And do your heebie-jeebies extend to people using the mem= parameter in a similar fashion? josh ^ permalink raw reply [flat|nested] 35+ messages in thread
* Re: [PATCH 1/2] powerpc: add 16K/64K pages support for the 44x PPC32 architectures. 2008-11-03 11:26 ` Josh Boyer @ 2008-11-03 20:17 ` Benjamin Herrenschmidt 0 siblings, 0 replies; 35+ messages in thread From: Benjamin Herrenschmidt @ 2008-11-03 20:17 UTC (permalink / raw) To: Josh Boyer Cc: Wolfgang Denk, Ilya Yanok, Hollis Blanchard, linuxppc-dev, pvr, dzu, Hollis Blanchard On Mon, 2008-11-03 at 06:26 -0500, Josh Boyer wrote: > On Mon, 03 Nov 2008 11:43:54 +1100 > Benjamin Herrenschmidt <benh@kernel.crashing.org> wrote: > > > > > > Cropping the size of the memory node. That was simplest to do from the > > > cuboot wrapper at the time. If marking it reserved via a reserve map > > > is more elegant and correct, we could do that. > > > > > > But I will still like to know what about the other way is hairy please. > > > > I don't like it :-) Bad feeling ... don't like having a memory > > node entry that isn't aligned to some large power of two typically. > > Erm, ok. And does your heebie-geebies extend to people using the mem= > parameter in a similar fashion? Nah, not really. It's not that it won't work, I suppose it does, though I would have preferred a way to "reserve" that memory rather than take it off. In fact, that last page could be used for other things, for example it could be used as a dummy page to point stale DMA to or whatever else. Ben. ^ permalink raw reply [flat|nested] 35+ messages in thread
* Re: [PATCH 1/2] powerpc: add 16K/64K pages support for the 44x PPC32 architectures. 2008-11-03 0:43 ` Benjamin Herrenschmidt 2008-11-03 11:26 ` Josh Boyer @ 2008-11-03 19:55 ` Hollis Blanchard 2008-11-03 20:00 ` Josh Boyer 1 sibling, 1 reply; 35+ messages in thread From: Hollis Blanchard @ 2008-11-03 19:55 UTC (permalink / raw) To: benh; +Cc: dzu, Ilya Yanok, linuxppc-dev, pvr, Wolfgang Denk On Mon, 2008-11-03 at 11:43 +1100, Benjamin Herrenschmidt wrote: > > Cropping the size of the memory node. That was simplest to do from the > > cuboot wrapper at the time. If marking it reserved via a reserve map > > is more elegant and correct, we could do that. > > > > But I will still like to know what about the other way is hairy please. > > I don't like it :-) Bad feeling ... don't like having a memory > node entry that isn't aligned to some large power of two typically. More specifically, mm/bootmem.c seems to be making the implicit assumption that memory size is an even multiple of PAGE_SIZE. With 4K pages, 0xffff000 bytes of RAM fits; with 64K pages it does not. Using the device tree reserve map stuff does indeed seem to solve the problem. However, I really don't understand the layering in arch/powerpc/boot at all, so I'll just put this patch out here and people can play with wrappers and prototypes all they want: powerpc/4xx: work around CHIP11 errata in a more PAGE_SIZE-friendly way The current CHIP11 errata truncates the device tree memory node, and assumes a 4K page size. This breaks kernels with non-4K PAGE_SIZE. Instead, use a device tree memory reservation to reserve only the 256 bytes actually affected by the errata, leaving the total memory size unaltered. 
Signed-off-by: Hollis Blanchard <hollisb@us.ibm.com> diff --git a/arch/powerpc/boot/4xx.c b/arch/powerpc/boot/4xx.c --- a/arch/powerpc/boot/4xx.c +++ b/arch/powerpc/boot/4xx.c @@ -21,7 +21,7 @@ #include "reg.h" #include "dcr.h" -static unsigned long chip_11_errata(unsigned long memsize) +static void chip_11_errata(unsigned long memsize) { unsigned long pvr; @@ -31,13 +31,11 @@ static unsigned long chip_11_errata(unsi case 0x40000850: case 0x400008d0: case 0x200008d0: - memsize -= 4096; + fdt_reserve_mem(memsize - 256, 256); break; default: break; } - - return memsize; } /* Read the 4xx SDRAM controller to get size of system memory. */ @@ -53,7 +51,7 @@ void ibm4xx_sdram_fixup_memsize(void) memsize += SDRAM_CONFIG_BANK_SIZE(bank_config); } - memsize = chip_11_errata(memsize); + chip_11_errata(memsize); dt_fixup_memory(0, memsize); } @@ -219,7 +217,7 @@ void ibm4xx_denali_fixup_memsize(void) bank = 4; /* 4 banks */ memsize = cs * (1 << (col+row)) * bank * dpath; - memsize = chip_11_errata(memsize); + chip_11_errata(memsize); dt_fixup_memory(0, memsize); } diff --git a/arch/powerpc/boot/libfdt-wrapper.c b/arch/powerpc/boot/libfdt-wrapper.c --- a/arch/powerpc/boot/libfdt-wrapper.c +++ b/arch/powerpc/boot/libfdt-wrapper.c @@ -167,6 +167,11 @@ static unsigned long fdt_wrapper_finaliz return (unsigned long)fdt; } +int fdt_reserve_mem(unsigned long addr, unsigned long bytes) +{ + return fdt_add_mem_rsv(fdt, addr, bytes); +} + void fdt_init(void *blob) { int err; diff --git a/arch/powerpc/boot/ops.h b/arch/powerpc/boot/ops.h --- a/arch/powerpc/boot/ops.h +++ b/arch/powerpc/boot/ops.h @@ -83,6 +83,7 @@ extern struct loader_info loader_info; void start(void); void fdt_init(void *blob); +int fdt_reserve_mem(unsigned long addr, unsigned long bytes); int serial_console_init(void); int ns16550_console_init(void *devp, struct serial_console_data *scdp); int mpsc_console_init(void *devp, struct serial_console_data *scdp); -- Hollis Blanchard IBM Linux Technology Center ^ permalink 
raw reply [flat|nested] 35+ messages in thread
* Re: [PATCH 1/2] powerpc: add 16K/64K pages support for the 44x PPC32 architectures. 2008-11-03 19:55 ` Hollis Blanchard @ 2008-11-03 20:00 ` Josh Boyer 2008-11-05 17:33 ` Hollis Blanchard 0 siblings, 1 reply; 35+ messages in thread From: Josh Boyer @ 2008-11-03 20:00 UTC (permalink / raw) To: Hollis Blanchard; +Cc: Wolfgang Denk, Ilya Yanok, linuxppc-dev, pvr, dzu, david On Mon, 03 Nov 2008 13:55:21 -0600 Hollis Blanchard <hollisb@us.ibm.com> wrote: > On Mon, 2008-11-03 at 11:43 +1100, Benjamin Herrenschmidt wrote: > > > Cropping the size of the memory node. That was simplest to do from the > > > cuboot wrapper at the time. If marking it reserved via a reserve map > > > is more elegant and correct, we could do that. > > > > > > But I will still like to know what about the other way is hairy please. > > > > I don't like it :-) Bad feeling ... don't like having a memory > > node entry that isn't aligned to some large power of two typically. > > More specifically, mm/bootmem.c seems to be making the implicit > assumption that memory size is an even multiple of PAGE_SIZE. With 4K > pages, 0xffff000 bytes of RAM fits; with 64K pages it does not. Hmm.. I dunno what to think about that. Again, how does mem= play into this? (I will look myself in a bit, but if someone knows offhand that would be nice..) > Using the device tree reserve map stuff does indeed seem to solve the > problem. However, I really don't understand the layering in > arch/powerpc/boot at all, so I'll just put this patch out here and > people can play with wrappers and prototypes all they want: This actually looks pretty nice. I'll wait for David to Ack the fdt parts. josh > powerpc/4xx: work around CHIP11 errata in a more PAGE_SIZE-friendly way > > The current CHIP11 errata truncates the device tree memory node, and assumes a > 4K page size. This breaks kernels with non-4K PAGE_SIZE. 
> > Instead, use a device tree memory reservation to reserve only the 256 bytes > actually affected by the errata, leaving the total memory size unaltered. > > Signed-off-by: Hollis Blanchard <hollisb@us.ibm.com> > > diff --git a/arch/powerpc/boot/4xx.c b/arch/powerpc/boot/4xx.c > --- a/arch/powerpc/boot/4xx.c > +++ b/arch/powerpc/boot/4xx.c > @@ -21,7 +21,7 @@ > #include "reg.h" > #include "dcr.h" > > -static unsigned long chip_11_errata(unsigned long memsize) > +static void chip_11_errata(unsigned long memsize) > { > unsigned long pvr; > > @@ -31,13 +31,11 @@ static unsigned long chip_11_errata(unsi > case 0x40000850: > case 0x400008d0: > case 0x200008d0: > - memsize -= 4096; > + fdt_reserve_mem(memsize - 256, 256); > break; > default: > break; > } > - > - return memsize; > } > > /* Read the 4xx SDRAM controller to get size of system memory. */ > @@ -53,7 +51,7 @@ void ibm4xx_sdram_fixup_memsize(void) > memsize += SDRAM_CONFIG_BANK_SIZE(bank_config); > } > > - memsize = chip_11_errata(memsize); > + chip_11_errata(memsize); > dt_fixup_memory(0, memsize); > } > > @@ -219,7 +217,7 @@ void ibm4xx_denali_fixup_memsize(void) > bank = 4; /* 4 banks */ > > memsize = cs * (1 << (col+row)) * bank * dpath; > - memsize = chip_11_errata(memsize); > + chip_11_errata(memsize); > dt_fixup_memory(0, memsize); > } > > diff --git a/arch/powerpc/boot/libfdt-wrapper.c b/arch/powerpc/boot/libfdt-wrapper.c > --- a/arch/powerpc/boot/libfdt-wrapper.c > +++ b/arch/powerpc/boot/libfdt-wrapper.c > @@ -167,6 +167,11 @@ static unsigned long fdt_wrapper_finaliz > return (unsigned long)fdt; > } > > +int fdt_reserve_mem(unsigned long addr, unsigned long bytes) > +{ > + return fdt_add_mem_rsv(fdt, addr, bytes); > +} > + > void fdt_init(void *blob) > { > int err; > diff --git a/arch/powerpc/boot/ops.h b/arch/powerpc/boot/ops.h > --- a/arch/powerpc/boot/ops.h > +++ b/arch/powerpc/boot/ops.h > @@ -83,6 +83,7 @@ extern struct loader_info loader_info; > > void start(void); > void fdt_init(void *blob); 
> +int fdt_reserve_mem(unsigned long addr, unsigned long bytes); > int serial_console_init(void); > int ns16550_console_init(void *devp, struct serial_console_data *scdp); > int mpsc_console_init(void *devp, struct serial_console_data *scdp); > > ^ permalink raw reply [flat|nested] 35+ messages in thread
* Re: [PATCH 1/2] powerpc: add 16K/64K pages support for the 44x PPC32 architectures. 2008-11-03 20:00 ` Josh Boyer @ 2008-11-05 17:33 ` Hollis Blanchard 2008-11-06 1:48 ` David Gibson 0 siblings, 1 reply; 35+ messages in thread From: Hollis Blanchard @ 2008-11-05 17:33 UTC (permalink / raw) To: Josh Boyer; +Cc: Wolfgang Denk, Ilya Yanok, linuxppc-dev, pvr, dzu, david On Mon, 2008-11-03 at 15:00 -0500, Josh Boyer wrote: > On Mon, 03 Nov 2008 13:55:21 -0600 > Hollis Blanchard <hollisb@us.ibm.com> wrote: > > > On Mon, 2008-11-03 at 11:43 +1100, Benjamin Herrenschmidt wrote: > > > > Cropping the size of the memory node. That was simplest to do from the > > > > cuboot wrapper at the time. If marking it reserved via a reserve map > > > > is more elegant and correct, we could do that. > > > > > > > > But I will still like to know what about the other way is hairy please. > > > > > > I don't like it :-) Bad feeling ... don't like having a memory > > > node entry that isn't aligned to some large power of two typically. > > > > More specifically, mm/bootmem.c seems to be making the implicit > > assumption that memory size is an even multiple of PAGE_SIZE. With 4K > > pages, 0xffff000 bytes of RAM fits; with 64K pages it does not. > > Hmm.. I dunno what to think about that. Again, how does mem= play > into this? (I will look myself in a bit, but if someone knows offhand > that would be nice..) > > > Using the device tree reserve map stuff does indeed seem to solve the > > problem. However, I really don't understand the layering in > > arch/powerpc/boot at all, so I'll just put this patch out here and > > people can play with wrappers and prototypes all they want: > > This actually looks pretty nice. I'll wait for David to Ack the fdt > parts. David? -- Hollis Blanchard IBM Linux Technology Center > > powerpc/4xx: work around CHIP11 errata in a more PAGE_SIZE-friendly way > > > > The current CHIP11 errata truncates the device tree memory node, and assumes a > > 4K page size. 
This breaks kernels with non-4K PAGE_SIZE. > > > > Instead, use a device tree memory reservation to reserve only the 256 bytes > > actually affected by the errata, leaving the total memory size unaltered. > > > > Signed-off-by: Hollis Blanchard <hollisb@us.ibm.com> > > > > diff --git a/arch/powerpc/boot/4xx.c b/arch/powerpc/boot/4xx.c > > --- a/arch/powerpc/boot/4xx.c > > +++ b/arch/powerpc/boot/4xx.c > > @@ -21,7 +21,7 @@ > > #include "reg.h" > > #include "dcr.h" > > > > -static unsigned long chip_11_errata(unsigned long memsize) > > +static void chip_11_errata(unsigned long memsize) > > { > > unsigned long pvr; > > > > @@ -31,13 +31,11 @@ static unsigned long chip_11_errata(unsi > > case 0x40000850: > > case 0x400008d0: > > case 0x200008d0: > > - memsize -= 4096; > > + fdt_reserve_mem(memsize - 256, 256); > > break; > > default: > > break; > > } > > - > > - return memsize; > > } > > > > /* Read the 4xx SDRAM controller to get size of system memory. */ > > @@ -53,7 +51,7 @@ void ibm4xx_sdram_fixup_memsize(void) > > memsize += SDRAM_CONFIG_BANK_SIZE(bank_config); > > } > > > > - memsize = chip_11_errata(memsize); > > + chip_11_errata(memsize); > > dt_fixup_memory(0, memsize); > > } > > > > @@ -219,7 +217,7 @@ void ibm4xx_denali_fixup_memsize(void) > > bank = 4; /* 4 banks */ > > > > memsize = cs * (1 << (col+row)) * bank * dpath; > > - memsize = chip_11_errata(memsize); > > + chip_11_errata(memsize); > > dt_fixup_memory(0, memsize); > > } > > > > diff --git a/arch/powerpc/boot/libfdt-wrapper.c b/arch/powerpc/boot/libfdt-wrapper.c > > --- a/arch/powerpc/boot/libfdt-wrapper.c > > +++ b/arch/powerpc/boot/libfdt-wrapper.c > > @@ -167,6 +167,11 @@ static unsigned long fdt_wrapper_finaliz > > return (unsigned long)fdt; > > } > > > > +int fdt_reserve_mem(unsigned long addr, unsigned long bytes) > > +{ > > + return fdt_add_mem_rsv(fdt, addr, bytes); > > +} > > + > > void fdt_init(void *blob) > > { > > int err; > > diff --git a/arch/powerpc/boot/ops.h 
b/arch/powerpc/boot/ops.h > > --- a/arch/powerpc/boot/ops.h > > +++ b/arch/powerpc/boot/ops.h > > @@ -83,6 +83,7 @@ extern struct loader_info loader_info; > > > > void start(void); > > void fdt_init(void *blob); > > +int fdt_reserve_mem(unsigned long addr, unsigned long bytes); > > int serial_console_init(void); > > int ns16550_console_init(void *devp, struct serial_console_data *scdp); > > int mpsc_console_init(void *devp, struct serial_console_data *scdp); > > > > ^ permalink raw reply [flat|nested] 35+ messages in thread
* Re: [PATCH 1/2] powerpc: add 16K/64K pages support for the 44x PPC32 architectures. 2008-11-05 17:33 ` Hollis Blanchard @ 2008-11-06 1:48 ` David Gibson 0 siblings, 0 replies; 35+ messages in thread From: David Gibson @ 2008-11-06 1:48 UTC (permalink / raw) To: Hollis Blanchard; +Cc: Wolfgang Denk, dzu, linuxppc-dev, pvr, Ilya Yanok On Wed, Nov 05, 2008 at 11:33:28AM -0600, Hollis Blanchard wrote: > On Mon, 2008-11-03 at 15:00 -0500, Josh Boyer wrote: > > On Mon, 03 Nov 2008 13:55:21 -0600 > > Hollis Blanchard <hollisb@us.ibm.com> wrote: > > > > > On Mon, 2008-11-03 at 11:43 +1100, Benjamin Herrenschmidt wrote: > > > > > Cropping the size of the memory node. That was simplest to do from the > > > > > cuboot wrapper at the time. If marking it reserved via a reserve map > > > > > is more elegant and correct, we could do that. > > > > > > > > > > But I will still like to know what about the other way is hairy please. > > > > > > > > I don't like it :-) Bad feeling ... don't like having a memory > > > > node entry that isn't aligned to some large power of two typically. > > > > > > More specifically, mm/bootmem.c seems to be making the implicit > > > assumption that memory size is an even multiple of PAGE_SIZE. With 4K > > > pages, 0xffff000 bytes of RAM fits; with 64K pages it does not. > > > > Hmm.. I dunno what to think about that. Again, how does mem= play > > into this? (I will look myself in a bit, but if someone knows offhand > > that would be nice..) > > > > > Using the device tree reserve map stuff does indeed seem to solve the > > > problem. However, I really don't understand the layering in > > > arch/powerpc/boot at all, so I'll just put this patch out here and > > > people can play with wrappers and prototypes all they want: > > > > This actually looks pretty nice. I'll wait for David to Ack the fdt > > parts. > > David? Sorry, I've been on leave for a few days. I assume you mean the new call through to fdt_add_mem_rsv(). Hrm.. 
currently all the things in fdt_wrapper are hooks called through dt_ops. Adding such a trivial wrapper seems a little silly. There have been other people wanting to use other libfdt features directly, knowing that they have a flat tree on their system. I think it would be more sensible to just expose the global fdt pointer, so that people can use the libfdt functions directly, without having to go through the wrapper code. Unless of course there is occasion to use this "add reserve" callback on real OF systems, in which case it should be a new dt_ops hook. -- David Gibson | I'll have my music baroque, and my code david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_ | _way_ _around_! http://www.ozlabs.org/~dgibson ^ permalink raw reply [flat|nested] 35+ messages in thread
* Re: [PATCH 1/2] powerpc: add 16K/64K pages support for the 44x PPC32 architectures. 2008-10-31 23:23 ` Hollis Blanchard 2008-11-01 11:30 ` Josh Boyer @ 2008-11-11 13:19 ` Josh Boyer 2008-11-11 15:00 ` Hollis Blanchard 1 sibling, 1 reply; 35+ messages in thread From: Josh Boyer @ 2008-11-11 13:19 UTC (permalink / raw) To: Hollis Blanchard Cc: dzu, Ilya Yanok, linuxppc-dev, Hollis Blanchard, pvr, Wolfgang Denk On Fri, Oct 31, 2008 at 06:23:28PM -0500, Hollis Blanchard wrote: >On Wed, Oct 22, 2008 at 9:28 AM, Christian Ehrhardt ><ehrhardt@linux.vnet.ibm.com> wrote: >> Hi Ilya, >> I just tried your patch on my 440 board because it would help us in our >> environment. >> Unfortunately I run into a bug on early boot (mark_bootmem). >> >> A log can be found in this mail, this is the bug when running with 64k page >> size. >> I tried this with and without your 2/2 265k patch and also with page size >> configured to 16k, the error is the same in all cases. >> >> I used an earlier version of your patch in the past and it worked fine. >> Applying this old patch causes the same problem. >> Therefore I expect that there was some other code changed that breaks with >> page size != 4k. > >This patch seems to solve the problem for me, but I have to run and >haven't yet worked out if it's the right fix. > >diff --git a/mm/bootmem.c b/mm/bootmem.c >--- a/mm/bootmem.c >+++ b/mm/bootmem.c >@@ -300,7 +300,7 @@ static int __init mark_bootmem(unsigned > unsigned long max; > > if (pos < bdata->node_min_pfn || >- pos >= bdata->node_low_pfn) { >+ pos > bdata->node_low_pfn) { > BUG_ON(pos != start); > continue; > } >@@ -399,7 +399,7 @@ int __init reserve_bootmem(unsigned long > unsigned long start, end; > > start = PFN_DOWN(addr); >- end = PFN_UP(addr + size); >+ end = PFN_DOWN(addr + size); > > return mark_bootmem(start, end, 1, flags); > } Hollis, if I'm understanding things correctly this patch is no longer needed if we do the memory reserve in the boot wrapper for the errata. Correct? 
josh ^ permalink raw reply [flat|nested] 35+ messages in thread
* Re: [PATCH 1/2] powerpc: add 16K/64K pages support for the 44x PPC32 architectures. 2008-11-11 13:19 ` Josh Boyer @ 2008-11-11 15:00 ` Hollis Blanchard 0 siblings, 0 replies; 35+ messages in thread From: Hollis Blanchard @ 2008-11-11 15:00 UTC (permalink / raw) To: Josh Boyer; +Cc: Wolfgang Denk, Ilya Yanok, linuxppc-dev, pvr, dzu On Tue, 2008-11-11 at 08:19 -0500, Josh Boyer wrote: > On Fri, Oct 31, 2008 at 06:23:28PM -0500, Hollis Blanchard wrote: > >On Wed, Oct 22, 2008 at 9:28 AM, Christian Ehrhardt > ><ehrhardt@linux.vnet.ibm.com> wrote: > >> Hi Ilya, > >> I just tried your patch on my 440 board because it would help us in our > >> environment. > >> Unfortunately I run into a bug on early boot (mark_bootmem). > >> > >> A log can be found in this mail, this is the bug when running with 64k page > >> size. > >> I tried this with and without your 2/2 265k patch and also with page size > >> configured to 16k, the error is the same in all cases. > >> > >> I used an earlier version of your patch in the past and it worked fine. > >> Applying this old patch causes the same problem. > >> Therefore I expect that there was some other code changed that breaks with > >> page size != 4k. > > > >This patch seems to solve the problem for me, but I have to run and > >haven't yet worked out if it's the right fix. 
> > > >diff --git a/mm/bootmem.c b/mm/bootmem.c > >--- a/mm/bootmem.c > >+++ b/mm/bootmem.c > >@@ -300,7 +300,7 @@ static int __init mark_bootmem(unsigned > > unsigned long max; > > > > if (pos < bdata->node_min_pfn || > >- pos >= bdata->node_low_pfn) { > >+ pos > bdata->node_low_pfn) { > > BUG_ON(pos != start); > > continue; > > } > >@@ -399,7 +399,7 @@ int __init reserve_bootmem(unsigned long > > unsigned long start, end; > > > > start = PFN_DOWN(addr); > >- end = PFN_UP(addr + size); > >+ end = PFN_DOWN(addr + size); > > > > return mark_bootmem(start, end, 1, flags); > > } > > > Hollis, if I'm understanding things correctly this patch is no > longer needed if we do the memory reserve in the boot wrapper for > the errata. Correct? Correct. -- Hollis Blanchard IBM Linux Technology Center ^ permalink raw reply [flat|nested] 35+ messages in thread
* Re: [1/2] powerpc: add 16K/64K pages support for the 44x PPC32 architectures 2008-10-16 2:22 ` [PATCH 1/2] powerpc: add 16K/64K pages support for the 44x PPC32 architectures Ilya Yanok 2008-10-17 15:54 ` prodyut hazarika 2008-10-22 14:28 ` Christian Ehrhardt @ 2008-11-10 15:09 ` Milton Miller 2008-11-10 16:50 ` Ilya Yanok 2 siblings, 1 reply; 35+ messages in thread From: Milton Miller @ 2008-11-10 15:09 UTC (permalink / raw) To: Ilya Yanok Cc: Wolfgang Denk, dzu, linux-ppc, Vladimir Panfilov, Paul Mackerras On 2008-10-16 at 02:22:31, Ilya Yanok wrote: I started out looking at the too-minimal description of patch 2/2, and that morphed into talking about both patches. > diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig > index 587da5e..9627cfd 100644 > --- a/arch/powerpc/Kconfig > +++ b/arch/powerpc/Kconfig > @@ -402,16 +402,30 @@ config PPC_HAS_HASH_64K > depends on PPC64 > default n > > -config PPC_64K_PAGES > - bool "64k page size" > - depends on PPC64 > - select PPC_HAS_HASH_64K > +choice > + prompt "Page size" > + default PPC_4K_PAGES > help > - This option changes the kernel logical page size to 64k. On > machines > + The PAGE_SIZE definition. Increasing the page size may > + improve the system performance in some dedicated cases like > software > + RAID with accelerated calculations. In PPC64 case on machines > without processor support for 64k pages, the kernel will > simulate > them by loading each individual 4k page on demand > transparently, > while on hardware with such support, it will be used to map > normal application pages. > + If unsure, set it to 4 KB. > + This is less understandable (more hacker jargon) and too application-specific. (Josh, since this is cross-sub-platform we need to make sure this fragment gets proper review). Also, we need to check the help placement, as I seem to remember the config programs looking at the first choice instead of the choice tag. Or should the help be split by option?
Let's try this: Select the kernel logical page size. Increasing the page size will reduce software overhead at each page boundary, allow hardware prefetch mechanisms to be more effective, and allow larger DMA transfers, increasing IO efficiency and reducing overhead. However, the utilization of memory will increase. For example, each cached file will use a multiple of the page size to hold its contents and the difference between the end of file and the end of page is wasted. Some dedicated systems, such as software RAID serving with accelerated calculations, have shown significant performance increases. If you configure a 64 bit kernel for 64k pages but the processor does not support them, then the kernel will simulate them with 4k pages, loading them on demand, but with the reduced software overhead and larger internal fragmentation. For the 32 bit kernel, a large page option will not be offered unless it is supported by the configured processor. If unsure, choose 4K_PAGES. > +config PPC_4K_PAGES > + bool "4k page size" > + > +config PPC_16K_PAGES > + bool "16k page size" if 44x > + > +config PPC_64K_PAGES > + bool "64k page size" if 44x || PPC64 > + select PPC_HAS_HASH_64K if PPC64 > + > +endchoice > > diff --git a/arch/powerpc/include/asm/highmem.h > b/arch/powerpc/include/asm/highmem.h > index 5d99b64..dc1132c 100644 > --- a/arch/powerpc/include/asm/highmem.h > +++ b/arch/powerpc/include/asm/highmem.h > @@ -38,9 +38,15 @@ extern pte_t *pkmap_page_table; > * easily, subsequent pte tables have to be allocated in one physical > * chunk of RAM. > */ > +#if defined(CONFIG_PPC_64K_PAGES) && !defined(CONFIG_PPC64) In patch 2/2 I was going to comment about the precedence of PPC64 vs 64K_PAGES, but then I realized this file is only included when CONFIG_HIGHMEM is set and that depends on PPC32, so it will never be set. Please remove the additional noise && !defined(CONFIG_PPC64). > +#define PKMAP_ORDER (27 - PAGE_SHIFT) Where did the value 27 come from?
> +#define LAST_PKMAP (1 << PKMAP_ORDER) > +#define PKMAP_BASE (FIXADDR_START - PAGE_SIZE*(LAST_PKMAP + 1)) > +#else > #define LAST_PKMAP (1 << PTE_SHIFT) > -#define LAST_PKMAP_MASK (LAST_PKMAP-1) > #define PKMAP_BASE ((FIXADDR_START - PAGE_SIZE*(LAST_PKMAP + 1)) > & PMD_MASK) > +#endif > +#define LAST_PKMAP_MASK (LAST_PKMAP-1) and why not set PKMAP_ORDER on both sides of the else, keepign LAST_PKMAP common? > #define PKMAP_NR(virt) ((virt-PKMAP_BASE) >> PAGE_SHIFT) > #define PKMAP_ADDR(nr) (PKMAP_BASE + ((nr) << PAGE_SHIFT)) > > > diff --git a/arch/powerpc/include/asm/pgtable.h > b/arch/powerpc/include/asm/pgtable.h > index dbb8ca1..0d447fb 100644 > --- a/arch/powerpc/include/asm/pgtable.h > +++ b/arch/powerpc/include/asm/pgtable.h > @@ -39,6 +39,9 @@ extern void paging_init(void); > > #include <asm-generic/pgtable.h> > > +#define PGD_T_LOG2 (__builtin_ffs(sizeof(pgd_t)) - 1) > +#define PMD_T_LOG2 (__builtin_ffs(sizeof(pmd_t)) - 1) > +#define PTE_T_LOG2 (__builtin_ffs(sizeof(pte_t)) - 1) > > diff --git a/arch/powerpc/include/asm/mmu-44x.h > b/arch/powerpc/include/asm/mmu-44x.h > index a825524..2ca18e8 100644 > --- a/arch/powerpc/include/asm/mmu-44x.h > +++ b/arch/powerpc/include/asm/mmu-44x.h > +#define PPC44x_PGD_OFF_SHIFT (32 - PMD_SHIFT + 2) > +#define PPC44x_PGD_OFF_MASK (PMD_SHIFT - 2) > +#define PPC44x_PTE_ADD_SHIFT (32 - PMD_SHIFT + PTE_SHIFT + 3) > +#define PPC44x_PTE_ADD_MASK (32 - 3 - PTE_SHIFT) > +#define PPC44x_RPN_MASK (31 - PAGE_SHIFT) > + Are the values 2 and 3 related to the new defines PG*_T_LOG2 ? milton ^ permalink raw reply [flat|nested] 35+ messages in thread
* Re: [1/2] powerpc: add 16K/64K pages support for the 44x PPC32 architectures 2008-11-10 15:09 ` [1/2] " Milton Miller @ 2008-11-10 16:50 ` Ilya Yanok 0 siblings, 0 replies; 35+ messages in thread From: Ilya Yanok @ 2008-11-10 16:50 UTC (permalink / raw) To: Milton Miller Cc: Wolfgang Denk, dzu, linux-ppc, Vladimir Panfilov, Paul Mackerras Hello Milton, Milton Miller wrote: > I started out looking at the too minimal decription of patch 2/2, and > that morphed into talking about both patches. > >> diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig >> index 587da5e..9627cfd 100644 >> --- a/arch/powerpc/Kconfig >> +++ b/arch/powerpc/Kconfig >> @@ -402,16 +402,30 @@ config PPC_HAS_HASH_64K >> depends on PPC64 >> default n >> >> -config PPC_64K_PAGES >> - bool "64k page size" >> - depends on PPC64 >> - select PPC_HAS_HASH_64K >> +choice >> + prompt "Page size" >> + default PPC_4K_PAGES >> help >> - This option changes the kernel logical page size to 64k. On >> machines >> + The PAGE_SIZE definition. Increasing the page size may >> + improve the system performance in some dedicated cases like >> software >> + RAID with accelerated calculations. In PPC64 case on machines >> without processor support for 64k pages, the kernel will >> simulate >> them by loading each individual 4k page on demand >> transparently, >> while on hardware with such support, it will be used to map >> normal application pages. >> + If unsure, set it to 4 KB. >> + > > This is less understandable (more hacker jargon) and too application > specific. (Josh, since this is cross-sub-platform we need to make > sure this fragment gets proper review). > > Also, we need to check the help placement, as I seem to remember the > config programs looking at the first choice instead of the choice > tag. Or should the help be split by option? Help at the choice tag works properly. > Lets try this > > Select the kernel logical page size. 
Increasing the page size will > reduce software overhead at each page boundary, allow hardware > prefetch mechanisms to be more effective, and allow larger dma > transfers increasing IO efficiency and reducing overhead. However the > utilization of memory will increase. For example, each cached file > will using a multiple of the page size to hold its contents and the > difference between the end of file and the end of page is wasted. > > Some dedicated systems, such as software raid serving with accelerated > calculations, have shown significant increases. > > If you configure a 64 bit kernel for 64k pages but the processor does > not support them, then the kernel will simulate them with 4k pages, > loading them on demand, but with the reduced software overhead and > larger internal fragmentation. For the 32 bit kernel, a large page > option will not be offered unless it is supported by the configured > processor. > > If unsure, choose 4K_PAGES. This looks much better for me. I'll include this help message in updated patch. >> +config PPC_4K_PAGES >> + bool "4k page size" >> + >> +config PPC_16K_PAGES >> + bool "16k page size" if 44x >> + >> +config PPC_64K_PAGES >> + bool "64k page size" if 44x || PPC64 >> + select PPC_HAS_HASH_64K if PPC64 >> + >> +endchoice >> > > >> diff --git a/arch/powerpc/include/asm/highmem.h >> b/arch/powerpc/include/asm/highmem.h >> index 5d99b64..dc1132c 100644 >> --- a/arch/powerpc/include/asm/highmem.h >> +++ b/arch/powerpc/include/asm/highmem.h >> @@ -38,9 +38,15 @@ extern pte_t *pkmap_page_table; >> * easily, subsequent pte tables have to be allocated in one physical >> * chunk of RAM. >> */ >> +#if defined(CONFIG_PPC_64K_PAGES) && !defined(CONFIG_PPC64) > > In patch 2/2 I was going to comment about the precedence of PPC64 vs > 64K_PAGES, but then I realized this file is only included when > CONFIG_HIGHMEM is set and that depends on PPC32 , so it will never be > set. Please remove the additional noise && !defined(CONFIG_PPC64). Ok. 
>> +#define PKMAP_ORDER (27 - PAGE_SHIFT) > where did the value 27 come from? Hm... It's pretty much experimental. There is the range of values which gives us a proper virtual memory map (VMALLOC_BEGIN < VMALLOC_END) and I have no clean idea which one we should use. >> +#define LAST_PKMAP (1 << PKMAP_ORDER) >> +#define PKMAP_BASE (FIXADDR_START - PAGE_SIZE*(LAST_PKMAP + 1)) >> +#else >> #define LAST_PKMAP (1 << PTE_SHIFT) >> -#define LAST_PKMAP_MASK (LAST_PKMAP-1) >> #define PKMAP_BASE ((FIXADDR_START - PAGE_SIZE*(LAST_PKMAP + 1)) >> & PMD_MASK) >> +#endif >> +#define LAST_PKMAP_MASK (LAST_PKMAP-1) > > and why not set PKMAP_ORDER on both sides of the else, keepign > LAST_PKMAP common? We can do this but I can't see much sense here... We still need to define PKMAP_BASE differently. >> #define PKMAP_NR(virt) ((virt-PKMAP_BASE) >> PAGE_SHIFT) >> #define PKMAP_ADDR(nr) (PKMAP_BASE + ((nr) << PAGE_SHIFT)) >> >> > > >> diff --git a/arch/powerpc/include/asm/pgtable.h >> b/arch/powerpc/include/asm/pgtable.h >> index dbb8ca1..0d447fb 100644 >> --- a/arch/powerpc/include/asm/pgtable.h >> +++ b/arch/powerpc/include/asm/pgtable.h >> @@ -39,6 +39,9 @@ extern void paging_init(void); >> >> #include <asm-generic/pgtable.h> >> >> +#define PGD_T_LOG2 (__builtin_ffs(sizeof(pgd_t)) - 1) >> +#define PMD_T_LOG2 (__builtin_ffs(sizeof(pmd_t)) - 1) >> +#define PTE_T_LOG2 (__builtin_ffs(sizeof(pte_t)) - 1) >> > >> diff --git a/arch/powerpc/include/asm/mmu-44x.h >> b/arch/powerpc/include/asm/mmu-44x.h >> index a825524..2ca18e8 100644 >> --- a/arch/powerpc/include/asm/mmu-44x.h >> +++ b/arch/powerpc/include/asm/mmu-44x.h > >> +#define PPC44x_PGD_OFF_SHIFT (32 - PMD_SHIFT + 2) >> +#define PPC44x_PGD_OFF_MASK (PMD_SHIFT - 2) >> +#define PPC44x_PTE_ADD_SHIFT (32 - PMD_SHIFT + PTE_SHIFT + 3) >> +#define PPC44x_PTE_ADD_MASK (32 - 3 - PTE_SHIFT) >> +#define PPC44x_RPN_MASK (31 - PAGE_SHIFT) >> + > > Are the values 2 and 3 related to the new defines PG*_T_LOG2 ? Looks like you are right. 
Thanks for your comments. Regards, Ilya. ^ permalink raw reply [flat|nested] 35+ messages in thread
* [PATCH 2/2] powerpc: support for 256K pages on PPC 44x 2008-10-16 2:22 [RFC PATCH] Support for big page sizes on 44x (Updated) Ilya Yanok 2008-10-16 2:22 ` [PATCH 1/2] powerpc: add 16K/64K pages support for the 44x PPC32 architectures Ilya Yanok @ 2008-10-16 2:22 ` Ilya Yanok 2008-11-10 15:09 ` [2/2] " Milton Miller 2008-11-11 2:17 ` [RFC PATCH] Support for big page sizes on 44x (Updated) Benjamin Herrenschmidt 2008-11-24 20:32 ` Hollis Blanchard 3 siblings, 1 reply; 35+ messages in thread From: Ilya Yanok @ 2008-10-16 2:22 UTC (permalink / raw) To: linuxppc-dev; +Cc: yanok, dzu, pvr, wd This patch adds support for 256K pages on PPC 44x along with some hacks needed for this. Signed-off-by: Yuri Tikhonov <yur@emcraft.com> Signed-off-by: Vladimir Panfilov <pvr@emcraft.com> Signed-off-by: Ilya Yanok <yanok@emcraft.com> --- arch/powerpc/Kconfig | 8 ++++++++ arch/powerpc/include/asm/highmem.h | 3 ++- arch/powerpc/include/asm/mmu-44x.h | 2 ++ arch/powerpc/include/asm/page.h | 6 ++++-- arch/powerpc/include/asm/page_32.h | 4 ++++ arch/powerpc/include/asm/thread_info.h | 4 ++++ arch/powerpc/kernel/head_booke.h | 11 +++++++++-- 7 files changed, 33 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 9627cfd..7df5528 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -425,6 +425,14 @@ config PPC_64K_PAGES bool "64k page size" if 44x || PPC64 select PPC_HAS_HASH_64K if PPC64 +config PPC_256K_PAGES + bool "256k page size" if 44x + depends on BROKEN + help + ELF standard supports only page sizes up to 64K so you need a patched + binutils in order to use 256K pages. Chose it only if you know what + you are doing. 
+ endchoice config FORCE_MAX_ZONEORDER diff --git a/arch/powerpc/include/asm/highmem.h b/arch/powerpc/include/asm/highmem.h index dc1132c..0b4ac6a 100644 --- a/arch/powerpc/include/asm/highmem.h +++ b/arch/powerpc/include/asm/highmem.h @@ -38,7 +38,8 @@ extern pte_t *pkmap_page_table; * easily, subsequent pte tables have to be allocated in one physical * chunk of RAM. */ -#if defined(CONFIG_PPC_64K_PAGES) && !defined(CONFIG_PPC64) +#if defined(CONFIG_PPC_256K_PAGES) || \ + (defined(CONFIG_PPC_64K_PAGES) && !defined(CONFIG_PPC64)) #define PKMAP_ORDER (27 - PAGE_SHIFT) #define LAST_PKMAP (1 << PKMAP_ORDER) #define PKMAP_BASE (FIXADDR_START - PAGE_SIZE*(LAST_PKMAP + 1)) diff --git a/arch/powerpc/include/asm/mmu-44x.h b/arch/powerpc/include/asm/mmu-44x.h index 2ca18e8..b943462 100644 --- a/arch/powerpc/include/asm/mmu-44x.h +++ b/arch/powerpc/include/asm/mmu-44x.h @@ -81,6 +81,8 @@ typedef struct { #define PPC44x_TLBE_SIZE PPC44x_TLB_16K #elif (PAGE_SHIFT == 16) #define PPC44x_TLBE_SIZE PPC44x_TLB_64K +#elif (PAGE_SHIFT == 18) +#define PPC44x_TLBE_SIZE PPC44x_TLB_256K #else #error "Unsupported PAGE_SIZE" #endif diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h index 537d5b1..f42c918 100644 --- a/arch/powerpc/include/asm/page.h +++ b/arch/powerpc/include/asm/page.h @@ -15,12 +15,14 @@ #include <asm/types.h> /* - * On regular PPC32 page size is 4K (but we support 4K/16K/64K pages + * On regular PPC32 page size is 4K (but we support 4K/16K/64K/256K pages * on PPC44x). For PPC64 we support either 4K or 64K software * page size. When using 64K pages however, whether we are really supporting * 64K pages in HW or not is irrelevant to those definitions. 
*/ -#if defined(CONFIG_PPC_64K_PAGES) +#if defined(CONFIG_PPC_256K_PAGES) +#define PAGE_SHIFT 18 +#elif defined(CONFIG_PPC_64K_PAGES) #define PAGE_SHIFT 16 #elif defined(CONFIG_PPC_16K_PAGES) #define PAGE_SHIFT 14 diff --git a/arch/powerpc/include/asm/page_32.h b/arch/powerpc/include/asm/page_32.h index ebfae53..273369a 100644 --- a/arch/powerpc/include/asm/page_32.h +++ b/arch/powerpc/include/asm/page_32.h @@ -20,7 +20,11 @@ */ #ifdef CONFIG_PTE_64BIT typedef unsigned long long pte_basic_t; +#ifdef CONFIG_PPC_256K_PAGES +#define PTE_SHIFT (PAGE_SHIFT - 7) +#else #define PTE_SHIFT (PAGE_SHIFT - 3) /* 512 ptes per page */ +#endif #else typedef unsigned long pte_basic_t; #define PTE_SHIFT (PAGE_SHIFT - 2) /* 1024 ptes per page */ diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h index 9665a26..3c8bbab 100644 --- a/arch/powerpc/include/asm/thread_info.h +++ b/arch/powerpc/include/asm/thread_info.h @@ -15,8 +15,12 @@ #ifdef CONFIG_PPC64 #define THREAD_SHIFT 14 #else +#ifdef CONFIG_PPC_256K_PAGES +#define THREAD_SHIFT 15 +#else #define THREAD_SHIFT 13 #endif +#endif #define THREAD_SIZE (1 << THREAD_SHIFT) diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h index fce2df9..acd4b47 100644 --- a/arch/powerpc/kernel/head_booke.h +++ b/arch/powerpc/kernel/head_booke.h @@ -9,6 +9,13 @@ li r26,vector_label@l; \ mtspr SPRN_IVOR##vector_number,r26; \ sync +#ifndef CONFIG_PPC_256K_PAGES +#define ALLOC_STACK_FRAME(reg, val) addi reg,reg,val +#else +#define ALLOC_STACK_FRAME(reg, val) \ + addis reg,reg,val@ha; \ + addi reg,reg,val@l +#endif #define NORMAL_EXCEPTION_PROLOG \ mtspr SPRN_SPRG0,r10; /* save two registers to work with */\ @@ -20,7 +27,7 @@ beq 1f; \ mfspr r1,SPRN_SPRG3; /* if from user, start at top of */\ lwz r1,THREAD_INFO-THREAD(r1); /* this thread's kernel stack */\ - addi r1,r1,THREAD_SIZE; \ + ALLOC_STACK_FRAME(r1, THREAD_SIZE); \ 1: subi r1,r1,INT_FRAME_SIZE; /* Allocate an exception frame */\ 
mr r11,r1; \ stw r10,_CCR(r11); /* save various registers */\ @@ -112,7 +119,7 @@ andi. r10,r10,MSR_PR; \ mfspr r11,SPRN_SPRG3; /* if from user, start at top of */\ lwz r11,THREAD_INFO-THREAD(r11); /* this thread's kernel stack */\ - addi r11,r11,EXC_LVL_FRAME_OVERHEAD; /* allocate stack frame */\ + ALLOC_STACK_FRAME(r11 ,EXC_LVL_FRAME_OVERHEAD); /* allocate stack frame */\ beq 1f; \ /* COMING FROM USER MODE */ \ stw r9,_CCR(r11); /* save CR */\ -- 1.5.6.1 ^ permalink raw reply related [flat|nested] 35+ messages in thread
* Re: [2/2] powerpc: support for 256K pages on PPC 44x 2008-10-16 2:22 ` [PATCH 2/2] powerpc: support for 256K pages on PPC 44x Ilya Yanok @ 2008-11-10 15:09 ` Milton Miller 2008-11-10 16:24 ` Ilya Yanok 0 siblings, 1 reply; 35+ messages in thread From: Milton Miller @ 2008-11-10 15:09 UTC (permalink / raw) To: Ilya Yanok; +Cc: linux-ppc, Vladimir Panfilov, Wolfgang Denk, dzu On 2008-10-16 at 02:22:32, Ilya Yanok wrote: > > This patch adds support for 256K pages on PPC 44x along with > some hacks needed for this. This description is insufficient, it describes neither the hacks nor why they are required. > diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig > index 9627cfd..7df5528 100644 > --- a/arch/powerpc/Kconfig > +++ b/arch/powerpc/Kconfig > @@ -425,6 +425,14 @@ config PPC_64K_PAGES > bool "64k page size" if 44x || PPC64 > select PPC_HAS_HASH_64K if PPC64 > > +config PPC_256K_PAGES > + bool "256k page size" if 44x > + depends on BROKEN I know it was not your original choice, but I feel BROKEN is too strong. It should be under embedded, and maybe a second choice "I am using standard binutils" that defaults to yes and is set to no (so that all yes config does not enable it by accident), but I feel labeling this BROKEN for an external dependency is wrong. > + help > + ELF standard supports only page sizes up to 64K so you need > a patched > + binutils in order to use 256K pages. Chose it only if you > know what > + you are doing. > + > endchoice > > config FORCE_MAX_ZONEORDER > diff --git a/arch/powerpc/include/asm/highmem.h > b/arch/powerpc/include/asm/highmem.h > index dc1132c..0b4ac6a 100644 > --- a/arch/powerpc/include/asm/highmem.h > +++ b/arch/powerpc/include/asm/highmem.h > @@ -38,7 +38,8 @@ extern pte_t *pkmap_page_table; > * easily, subsequent pte tables have to be allocated in one physical > * chunk of RAM. 
> */ > -#if defined(CONFIG_PPC_64K_PAGES) && !defined(CONFIG_PPC64) > +#if defined(CONFIG_PPC_256K_PAGES) || \ > + (defined(CONFIG_PPC_64K_PAGES) && !defined(CONFIG_PPC64)) Just because 256K pages is not selectable on PPC64 doesn't mean that this is the right grouping. However, as I said on the previous patch, this file is never included on PPC64 so the clause should be removed. > diff --git a/arch/powerpc/include/asm/page_32.h > b/arch/powerpc/include/asm/page_32.h > index ebfae53..273369a 100644 > --- a/arch/powerpc/include/asm/page_32.h > +++ b/arch/powerpc/include/asm/page_32.h > @@ -20,7 +20,11 @@ > */ > #ifdef CONFIG_PTE_64BIT > typedef unsigned long long pte_basic_t; > +#ifdef CONFIG_PPC_256K_PAGES > +#define PTE_SHIFT (PAGE_SHIFT - 7) This seems to be missing the comment on how many ptes are actually in the page that are in the other if and else cases. > +#else > #define PTE_SHIFT (PAGE_SHIFT - 3) /* 512 ptes per page */ > +#endif > #else > typedef unsigned long pte_basic_t; > #define PTE_SHIFT (PAGE_SHIFT - 2) /* 1024 ptes per page > */ > diff --git a/arch/powerpc/include/asm/thread_info.h > b/arch/powerpc/include/asm/thread_info.h > index 9665a26..3c8bbab 100644 > --- a/arch/powerpc/include/asm/thread_info.h > +++ b/arch/powerpc/include/asm/thread_info.h > @@ -15,8 +15,12 @@ > #ifdef CONFIG_PPC64 > #define THREAD_SHIFT 14 > #else > +#ifdef CONFIG_PPC_256K_PAGES > +#define THREAD_SHIFT 15 > +#else > #define THREAD_SHIFT 13 > #endif > +#endif > > #define THREAD_SIZE (1 << THREAD_SHIFT) So this appears to be the one hack. For some unknown reason, you are increasing the kernel stack from 8k to 32k when selecting 256k pages. What data structure is ballooning in size so much that you need the additional kernel stack space on 256k pages but not on 64k pages? Is this really tied to 256k base page size? 
> > diff --git a/arch/powerpc/kernel/head_booke.h > b/arch/powerpc/kernel/head_booke.h > index fce2df9..acd4b47 100644 > --- a/arch/powerpc/kernel/head_booke.h > +++ b/arch/powerpc/kernel/head_booke.h > @@ -9,6 +9,13 @@ > li r26,vector_label@l; \ > mtspr SPRN_IVOR##vector_number,r26; \ > sync > +#ifndef CONFIG_PPC_256K_PAGES > +#define ALLOC_STACK_FRAME(reg, val) addi reg,reg,val > +#else > +#define ALLOC_STACK_FRAME(reg, val) \ > + addis reg,reg,val@ha; \ > + addi reg,reg,val@l > +#endif And this is directly related to choosing the stack size of 32k, which can not be added in a single instruction and larger even than what the 64 bit kernel uses. So even further explanation of the analysis is required. > > #define NORMAL_EXCEPTION_PROLOG > \ > mtspr SPRN_SPRG0,r10; /* save two registers to work > with */\ > @@ -20,7 +27,7 @@ > beq 1f; > \ > mfspr r1,SPRN_SPRG3; /* if from user, start at top > of */\ > lwz r1,THREAD_INFO-THREAD(r1); /* this thread's kernel > stack */\ > - addi r1,r1,THREAD_SIZE; > \ > + ALLOC_STACK_FRAME(r1, THREAD_SIZE); > \ > 1: subi r1,r1,INT_FRAME_SIZE; /* Allocate an exception frame > */\ > mr r11,r1; > \ > stw r10,_CCR(r11); /* save various registers > */\ > @@ -112,7 +119,7 @@ > andi. r10,r10,MSR_PR; > \ > mfspr r11,SPRN_SPRG3; /* if from user, start at top > of */\ > lwz r11,THREAD_INFO-THREAD(r11); /* this thread's kernel > stack */\ > - addi r11,r11,EXC_LVL_FRAME_OVERHEAD; /* allocate stack > frame */\ > + ALLOC_STACK_FRAME(r11 ,EXC_LVL_FRAME_OVERHEAD); /* allocate > stack frame */\ > beq 1f; > \ > /* COMING FROM USER MODE */ > \ > stw r9,_CCR(r11); /* save CR > */\ milton ^ permalink raw reply [flat|nested] 35+ messages in thread
* Re: [2/2] powerpc: support for 256K pages on PPC 44x 2008-11-10 15:09 ` [2/2] " Milton Miller @ 2008-11-10 16:24 ` Ilya Yanok 2008-11-11 14:59 ` Milton Miller 0 siblings, 1 reply; 35+ messages in thread From: Ilya Yanok @ 2008-11-10 16:24 UTC (permalink / raw) To: Milton Miller; +Cc: linux-ppc, Vladimir Panfilov, Wolfgang Denk, dzu Hello Milton, Milton Miller wrote: >> This patch adds support for 256K pages on PPC 44x along with >> some hacks needed for this. > > This description is insufficient, it describes neither the hacks nor > why they are required. Ok. Actually there is only one hack -- increasing kernel stack size. We do this because with 256K pages we get division by zero in kernel/fork.c: /* * The default maximum number of threads is set to a safe * value: the thread structures can take up at most half * of memory. */ max_threads = mempages / (8 * THREAD_SIZE / PAGE_SIZE); so setting THREAD_SIZE to bigger value we can avoid this. I don't think it's very clean solution but at least we stay powerpc-specific. >> diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig >> index 9627cfd..7df5528 100644 >> --- a/arch/powerpc/Kconfig >> +++ b/arch/powerpc/Kconfig >> @@ -425,6 +425,14 @@ config PPC_64K_PAGES >> bool "64k page size" if 44x || PPC64 >> select PPC_HAS_HASH_64K if PPC64 >> >> +config PPC_256K_PAGES >> + bool "256k page size" if 44x >> + depends on BROKEN > > I know it was not your original choice, but I feel BROKEN is too > strong. It should be under embedded, and maybe a second choice "I am > using standard binutils" that defaults to yes and is set to no (so > that all yes config does not enable it by accident), but I feel > labeling this BROKEN for an external dependency is wrong. Hm... maybe you are right. I'm looking forward for additional comments on this. >> + help >> + ELF standard supports only page sizes up to 64K so you need >> a patched >> + binutils in order to use 256K pages. Chose it only if you >> know what >> + you are doing. 
>> + >> endchoice >> >> config FORCE_MAX_ZONEORDER >> diff --git a/arch/powerpc/include/asm/highmem.h >> b/arch/powerpc/include/asm/highmem.h >> index dc1132c..0b4ac6a 100644 >> --- a/arch/powerpc/include/asm/highmem.h >> +++ b/arch/powerpc/include/asm/highmem.h >> @@ -38,7 +38,8 @@ extern pte_t *pkmap_page_table; >> * easily, subsequent pte tables have to be allocated in one physical >> * chunk of RAM. >> */ >> -#if defined(CONFIG_PPC_64K_PAGES) && !defined(CONFIG_PPC64) >> +#if defined(CONFIG_PPC_256K_PAGES) || \ >> + (defined(CONFIG_PPC_64K_PAGES) && !defined(CONFIG_PPC64)) > > Just because 256K pages is not selectable on PPC64 doesn't mean that > this is the right grouping. However, as I said on the previous > patch, this file is never included on PPC64 so the clause should be > removed. Ok. >> diff --git a/arch/powerpc/include/asm/page_32.h >> b/arch/powerpc/include/asm/page_32.h >> index ebfae53..273369a 100644 >> --- a/arch/powerpc/include/asm/page_32.h >> +++ b/arch/powerpc/include/asm/page_32.h >> @@ -20,7 +20,11 @@ >> */ >> #ifdef CONFIG_PTE_64BIT >> typedef unsigned long long pte_basic_t; >> +#ifdef CONFIG_PPC_256K_PAGES >> +#define PTE_SHIFT (PAGE_SHIFT - 7) > > This seems to be missing the comment on how many ptes are actually in > the page that are in the other if and else cases. Ok. I'll fix this. 
Actually it's another hack: we don't use full page for PTE table because we need to reserve something for PGD >> +#else >> #define PTE_SHIFT (PAGE_SHIFT - 3) /* 512 ptes per page */ >> +#endif >> #else >> typedef unsigned long pte_basic_t; >> #define PTE_SHIFT (PAGE_SHIFT - 2) /* 1024 ptes per page */ >> diff --git a/arch/powerpc/include/asm/thread_info.h >> b/arch/powerpc/include/asm/thread_info.h >> index 9665a26..3c8bbab 100644 >> --- a/arch/powerpc/include/asm/thread_info.h >> +++ b/arch/powerpc/include/asm/thread_info.h >> @@ -15,8 +15,12 @@ >> #ifdef CONFIG_PPC64 >> #define THREAD_SHIFT 14 >> #else >> +#ifdef CONFIG_PPC_256K_PAGES >> +#define THREAD_SHIFT 15 >> +#else >> #define THREAD_SHIFT 13 >> #endif >> +#endif >> >> #define THREAD_SIZE (1 << THREAD_SHIFT) > > > So this appears to be the one hack. For some unknown reason, you are > increasing the kernel stack from 8k to 32k when selecting 256k > pages. What data structure is ballooning in size so much that you > need the additional kernel stack space on 256k pages but not on 64k > pages? Is this really tied to 256k base page size? We don't really need additional stack space. Just trying to avoid division by zero. Regards, Ilya. ^ permalink raw reply [flat|nested] 35+ messages in thread
* Re: [2/2] powerpc: support for 256K pages on PPC 44x 2008-11-10 16:24 ` Ilya Yanok @ 2008-11-11 14:59 ` Milton Miller 2008-11-14 4:32 ` Re[2]: " Yuri Tikhonov 0 siblings, 1 reply; 35+ messages in thread From: Milton Miller @ 2008-11-11 14:59 UTC (permalink / raw) To: Ilya Yanok; +Cc: linux-ppc, Vladimir Panfilov, Wolfgang Denk, dzu Sorry for the slow reply, but my shell account is broken and I had to post from home. On Nov 10, 2008, at 10:24 AM, Ilya Yanok wrote: >>> This patch adds support for 256K pages on PPC 44x along with >>> some hacks needed for this. >> >> This description is insufficient, it describes neither the hacks nor >> why they are required. > > Ok. Actually there is only one hack -- increasing kernel stack size. We > do this because with 256K pages we get division by zero in > kernel/fork.c: > > /* > * The default maximum number of threads is set to a safe > * value: the thread structures can take up at most half > * of memory. > */ > max_threads = mempages / (8 * THREAD_SIZE / PAGE_SIZE); > > so setting THREAD_SIZE to bigger value we can avoid this. I don't think > it's very clean solution but at least we stay powerpc-specific. And why is keeping a line of code intact, which doesn't even match its comment, by creating a hack workaround that increases memory consumption, that is triggered by enabling an option that already increases memory pressure, just to stay architecture specific anything like sanity? No. Submit a patch to address the division by zero instead. 
Btw, I did some research for you (all are from torvalds/old-2.6-bkcvs, and the last patch has been edited for relevance along with one of the descriptions): v2.6.10-rc2-g63f96a6 commit 63f96a6d9c1a54875f3bd07a6337993bc5180ecb Author: torvalds <torvalds> Commit: torvalds <torvalds> Merge bk://linux-mtd.bkbits.net/mtd-bugsonly-2.6 into ppc970.osdl.org:/home/torvalds/v2.6/linux 2004/11/16 17:29:15-08:00 dhowells [PATCH] Fork fix fix The attached patch fixes the fork fix to avoid the divide-by-zero error I'd previously fixed, but without using any sort of conditional. Signed-off-by: David Howells <dhowells@redhat.com> Signed-off-by: Linus Torvalds <torvalds@osdl.org> ... BKrev: 419aaa45h5IsCw4CAYMVTOWK9oVaBA diff --git a/kernel/fork.c b/kernel/fork.c index f5fba87..f157ad6 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -118,10 +118,7 @@ void __init fork_init(unsigned long mempages) * value: the thread structures can take up at most half * of memory. */ - if (THREAD_SIZE >= PAGE_SIZE) - max_threads = mempages / (THREAD_SIZE/PAGE_SIZE) / 8; - else - max_threads = mempages / 8; + max_threads = mempages / (8 * THREAD_SIZE / PAGE_SIZE); /* * we need to allow at least 20 threads to boot a system v2.6.10-rc1-g368b064 commit 368b06415c11e286f6ab3fe7c52bdd5b9b6f3008 Author: dhowells <dhowells> Commit: dhowells <dhowells> [PATCH] fix page size assumption in fork() The attached patch fixes fork to get rid of the assumption that THREAD_SIZE >= PAGE_SIZE (on the FR-V the smallest available page size is 16KB). Signed-Off-By: David Howells <dhowells@redhat.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> BKrev: 4193db17ZJRaaVNEGezHMBUmByER4A diff --git a/kernel/fork.c b/kernel/fork.c index eb689d9..f5fba87 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -118,7 +118,11 @@ void __init fork_init(unsigned long mempages) * value: the thread structures can take up at most half * of memory. 
*/ - max_threads = mempages / (THREAD_SIZE/PAGE_SIZE) / 8; + if (THREAD_SIZE >= PAGE_SIZE) + max_threads = mempages / (THREAD_SIZE/PAGE_SIZE) / 8; + else + max_threads = mempages / 8; + /* * we need to allow at least 20 threads to boot a system */ v2.4.0-g4214e42 commit 4214e42f96d4051cb77b1b7c2b041715db84ffd9 Author: torvalds <torvalds> Commit: torvalds <torvalds> v2.4.9.11 -> v2.4.9.12 - Alan Cox: much more merging - Pete Zaitcev: ymfpci race fixes - Andrea Arkangeli: VM race fix and OOM tweak. - Arjan Van de Ven: merge RH kernel fixes - Andi Kleen: use more readable 'likely()/unlikely()' instead of __builtin_expect() - Keith Owens: fix 64-bit ELF types - Gerd Knorr: mark more broken PCI bridges, update btaudio driver - Paul Mackerras: powermac driver update - me: clean up PTRACE_DETACH to use common infrastructure BKrev: 3c603e338Tv2BTX9tkeBFGWLdI-r4Q diff --git a/kernel/fork.c b/kernel/fork.c index 9179e23..91aeda9 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -72,7 +72,7 @@ void __init fork_init(unsigned long mempages) * value: the thread structures can take up at most half * of memory. */ - max_threads = mempages / (THREAD_SIZE/PAGE_SIZE) / 16; + max_threads = mempages / (THREAD_SIZE/PAGE_SIZE) / 8; init_task.rlim[RLIMIT_NPROC].rlim_cur = max_threads/2; init_task.rlim[RLIMIT_NPROC].rlim_max = max_threads/2; v2.4.0-gcaeb6d6 commit caeb6d68179ecd9dfeac8fa17daa7150163fa318 Author: torvalds <torvalds> Commit: torvalds <torvalds> v2.4.9.10 -> v2.4.9.11 - Neil Brown: md cleanups/fixes - Andrew Morton: console locking merge - Andrea Arkangeli: major VM merge BKrev: 3c603e2fnBNvsVsBbJrGD3fFs4xTFg diff --git a/kernel/fork.c b/kernel/fork.c index ebfbf2b..9179e23 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -72,7 +72,7 @@ void __init fork_init(unsigned long mempages) * value: the thread structures can take up at most half * of memory. 
*/ - max_threads = mempages / (THREAD_SIZE/PAGE_SIZE) / 2; + max_threads = mempages / (THREAD_SIZE/PAGE_SIZE) / 16; init_task.rlim[RLIMIT_NPROC].rlim_cur = max_threads/2; init_task.rlim[RLIMIT_NPROC].rlim_max = max_threads/2; So the comment has been wrong since 2.4.9.11, and has been changed several times since then. It can be changed. And if you fix the comment you might even get bonus points. Thinking about it though, the minimum divide should be at least 2 (a partial page for a stack and a page for a flat or omagic binary). >>> >>> #ifdef CONFIG_PTE_64BIT >>> typedef unsigned long long pte_basic_t; >>> +#ifdef CONFIG_PPC_256K_PAGES >>> +#define PTE_SHIFT (PAGE_SHIFT - 7) >> >> This seems to be missing the comment on how many ptes are actually in >> the page that are in the other if and else cases. > > Ok. I'll fix this. Actually it's another hack: we don't use full page > for PTE table because we need to reserve something for PGD I don't understand "we need to reserve something for PGD". Do you mean that you would not require a second page for the PGD because the full pagetable could fit in one page? My first reaction was to say then create pgtable-nopgd.h like the other two. The page walkers support this with the advent of gigantic pages. Then I realized that might not be optimal: while the page table might fit in one page, it would mean you always allocate the pte space to cover the full address space. Even if your processes spread out over the 3G of address space allocated to them (32 bit kernel), you will allocate space for 4G, wasting 1/4 of the pte space. That does imply you want to allocate the pte page from a slab instead of pgalloc. Is that covered? milton ^ permalink raw reply related [flat|nested] 35+ messages in thread
* Re[2]: [2/2] powerpc: support for 256K pages on PPC 44x 2008-11-11 14:59 ` Milton Miller @ 2008-11-14 4:32 ` Yuri Tikhonov 2008-11-14 15:41 ` Milton Miller 0 siblings, 1 reply; 35+ messages in thread From: Yuri Tikhonov @ 2008-11-14 4:32 UTC (permalink / raw) To: Milton Miller Cc: linux-ppc, dzu, Vladimir Panfilov, Ilya Yanok, Wolfgang Denk Hello Milton, On Tuesday, November 11, 2008 Milton Miller wrote: [snip] >>>> >>>> #ifdef CONFIG_PTE_64BIT >>>> typedef unsigned long long pte_basic_t; >>>> +#ifdef CONFIG_PPC_256K_PAGES >>>> +#define PTE_SHIFT (PAGE_SHIFT - 7) >>> >>> This seems to be missing the comment on how many ptes are actually in >>> the page that are in the other if and else cases. >> >> Ok. I'll fix this. Actually it's another hack: we don't use full page >> for PTE table because we need to reserve something for PGD > I don't understand "we need to reserve something for PGD". Do you > mean that you would not require a second page for the PGD because the > full pagetable could fit in one page? My first reaction was to say > then create pgtable-nopgd.h like the other two. The page walkers > support this with the advent of gigantic pages. Then I realized that > might not be optimal: while the page table might fit in one page, it > would mean you always allocate the pte space to cover the full address > space. Even if your processes spread out over the 3G of address space > allocated to them (32 bit kernel), you will allocate space for 4G, > wasting 1/4 of the pte space. > That does imply you want to allocate the pte page from a slab instead > of pgalloc. Is that covered? Well, in case of 256K PAGE_SIZE we do not need the PGD level indeed (18 bits are used for offset, and remaining 14 bits are for PTE index inside the PTE table). Even the full 256K PTE page isn't necessary to cover the full range: only half of it would be enough (with 14 bits we can address only 16K PTEs). 
But the head_44x.S code is essentially based on the assumption of 2-level page addressing. Also, I may guess that eliminating of the PGD level won't be as easy as just a re-implementation of the TLB-miss handlers in head_44x.S. So, the current approach for 256K-pages support was just a compromise between the required for the project functionality, and the effort necessary to achieve it. Regards, Yuri -- Yuri Tikhonov, Senior Software Engineer Emcraft Systems, www.emcraft.com ^ permalink raw reply [flat|nested] 35+ messages in thread
* Re: Re[2]: [2/2] powerpc: support for 256K pages on PPC 44x 2008-11-14 4:32 ` Re[2]: " Yuri Tikhonov @ 2008-11-14 15:41 ` Milton Miller 2008-11-27 0:30 ` Re[4]: " Yuri Tikhonov 0 siblings, 1 reply; 35+ messages in thread From: Milton Miller @ 2008-11-14 15:41 UTC (permalink / raw) To: Yuri Tikhonov Cc: dzu, Ilya Yanok, linux-ppc, Vladimir Panfilov, Wolfgang Denk On Nov 13, 2008, at 10:32 PM, Yuri Tikhonov wrote: > On Tuesday, November 11, 2008 Milton Miller wrote: >>>>> #ifdef CONFIG_PTE_64BIT >>>>> typedef unsigned long long pte_basic_t; >>>>> +#ifdef CONFIG_PPC_256K_PAGES >>>>> +#define PTE_SHIFT (PAGE_SHIFT - 7) >>>> >>>> This seems to be missing the comment on how many ptes are actually >>>> in >>>> the page that are in the other if and else cases. >>> >>> Ok. I'll fix this. Actually it's another hack: we don't use full page >>> for PTE table because we need to reserve something for PGD > >> I don't understand "we need to reserve something for PGD". Do you >> mean that you would not require a second page for the PGD because the >> full pagetable could fit in one page? ... >> That does imply you want to allocate the pte page from a slab instead >> of pgalloc. Is that covered? > > Well, in case of 256K PAGE_SIZE we do not need the PGD level indeed > (18 bits are used for offset, and remaining 14 bits are for PTE index > inside the PTE table). Even the full 256K PTE page isn't necessary to > cover the full range: only half of it would be enough (with 14 bits we > can address only 16K PTEs). > > But the head_44x.S code is essentially based on the assumption of > 2-level page addressing. Also, I may guess that eliminating of the > PGD level won't be as easy as just a re-implementation of the TLB-miss > handlers in head_44x.S. So, the current approach for 256K-pages > support was just a compromise between the required for the project > functionality, and the effort necessary to achieve it. 
So are you allocating the < PAGE_SIZE levels from slabs (either kmalloc or dedicated) instead of allocating pages? Or are you wasting the extra space? At a very minimum you need to comment this in the code. If I were the maintainer I would say not wasting large fractions of pages when the page size is 256k would be my merge requirement. As I said, I'm fine with keeping the page table two levels, but the tradeoff needs to be documented. milton ^ permalink raw reply [flat|nested] 35+ messages in thread
* Re[4]: [2/2] powerpc: support for 256K pages on PPC 44x 2008-11-14 15:41 ` Milton Miller @ 2008-11-27 0:30 ` Yuri Tikhonov 0 siblings, 0 replies; 35+ messages in thread From: Yuri Tikhonov @ 2008-11-27 0:30 UTC (permalink / raw) To: Milton Miller Cc: dzu, Ilya Yanok, linux-ppc, Vladimir Panfilov, Wolfgang Denk Hello Milton, On Friday, November 14, 2008 you wrote: > On Nov 13, 2008, at 10:32 PM, Yuri Tikhonov wrote: >> On Tuesday, November 11, 2008 Milton Miller wrote: >>>>>> #ifdef CONFIG_PTE_64BIT >>>>>> typedef unsigned long long pte_basic_t; >>>>>> +#ifdef CONFIG_PPC_256K_PAGES >>>>>> +#define PTE_SHIFT (PAGE_SHIFT - 7) >>>>> >>>>> This seems to be missing the comment on how many ptes are actually >>>>> in >>>>> the page that are in the other if and else cases. >>>> >>>> Ok. I'll fix this. Actually it's another hack: we don't use full page >>>> for PTE table because we need to reserve something for PGD >> >>> I don't understand "we need to reserve something for PGD". Do you >>> mean that you would not require a second page for the PGD because the >>> full pagetable could fit in one page? > ... >>> That does imply you want to allocate the pte page from a slab instead >>> of pgalloc. Is that covered? >> >> Well, in case of 256K PAGE_SIZE we do not need the PGD level indeed >> (18 bits are used for offset, and remaining 14 bits are for PTE index >> inside the PTE table). Even the full 256K PTE page isn't necessary to >> cover the full range: only half of it would be enough (with 14 bits we >> can address only 16K PTEs). >> >> But the head_44x.S code is essentially based on the assumption of >> 2-level page addressing. Also, I may guess that eliminating of the >> PGD level won't be as easy as just a re-implementation of the TLB-miss >> handlers in head_44x.S. So, the current approach for 256K-pages >> support was just a compromise between the required for the project >> functionality, and the effort necessary to achieve it. 
> So are you allocating the < PAGE_SIZE levels from slabs (either kmalloc > or dedicated) instead of allocating pages? Or are you wasting the > extra space? Wasting the extra space has a place here. > At a very minimum you need to comment this in the code. If I were > maintiner I would say not wasting large fractions of pages when the > page size is 256k would be my merge requirement. As I said, I'm fine > with keeping the page table two levels, but the tradeoff needs to be > documented. Agree, we'll document this fact, and re-submit the patch. Regards, Yuri -- Yuri Tikhonov, Senior Software Engineer Emcraft Systems, www.emcraft.com ^ permalink raw reply [flat|nested] 35+ messages in thread
* Re: [RFC PATCH] Support for big page sizes on 44x (Updated) 2008-10-16 2:22 [RFC PATCH] Support for big page sizes on 44x (Updated) Ilya Yanok 2008-10-16 2:22 ` [PATCH 1/2] powerpc: add 16K/64K pages support for the 44x PPC32 architectures Ilya Yanok 2008-10-16 2:22 ` [PATCH 2/2] powerpc: support for 256K pages on PPC 44x Ilya Yanok @ 2008-11-11 2:17 ` Benjamin Herrenschmidt 2008-11-11 2:22 ` Benjamin Herrenschmidt 2008-11-24 20:32 ` Hollis Blanchard 3 siblings, 1 reply; 35+ messages in thread From: Benjamin Herrenschmidt @ 2008-11-11 2:17 UTC (permalink / raw) To: Ilya Yanok; +Cc: linuxppc-dev, pvr, dzu, wd On Thu, 2008-10-16 at 06:22 +0400, Ilya Yanok wrote: > These patches add support for selecting page size on PPC 44x. > First one adds support for 16K/64K pages while second one adds support > for 256K pages along with some hacks. > > However there are still number of problems: > 1. We can't use default PKMAP_BASE definition with 64KB/256KB pages so > we change it. Not sure that it's optimal. Then redefined PKMAP_BASE is > not aligned on (1<<PMD_SHIFT), don't know if it is really bad. Well, the main thing is the implementation of kmap and kmap_atomic. They both basically assume that all the reserved PTEs for kmap and kmap_atomic are in a single PTE page since they use a simple addition (subtraction for _atomic really but heh, that's about the same). Note that PKMAP (kmap) and FIXMAP (kmap_atomic) can be in two different PTE pages. But it's important that the whole PKMAP is entirely contained within a PTE page. It doesn't have to -start- on a PTE page boundary though. > 2. with 16KB/64KB/256KB pages WARN_ON(!pmd_none(*pmd)) is triggered > inside dma_alloc_init() function. Not sure if it is really bad. I think that's a bogus WARN_ON. > 3. with 256KB pages ENTRIES_PER_PAGEPAGE in mm/shem.c become zero. Yeah well, I'd like to keep that 256K page separate for now, let's focus on merging 16K/64K support first. > 4. 
We use asm-offsets mechanism to make PTE_SHIFT/PMD_SHIFT available in > assembler but we don't really need the power of asm-offsets here. Maybe > it will be more convinient to just take these defines out of #ifndef > __ASSEMBLY__? But this would change asm-generic... We sure should do that. I can't think of a reason why those need to be protected by __ASSEMBLY__. Cheers, Ben. ^ permalink raw reply [flat|nested] 35+ messages in thread
* Re: [RFC PATCH] Support for big page sizes on 44x (Updated) 2008-11-11 2:17 ` [RFC PATCH] Support for big page sizes on 44x (Updated) Benjamin Herrenschmidt @ 2008-11-11 2:22 ` Benjamin Herrenschmidt 0 siblings, 0 replies; 35+ messages in thread From: Benjamin Herrenschmidt @ 2008-11-11 2:22 UTC (permalink / raw) To: Ilya Yanok; +Cc: linuxppc-dev, pvr, dzu, wd > Well, the main thing is the implementation of kmap and kmap_atomic. > > They both basically assumes that all the reserved PTEs for kmap and > kmap_atomic are in a single PTE page since it uses a simple addition > (substraction for _atomic really but heh, that's about the same). > > Note that PKMAP (kmap) and FIXMAP (kmap_atomic) can be in two different > PTE pages. But it's important that the whole PKMAP is entirely contained > within a PTE page. It doesn't have to -start- on a PTE page boundary > though. Also note that if you end up with PKMAP and FIXMAP on two different PTE pages, make sure they are both instantiated at boot time. Ben. ^ permalink raw reply [flat|nested] 35+ messages in thread
* Re: [RFC PATCH] Support for big page sizes on 44x (Updated) 2008-10-16 2:22 [RFC PATCH] Support for big page sizes on 44x (Updated) Ilya Yanok ` (2 preceding siblings ...) 2008-11-11 2:17 ` [RFC PATCH] Support for big page sizes on 44x (Updated) Benjamin Herrenschmidt @ 2008-11-24 20:32 ` Hollis Blanchard 2008-11-24 23:06 ` Wolfgang Denk 3 siblings, 1 reply; 35+ messages in thread From: Hollis Blanchard @ 2008-11-24 20:32 UTC (permalink / raw) To: Ilya Yanok; +Cc: linuxppc-dev, pvr, dzu, wd On Wed, Oct 15, 2008 at 8:22 PM, Ilya Yanok <yanok@emcraft.com> wrote: > These patches add support for selecting page size on PPC 44x. > First one adds support for 16K/64K pages while second one adds support > for 256K pages along with some hacks. Leaving 256K pages aside for now, do you plan to revise the 4/16/64K page size patch for 2.6.29? KVM gets a huge performance boost from 64K pages, so I'm anxious to see this go in. I've submitted a few supporting patches for incidental problems caused by large pages, so I think it's just down to this patch itself now. -Hollis ^ permalink raw reply [flat|nested] 35+ messages in thread
* Re: [RFC PATCH] Support for big page sizes on 44x (Updated) 2008-11-24 20:32 ` Hollis Blanchard @ 2008-11-24 23:06 ` Wolfgang Denk 0 siblings, 0 replies; 35+ messages in thread From: Wolfgang Denk @ 2008-11-24 23:06 UTC (permalink / raw) To: Yuri Tikhonov; +Cc: linuxppc-dev, pvr, dzu, Ilya Yanok Dear Yuri & Ilya, In message <fb412d760811241232j26477d4ficcce27ea670339fb@mail.gmail.com> Hollis Blanchard wrote: > On Wed, Oct 15, 2008 at 8:22 PM, Ilya Yanok <yanok@emcraft.com> wrote: > > These patches add support for selecting page size on PPC 44x. > > First one adds support for 16K/64K pages while second one adds support > > for 256K pages along with some hacks. > > Leaving 256K pages aside for now, do you plan to revise the 4/16/64K > page size patch for 2.6.29? KVM gets a huge performance boost from 64K > pages, so I'm anxious to see this go in. I've submitted a few > supporting patches for incidental problems caused by large pages, so I > think it's just down to this patch itself now. Can we please push this upstream with some priority? Thanks. Best regards, Wolfgang Denk -- DENX Software Engineering GmbH, MD: Wolfgang Denk & Detlev Zundel HRB 165235 Munich, Office: Kirchenstr.5, D-82194 Groebenzell, Germany Phone: (+49)-8142-66989-10 Fax: (+49)-8142-66989-80 Email: wd@denx.de In any group of employed individuals the only naturally early riser is _always_ the office manager, who will _always_ leave reproachful little notes ... on the desks of their subordinates. - Terry Pratchett, _Lords and Ladies_ ^ permalink raw reply [flat|nested] 35+ messages in thread
end of thread, other threads:[~2008-11-27 0:30 UTC | newest] Thread overview: 35+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2008-10-16 2:22 [RFC PATCH] Support for big page sizes on 44x (Updated) Ilya Yanok 2008-10-16 2:22 ` [PATCH 1/2] powerpc: add 16K/64K pages support for the 44x PPC32 architectures Ilya Yanok 2008-10-17 15:54 ` prodyut hazarika 2008-10-18 12:58 ` Josh Boyer 2008-10-18 20:36 ` prodyut hazarika 2008-10-22 14:28 ` Christian Ehrhardt 2008-10-22 17:54 ` Christian Ehrhardt 2008-10-31 23:23 ` Hollis Blanchard 2008-11-01 11:30 ` Josh Boyer 2008-11-01 21:55 ` Benjamin Herrenschmidt 2008-11-02 13:41 ` Josh Boyer 2008-11-02 21:33 ` Benjamin Herrenschmidt 2008-11-03 0:33 ` Josh Boyer 2008-11-03 0:43 ` Benjamin Herrenschmidt 2008-11-03 11:26 ` Josh Boyer 2008-11-03 20:17 ` Benjamin Herrenschmidt 2008-11-03 19:55 ` Hollis Blanchard 2008-11-03 20:00 ` Josh Boyer 2008-11-05 17:33 ` Hollis Blanchard 2008-11-06 1:48 ` David Gibson 2008-11-11 13:19 ` Josh Boyer 2008-11-11 15:00 ` Hollis Blanchard 2008-11-10 15:09 ` [1/2] " Milton Miller 2008-11-10 16:50 ` Ilya Yanok 2008-10-16 2:22 ` [PATCH 2/2] powerpc: support for 256K pages on PPC 44x Ilya Yanok 2008-11-10 15:09 ` [2/2] " Milton Miller 2008-11-10 16:24 ` Ilya Yanok 2008-11-11 14:59 ` Milton Miller 2008-11-14 4:32 ` Re[2]: " Yuri Tikhonov 2008-11-14 15:41 ` Milton Miller 2008-11-27 0:30 ` Re[4]: " Yuri Tikhonov 2008-11-11 2:17 ` [RFC PATCH] Support for big page sizes on 44x (Updated) Benjamin Herrenschmidt 2008-11-11 2:22 ` Benjamin Herrenschmidt 2008-11-24 20:32 ` Hollis Blanchard 2008-11-24 23:06 ` Wolfgang Denk
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for NNTP newsgroup(s).