* [patch 06/10] 40x decrementer fixes
From: Josh Boyer @ 2007-08-03 16:09 UTC (permalink / raw)
To: paulus; +Cc: linuxppc-dev
In-Reply-To: <20070803160900.069522000@linux.vnet.ibm.com>
Allow generic_calibrate_decr to work for 40x platforms. Given that the hardware
behavior is identical, this also changes the set_dec function to reload the PIT
on 40x to match the behavior 44x currently has.
Signed-off-by: Josh Boyer <jwboyer@linux.vnet.ibm.com>
---
arch/powerpc/kernel/time.c | 2 +-
include/asm-powerpc/time.h | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
--- linux-2.6.orig/arch/powerpc/kernel/time.c
+++ linux-2.6/arch/powerpc/kernel/time.c
@@ -867,7 +867,7 @@ void __init generic_calibrate_decr(void)
"(not found)\n");
}
-#ifdef CONFIG_BOOKE
+#if defined(CONFIG_BOOKE) || defined(CONFIG_40x)
/* Set the time base to zero */
mtspr(SPRN_TBWL, 0);
mtspr(SPRN_TBWU, 0);
--- linux-2.6.orig/include/asm-powerpc/time.h
+++ linux-2.6/include/asm-powerpc/time.h
@@ -174,7 +174,7 @@ static inline unsigned int get_dec(void)
static inline void set_dec(int val)
{
#if defined(CONFIG_40x)
- return; /* Have to let it auto-reload */
+ mtspr(SPRN_PIT, val);
#elif defined(CONFIG_8xx_CPU6)
set_dec_cpu6(val);
#else
--
^ permalink raw reply
* [patch 05/10] 40x MMU
From: Josh Boyer @ 2007-08-03 16:09 UTC (permalink / raw)
To: paulus; +Cc: linuxppc-dev
In-Reply-To: <20070803160900.069522000@linux.vnet.ibm.com>
Add MMU definitions for 40x platforms. Also fixes two warnings in 40x_mmu.c.
Signed-off-by: Josh Boyer <jwboyer@linux.vnet.ibm.com>
---
arch/powerpc/kernel/head_40x.S | 2 -
arch/powerpc/mm/40x_mmu.c | 4 +-
include/asm-powerpc/mmu-40x.h | 65 +++++++++++++++++++++++++++++++++++++++++
include/asm-powerpc/mmu.h | 3 +
4 files changed, 71 insertions(+), 3 deletions(-)
--- /dev/null
+++ linux-2.6/include/asm-powerpc/mmu-40x.h
@@ -0,0 +1,65 @@
+#ifndef _ASM_POWERPC_MMU_40X_H_
+#define _ASM_POWERPC_MMU_40X_H_
+
+/*
+ * PPC40x support
+ */
+
+#define PPC40X_TLB_SIZE 64
+
+/*
+ * TLB entries are defined by a "high" tag portion and a "low" data
+ * portion. On all architectures, the data portion is 32-bits.
+ *
+ * TLB entries are managed entirely under software control by reading,
+ * writing, and searchoing using the 4xx-specific tlbre, tlbwr, and tlbsx
+ * instructions.
+ */
+
+#define TLB_LO 1
+#define TLB_HI 0
+
+#define TLB_DATA TLB_LO
+#define TLB_TAG TLB_HI
+
+/* Tag portion */
+
+#define TLB_EPN_MASK 0xFFFFFC00 /* Effective Page Number */
+#define TLB_PAGESZ_MASK 0x00000380
+#define TLB_PAGESZ(x) (((x) & 0x7) << 7)
+#define PAGESZ_1K 0
+#define PAGESZ_4K 1
+#define PAGESZ_16K 2
+#define PAGESZ_64K 3
+#define PAGESZ_256K 4
+#define PAGESZ_1M 5
+#define PAGESZ_4M 6
+#define PAGESZ_16M 7
+#define TLB_VALID 0x00000040 /* Entry is valid */
+
+/* Data portion */
+
+#define TLB_RPN_MASK 0xFFFFFC00 /* Real Page Number */
+#define TLB_PERM_MASK 0x00000300
+#define TLB_EX 0x00000200 /* Instruction execution allowed */
+#define TLB_WR 0x00000100 /* Writes permitted */
+#define TLB_ZSEL_MASK 0x000000F0
+#define TLB_ZSEL(x) (((x) & 0xF) << 4)
+#define TLB_ATTR_MASK 0x0000000F
+#define TLB_W 0x00000008 /* Caching is write-through */
+#define TLB_I 0x00000004 /* Caching is inhibited */
+#define TLB_M 0x00000002 /* Memory is coherent */
+#define TLB_G 0x00000001 /* Memory is guarded from prefetch */
+
+#ifndef __ASSEMBLY__
+
+typedef unsigned long phys_addr_t;
+
+typedef struct {
+ unsigned long id;
+ unsigned long vdso_base;
+} mm_context_t;
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* _ASM_POWERPC_MMU_40X_H_ */
--- linux-2.6.orig/include/asm-powerpc/mmu.h
+++ linux-2.6/include/asm-powerpc/mmu.h
@@ -8,6 +8,9 @@
#elif defined(CONFIG_PPC_STD_MMU)
/* 32-bit classic hash table MMU */
# include <asm/mmu-hash32.h>
+#elif defined(CONFIG_40x)
+/* 40x-style software loaded TLB */
+# include <asm/mmu-40x.h>
#elif defined(CONFIG_44x)
/* 44x-style software loaded TLB */
# include <asm/mmu-44x.h>
--- linux-2.6.orig/arch/powerpc/mm/40x_mmu.c
+++ linux-2.6/arch/powerpc/mm/40x_mmu.c
@@ -108,7 +108,7 @@ unsigned long __init mmu_mapin_ram(void)
pmd_t *pmdp;
unsigned long val = p | _PMD_SIZE_16M | _PAGE_HWEXEC | _PAGE_HWWRITE;
- pmdp = pmd_offset(pgd_offset_k(v), v);
+ pmdp = pmd_offset(pud_offset(pgd_offset_k(v), v), v);
pmd_val(*pmdp++) = val;
pmd_val(*pmdp++) = val;
pmd_val(*pmdp++) = val;
@@ -123,7 +123,7 @@ unsigned long __init mmu_mapin_ram(void)
pmd_t *pmdp;
unsigned long val = p | _PMD_SIZE_4M | _PAGE_HWEXEC | _PAGE_HWWRITE;
- pmdp = pmd_offset(pgd_offset_k(v), v);
+ pmdp = pmd_offset(pud_offset(pgd_offset_k(v), v), v);
pmd_val(*pmdp) = val;
v += LARGE_PAGE_SIZE_4M;
--- linux-2.6.orig/arch/powerpc/kernel/head_40x.S
+++ linux-2.6/arch/powerpc/kernel/head_40x.S
@@ -772,7 +772,7 @@ finish_tlb_load:
*/
lwz r9, tlb_4xx_index@l(0)
addi r9, r9, 1
- andi. r9, r9, (PPC4XX_TLB_SIZE-1)
+ andi. r9, r9, (PPC40X_TLB_SIZE-1)
stw r9, tlb_4xx_index@l(0)
6:
--
^ permalink raw reply
* [patch 04/10] 4xx bootwrapper reworks
From: Josh Boyer @ 2007-08-03 16:09 UTC (permalink / raw)
To: paulus; +Cc: linuxppc-dev
In-Reply-To: <20070803160900.069522000@linux.vnet.ibm.com>
Make the fixup_memsize function common for all of 4xx as several chips share
the same SDRAM controller. Also add functions to reset 40x chips and quiesce
the ethernet.
Signed-off-by: Josh Boyer <jwboyer@linux.vnet.ibm.com>
---
arch/powerpc/boot/44x.h | 4 ----
arch/powerpc/boot/4xx.c | 36 +++++++++++++++++++++++++++++++-----
arch/powerpc/boot/4xx.h | 20 ++++++++++++++++++++
arch/powerpc/boot/dcr.h | 2 ++
arch/powerpc/boot/ebony.c | 3 ++-
5 files changed, 55 insertions(+), 10 deletions(-)
--- linux-2.6.orig/arch/powerpc/boot/44x.h
+++ linux-2.6/arch/powerpc/boot/44x.h
@@ -10,10 +10,6 @@
#ifndef _PPC_BOOT_44X_H_
#define _PPC_BOOT_44X_H_
-void ibm44x_fixup_memsize(void);
-void ibm4xx_fixup_ebc_ranges(const char *ebc);
-
-void ibm44x_dbcr_reset(void);
void ebony_init(void *mac0, void *mac1);
#endif /* _PPC_BOOT_44X_H_ */
--- linux-2.6.orig/arch/powerpc/boot/4xx.c
+++ linux-2.6/arch/powerpc/boot/4xx.c
@@ -21,8 +21,8 @@
#include "reg.h"
#include "dcr.h"
-/* Read the 44x memory controller to get size of system memory. */
-void ibm44x_fixup_memsize(void)
+/* Read the 4xx SDRAM controller to get size of system memory. */
+void ibm4xx_fixup_memsize(void)
{
int i;
unsigned long memsize, bank_config;
@@ -39,22 +39,48 @@ void ibm44x_fixup_memsize(void)
dt_fixup_memory(0, memsize);
}
-#define SPRN_DBCR0 0x134
-#define DBCR0_RST_SYSTEM 0x30000000
+#define DBCR0_RST_SYSTEM 0x30000000
void ibm44x_dbcr_reset(void)
{
unsigned long tmp;
+ /* DBCR0 on 44x is SPRN 0x134 */
asm volatile (
"mfspr %0,%1\n"
"oris %0,%0,%2@h\n"
"mtspr %1,%0"
- : "=&r"(tmp) : "i"(SPRN_DBCR0), "i"(DBCR0_RST_SYSTEM)
+ : "=&r"(tmp) : "i"(0x134), "i"(DBCR0_RST_SYSTEM)
);
}
+void ibm40x_dbcr_reset(void)
+{
+ unsigned long tmp;
+
+ /* DBCR0 on 40x is SPRN 0x3F2 */
+ asm volatile (
+ "mfspr %0,%1\n"
+ "oris %0,%0,%2@h\n"
+ "mtspr %1,%0"
+ : "=&r"(tmp) : "i"(0x3F2), "i"(DBCR0_RST_SYSTEM)
+ );
+}
+
+#define EMAC_RESET 0x20000000
+#define MAL_RESET 0x80000000
+void ibm4xx_quiesce_eth(u32 *emac0, u32 *emac1)
+{
+ /* Quiesce the MAL and EMAC(s) since PIBS/OpenBIOS don't do this for us */
+ if (emac0)
+ *emac0 = EMAC_RESET;
+ if (emac1)
+ *emac1 = EMAC_RESET;
+
+ mtdcr(DCRN_MAL0_CFG, MAL_RESET);
+}
+
/* Read 4xx EBC bus bridge registers to get mappings of the peripheral
* banks into the OPB address space */
void ibm4xx_fixup_ebc_ranges(const char *ebc)
--- /dev/null
+++ linux-2.6/arch/powerpc/boot/4xx.h
@@ -0,0 +1,20 @@
+/*
+ * PowerPC 4xx related functions
+ *
+ * Copyright 2007 IBM Corporation.
+ * Josh Boyer <jwboyer@linux.vnet.ibm.com>
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2. This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+#ifndef _POWERPC_BOOT_4XX_H_
+#define _POWERPC_BOOT_4XX_H_
+
+void ibm4xx_fixup_memsize(void);
+void ibm44x_dbcr_reset(void);
+void ibm40x_dbcr_reset(void);
+void ibm4xx_quiesce_eth(u32 *emac0, u32 *emac1);
+void ibm4xx_fixup_ebc_ranges(const char *ebc);
+
+#endif /* _POWERPC_BOOT_4XX_H_ */
--- linux-2.6.orig/arch/powerpc/boot/dcr.h
+++ linux-2.6/arch/powerpc/boot/dcr.h
@@ -121,4 +121,6 @@ static const unsigned long sdram_bxcr[]
#define DCRN_CPC0_MIRQ1 0x0ed
#define DCRN_CPC0_JTAGID 0x0ef
+#define DCRN_MAL0_CFG 0x180
+
#endif /* _PPC_BOOT_DCR_H_ */
--- linux-2.6.orig/arch/powerpc/boot/ebony.c
+++ linux-2.6/arch/powerpc/boot/ebony.c
@@ -25,6 +25,7 @@
#include "ops.h"
#include "reg.h"
#include "dcr.h"
+#include "4xx.h"
#include "44x.h"
extern char _dtb_start[];
@@ -98,7 +99,7 @@ static void ebony_fixups(void)
unsigned long sysclk = 33000000;
ibm440gp_fixup_clocks(sysclk, 6 * 1843200);
- ibm44x_fixup_memsize();
+ ibm4xx_fixup_memsize();
dt_fixup_mac_addresses(ebony_mac0, ebony_mac1);
ibm4xx_fixup_ebc_ranges("/plb/opb/ebc");
}
--
^ permalink raw reply
* [patch 03/10] Rename 44x bootwrapper
From: Josh Boyer @ 2007-08-03 16:09 UTC (permalink / raw)
To: paulus; +Cc: linuxppc-dev
In-Reply-To: <20070803160900.069522000@linux.vnet.ibm.com>
Rename the 44x.c wrapper file to 4xx.c. This will allow us to add common
functions in a single file that can be shared across all of 4xx.
Signed-off-by: Josh Boyer <jwboyer@linux.vnet.ibm.com>
---
arch/powerpc/boot/44x.c | 85 ---------------------------------------------
arch/powerpc/boot/4xx.c | 85 +++++++++++++++++++++++++++++++++++++++++++++
arch/powerpc/boot/Makefile | 4 +-
3 files changed, 87 insertions(+), 87 deletions(-)
--- linux-2.6.orig/arch/powerpc/boot/44x.c
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * Copyright 2007 David Gibson, IBM Corporation.
- *
- * Based on earlier code:
- * Matt Porter <mporter@kernel.crashing.org>
- * Copyright 2002-2005 MontaVista Software Inc.
- *
- * Eugene Surovegin <eugene.surovegin@zultys.com> or <ebs@ebshome.net>
- * Copyright (c) 2003, 2004 Zultys Technologies
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-#include <stddef.h>
-#include "types.h"
-#include "string.h"
-#include "stdio.h"
-#include "ops.h"
-#include "reg.h"
-#include "dcr.h"
-
-/* Read the 44x memory controller to get size of system memory. */
-void ibm44x_fixup_memsize(void)
-{
- int i;
- unsigned long memsize, bank_config;
-
- memsize = 0;
- for (i = 0; i < ARRAY_SIZE(sdram_bxcr); i++) {
- mtdcr(DCRN_SDRAM0_CFGADDR, sdram_bxcr[i]);
- bank_config = mfdcr(DCRN_SDRAM0_CFGDATA);
-
- if (bank_config & SDRAM_CONFIG_BANK_ENABLE)
- memsize += SDRAM_CONFIG_BANK_SIZE(bank_config);
- }
-
- dt_fixup_memory(0, memsize);
-}
-
-#define SPRN_DBCR0 0x134
-#define DBCR0_RST_SYSTEM 0x30000000
-
-void ibm44x_dbcr_reset(void)
-{
- unsigned long tmp;
-
- asm volatile (
- "mfspr %0,%1\n"
- "oris %0,%0,%2@h\n"
- "mtspr %1,%0"
- : "=&r"(tmp) : "i"(SPRN_DBCR0), "i"(DBCR0_RST_SYSTEM)
- );
-
-}
-
-/* Read 4xx EBC bus bridge registers to get mappings of the peripheral
- * banks into the OPB address space */
-void ibm4xx_fixup_ebc_ranges(const char *ebc)
-{
- void *devp;
- u32 bxcr;
- u32 ranges[EBC_NUM_BANKS*4];
- u32 *p = ranges;
- int i;
-
- for (i = 0; i < EBC_NUM_BANKS; i++) {
- mtdcr(DCRN_EBC0_CFGADDR, EBC_BXCR(i));
- bxcr = mfdcr(DCRN_EBC0_CFGDATA);
-
- if ((bxcr & EBC_BXCR_BU) != EBC_BXCR_BU_OFF) {
- *p++ = i;
- *p++ = 0;
- *p++ = bxcr & EBC_BXCR_BAS;
- *p++ = EBC_BXCR_BANK_SIZE(bxcr);
- }
- }
-
- devp = finddevice(ebc);
- if (! devp)
- fatal("Couldn't locate EBC node %s\n\r", ebc);
-
- setprop(devp, "ranges", ranges, (p - ranges) * sizeof(u32));
-}
--- /dev/null
+++ linux-2.6/arch/powerpc/boot/4xx.c
@@ -0,0 +1,85 @@
+/*
+ * Copyright 2007 David Gibson, IBM Corporation.
+ *
+ * Based on earlier code:
+ * Matt Porter <mporter@kernel.crashing.org>
+ * Copyright 2002-2005 MontaVista Software Inc.
+ *
+ * Eugene Surovegin <eugene.surovegin@zultys.com> or <ebs@ebshome.net>
+ * Copyright (c) 2003, 2004 Zultys Technologies
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <stddef.h>
+#include "types.h"
+#include "string.h"
+#include "stdio.h"
+#include "ops.h"
+#include "reg.h"
+#include "dcr.h"
+
+/* Read the 44x memory controller to get size of system memory. */
+void ibm44x_fixup_memsize(void)
+{
+ int i;
+ unsigned long memsize, bank_config;
+
+ memsize = 0;
+ for (i = 0; i < ARRAY_SIZE(sdram_bxcr); i++) {
+ mtdcr(DCRN_SDRAM0_CFGADDR, sdram_bxcr[i]);
+ bank_config = mfdcr(DCRN_SDRAM0_CFGDATA);
+
+ if (bank_config & SDRAM_CONFIG_BANK_ENABLE)
+ memsize += SDRAM_CONFIG_BANK_SIZE(bank_config);
+ }
+
+ dt_fixup_memory(0, memsize);
+}
+
+#define SPRN_DBCR0 0x134
+#define DBCR0_RST_SYSTEM 0x30000000
+
+void ibm44x_dbcr_reset(void)
+{
+ unsigned long tmp;
+
+ asm volatile (
+ "mfspr %0,%1\n"
+ "oris %0,%0,%2@h\n"
+ "mtspr %1,%0"
+ : "=&r"(tmp) : "i"(SPRN_DBCR0), "i"(DBCR0_RST_SYSTEM)
+ );
+
+}
+
+/* Read 4xx EBC bus bridge registers to get mappings of the peripheral
+ * banks into the OPB address space */
+void ibm4xx_fixup_ebc_ranges(const char *ebc)
+{
+ void *devp;
+ u32 bxcr;
+ u32 ranges[EBC_NUM_BANKS*4];
+ u32 *p = ranges;
+ int i;
+
+ for (i = 0; i < EBC_NUM_BANKS; i++) {
+ mtdcr(DCRN_EBC0_CFGADDR, EBC_BXCR(i));
+ bxcr = mfdcr(DCRN_EBC0_CFGDATA);
+
+ if ((bxcr & EBC_BXCR_BU) != EBC_BXCR_BU_OFF) {
+ *p++ = i;
+ *p++ = 0;
+ *p++ = bxcr & EBC_BXCR_BAS;
+ *p++ = EBC_BXCR_BANK_SIZE(bxcr);
+ }
+ }
+
+ devp = finddevice(ebc);
+ if (! devp)
+ fatal("Couldn't locate EBC node %s\n\r", ebc);
+
+ setprop(devp, "ranges", ranges, (p - ranges) * sizeof(u32));
+}
--- linux-2.6.orig/arch/powerpc/boot/Makefile
+++ linux-2.6/arch/powerpc/boot/Makefile
@@ -31,7 +31,7 @@ endif
BOOTCFLAGS += -I$(obj) -I$(srctree)/$(obj)
-$(obj)/44x.o: BOOTCFLAGS += -mcpu=440
+$(obj)/4xx.o: BOOTCFLAGS += -mcpu=440
$(obj)/ebony.o: BOOTCFLAGS += -mcpu=440
zlib := inffast.c inflate.c inftrees.c
@@ -44,7 +44,7 @@ $(addprefix $(obj)/,$(zlib) gunzip_util.
src-wlib := string.S crt0.S stdio.c main.c flatdevtree.c flatdevtree_misc.c \
ns16550.c serial.c simple_alloc.c div64.S util.S \
gunzip_util.c elf_util.c $(zlib) devtree.c oflib.c ofconsole.c \
- 44x.c ebony.c mv64x60.c mpsc.c mv64x60_i2c.c cuboot.c
+ 4xx.c ebony.c mv64x60.c mpsc.c mv64x60_i2c.c cuboot.c
src-plat := of.c cuboot-83xx.c cuboot-85xx.c holly.c \
cuboot-ebony.c treeboot-ebony.c prpmc2800.c \
ps3-head.S ps3-hvcall.S ps3.c
--
^ permalink raw reply
* [patch 02/10] 4xx Kconfig cleanup
From: Josh Boyer @ 2007-08-03 16:09 UTC (permalink / raw)
To: paulus; +Cc: linuxppc-dev
In-Reply-To: <20070803160900.069522000@linux.vnet.ibm.com>
Remove some leftover cruft in the 40x Kconfig file. Also make sure we
select WANT_DEVICE_TREE for 40x.
Signed-off-by: Josh Boyer <jwboyer@linux.vnet.ibm.com>
---
arch/powerpc/platforms/40x/Kconfig | 77 ---------------------------------
arch/powerpc/platforms/Kconfig.cputype | 1
2 files changed, 1 insertion(+), 77 deletions(-)
--- linux-2.6.orig/arch/powerpc/platforms/40x/Kconfig
+++ linux-2.6/arch/powerpc/platforms/40x/Kconfig
@@ -1,16 +1,3 @@
-config 4xx
- bool
- depends on 40x || 44x
- default y
-
-config BOOKE
- bool
- depends on 44x
- default y
-
-menu "AMCC 40x options"
- depends on 40x
-
#config BUBINGA
# bool "Bubinga"
# depends on 40x
@@ -82,8 +69,6 @@ menu "AMCC 40x options"
# help
# This option enables support for the Xilinx ML300 evaluation board.
-endmenu
-
# 40x specific CPU modules, selected based on the board above.
config NP405H
bool
@@ -126,68 +111,6 @@ config IBM405_ERR77
config IBM405_ERR51
bool
-menu "AMCC 44x options"
- depends on 44x
-
-#config BAMBOO
-# bool "Bamboo"
-# depends on 44x
-# default n
-# select 440EP
-# help
-# This option enables support for the IBM PPC440EP evaluation board.
-
-config EBONY
- bool "Ebony"
- depends on 44x
- default y
- select 440GP
- help
- This option enables support for the IBM PPC440GP evaluation board.
-
-#config LUAN
-# bool "Luan"
-# depends on 44x
-# default n
-# select 440SP
-# help
-# This option enables support for the IBM PPC440SP evaluation board.
-
-#config OCOTEA
-# bool "Ocotea"
-# depends on 44x
-# default n
-# select 440GX
-# help
-# This option enables support for the IBM PPC440GX evaluation board.
-
-endmenu
-
-# 44x specific CPU modules, selected based on the board above.
-config 440EP
- bool
- select PPC_FPU
- select IBM440EP_ERR42
-
-config 440GP
- bool
- select IBM_NEW_EMAC_ZMII
-
-config 440GX
- bool
-
-config 440SP
- bool
-
-config 440A
- bool
- depends on 440GX
- default y
-
-# 44x errata/workaround config symbols, selected by the CPU models above
-config IBM440EP_ERR42
- bool
-
#config XILINX_OCP
# bool
# depends on XILINX_ML300
--- linux-2.6.orig/arch/powerpc/platforms/Kconfig.cputype
+++ linux-2.6/arch/powerpc/platforms/Kconfig.cputype
@@ -40,6 +40,7 @@ config PPC_8xx
config 40x
bool "AMCC 40x"
select PPC_DCR_NATIVE
+ select WANT_DEVICE_TREE
config 44x
bool "AMCC 44x"
--
^ permalink raw reply
* [patch 01/10] Rename 4xx paths to 40x
From: Josh Boyer @ 2007-08-03 16:09 UTC (permalink / raw)
To: paulus; +Cc: linuxppc-dev, David Gibson
In-Reply-To: <20070803160900.069522000@linux.vnet.ibm.com>
4xx is a bit of a misnomer for certain things, as they really apply to PowerPC
40x only. Rename some of the files to clean this up.
Signed-off-by: Josh Boyer <jwboyer@linux.vnet.ibm.com>
Acked-by: David Gibson <david@gibson.dropbear.id.au>
---
arch/powerpc/Makefile | 2
arch/powerpc/kernel/Makefile | 2
arch/powerpc/kernel/head_40x.S | 1021 ++++++++++++++++++++++++++++++++++++
arch/powerpc/kernel/head_4xx.S | 1021 ------------------------------------
arch/powerpc/mm/40x_mmu.c | 135 ++++
arch/powerpc/mm/4xx_mmu.c | 135 ----
arch/powerpc/mm/Makefile | 2
arch/powerpc/platforms/40x/Kconfig | 208 +++++++
arch/powerpc/platforms/40x/Makefile | 1
arch/powerpc/platforms/4xx/Kconfig | 208 -------
arch/powerpc/platforms/4xx/Makefile | 1
11 files changed, 1368 insertions(+), 1368 deletions(-)
--- linux-2.6.orig/arch/powerpc/Makefile
+++ linux-2.6/arch/powerpc/Makefile
@@ -123,7 +123,7 @@ CFLAGS += $(cpu-as-y)
head-y := arch/powerpc/kernel/head_32.o
head-$(CONFIG_PPC64) := arch/powerpc/kernel/head_64.o
head-$(CONFIG_8xx) := arch/powerpc/kernel/head_8xx.o
-head-$(CONFIG_4xx) := arch/powerpc/kernel/head_4xx.o
+head-$(CONFIG_40x) := arch/powerpc/kernel/head_40x.o
head-$(CONFIG_44x) := arch/powerpc/kernel/head_44x.o
head-$(CONFIG_FSL_BOOKE) := arch/powerpc/kernel/head_fsl_booke.o
--- linux-2.6.orig/arch/powerpc/kernel/Makefile
+++ linux-2.6/arch/powerpc/kernel/Makefile
@@ -46,7 +46,7 @@ ifeq ($(CONFIG_PPC_MERGE),y)
extra-$(CONFIG_PPC_STD_MMU) := head_32.o
extra-$(CONFIG_PPC64) := head_64.o
-extra-$(CONFIG_40x) := head_4xx.o
+extra-$(CONFIG_40x) := head_40x.o
extra-$(CONFIG_44x) := head_44x.o
extra-$(CONFIG_FSL_BOOKE) := head_fsl_booke.o
extra-$(CONFIG_8xx) := head_8xx.o
--- /dev/null
+++ linux-2.6/arch/powerpc/kernel/head_40x.S
@@ -0,0 +1,1021 @@
+/*
+ * Copyright (c) 1995-1996 Gary Thomas <gdt@linuxppc.org>
+ * Initial PowerPC version.
+ * Copyright (c) 1996 Cort Dougan <cort@cs.nmt.edu>
+ * Rewritten for PReP
+ * Copyright (c) 1996 Paul Mackerras <paulus@cs.anu.edu.au>
+ * Low-level exception handers, MMU support, and rewrite.
+ * Copyright (c) 1997 Dan Malek <dmalek@jlc.net>
+ * PowerPC 8xx modifications.
+ * Copyright (c) 1998-1999 TiVo, Inc.
+ * PowerPC 403GCX modifications.
+ * Copyright (c) 1999 Grant Erickson <grant@lcse.umn.edu>
+ * PowerPC 403GCX/405GP modifications.
+ * Copyright 2000 MontaVista Software Inc.
+ * PPC405 modifications
+ * PowerPC 403GCX/405GP modifications.
+ * Author: MontaVista Software, Inc.
+ * frank_rowand@mvista.com or source@mvista.com
+ * debbie_chu@mvista.com
+ *
+ *
+ * Module name: head_4xx.S
+ *
+ * Description:
+ * Kernel execution entry point code.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <asm/processor.h>
+#include <asm/page.h>
+#include <asm/mmu.h>
+#include <asm/pgtable.h>
+#include <asm/ibm4xx.h>
+#include <asm/cputable.h>
+#include <asm/thread_info.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+
+/* As with the other PowerPC ports, it is expected that when code
+ * execution begins here, the following registers contain valid, yet
+ * optional, information:
+ *
+ * r3 - Board info structure pointer (DRAM, frequency, MAC address, etc.)
+ * r4 - Starting address of the init RAM disk
+ * r5 - Ending address of the init RAM disk
+ * r6 - Start of kernel command line string (e.g. "mem=96m")
+ * r7 - End of kernel command line string
+ *
+ * This is all going to change RSN when we add bi_recs....... -- Dan
+ */
+ .text
+_GLOBAL(_stext)
+_GLOBAL(_start)
+
+ /* Save parameters we are passed.
+ */
+ mr r31,r3
+ mr r30,r4
+ mr r29,r5
+ mr r28,r6
+ mr r27,r7
+
+ /* We have to turn on the MMU right away so we get cache modes
+ * set correctly.
+ */
+ bl initial_mmu
+
+/* We now have the lower 16 Meg mapped into TLB entries, and the caches
+ * ready to work.
+ */
+turn_on_mmu:
+ lis r0,MSR_KERNEL@h
+ ori r0,r0,MSR_KERNEL@l
+ mtspr SPRN_SRR1,r0
+ lis r0,start_here@h
+ ori r0,r0,start_here@l
+ mtspr SPRN_SRR0,r0
+ SYNC
+ rfi /* enables MMU */
+ b . /* prevent prefetch past rfi */
+
+/*
+ * This area is used for temporarily saving registers during the
+ * critical exception prolog.
+ */
+ . = 0xc0
+crit_save:
+_GLOBAL(crit_r10)
+ .space 4
+_GLOBAL(crit_r11)
+ .space 4
+
+/*
+ * Exception vector entry code. This code runs with address translation
+ * turned off (i.e. using physical addresses). We assume SPRG3 has the
+ * physical address of the current task thread_struct.
+ * Note that we have to have decremented r1 before we write to any fields
+ * of the exception frame, since a critical interrupt could occur at any
+ * time, and it will write to the area immediately below the current r1.
+ */
+#define NORMAL_EXCEPTION_PROLOG \
+ mtspr SPRN_SPRG0,r10; /* save two registers to work with */\
+ mtspr SPRN_SPRG1,r11; \
+ mtspr SPRN_SPRG2,r1; \
+ mfcr r10; /* save CR in r10 for now */\
+ mfspr r11,SPRN_SRR1; /* check whether user or kernel */\
+ andi. r11,r11,MSR_PR; \
+ beq 1f; \
+ mfspr r1,SPRN_SPRG3; /* if from user, start at top of */\
+ lwz r1,THREAD_INFO-THREAD(r1); /* this thread's kernel stack */\
+ addi r1,r1,THREAD_SIZE; \
+1: subi r1,r1,INT_FRAME_SIZE; /* Allocate an exception frame */\
+ tophys(r11,r1); \
+ stw r10,_CCR(r11); /* save various registers */\
+ stw r12,GPR12(r11); \
+ stw r9,GPR9(r11); \
+ mfspr r10,SPRN_SPRG0; \
+ stw r10,GPR10(r11); \
+ mfspr r12,SPRN_SPRG1; \
+ stw r12,GPR11(r11); \
+ mflr r10; \
+ stw r10,_LINK(r11); \
+ mfspr r10,SPRN_SPRG2; \
+ mfspr r12,SPRN_SRR0; \
+ stw r10,GPR1(r11); \
+ mfspr r9,SPRN_SRR1; \
+ stw r10,0(r11); \
+ rlwinm r9,r9,0,14,12; /* clear MSR_WE (necessary?) */\
+ stw r0,GPR0(r11); \
+ SAVE_4GPRS(3, r11); \
+ SAVE_2GPRS(7, r11)
+
+/*
+ * Exception prolog for critical exceptions. This is a little different
+ * from the normal exception prolog above since a critical exception
+ * can potentially occur at any point during normal exception processing.
+ * Thus we cannot use the same SPRG registers as the normal prolog above.
+ * Instead we use a couple of words of memory at low physical addresses.
+ * This is OK since we don't support SMP on these processors.
+ */
+#define CRITICAL_EXCEPTION_PROLOG \
+ stw r10,crit_r10@l(0); /* save two registers to work with */\
+ stw r11,crit_r11@l(0); \
+ mfcr r10; /* save CR in r10 for now */\
+ mfspr r11,SPRN_SRR3; /* check whether user or kernel */\
+ andi. r11,r11,MSR_PR; \
+ lis r11,critical_stack_top@h; \
+ ori r11,r11,critical_stack_top@l; \
+ beq 1f; \
+ /* COMING FROM USER MODE */ \
+ mfspr r11,SPRN_SPRG3; /* if from user, start at top of */\
+ lwz r11,THREAD_INFO-THREAD(r11); /* this thread's kernel stack */\
+ addi r11,r11,THREAD_SIZE; \
+1: subi r11,r11,INT_FRAME_SIZE; /* Allocate an exception frame */\
+ tophys(r11,r11); \
+ stw r10,_CCR(r11); /* save various registers */\
+ stw r12,GPR12(r11); \
+ stw r9,GPR9(r11); \
+ mflr r10; \
+ stw r10,_LINK(r11); \
+ mfspr r12,SPRN_DEAR; /* save DEAR and ESR in the frame */\
+ stw r12,_DEAR(r11); /* since they may have had stuff */\
+ mfspr r9,SPRN_ESR; /* in them at the point where the */\
+ stw r9,_ESR(r11); /* exception was taken */\
+ mfspr r12,SPRN_SRR2; \
+ stw r1,GPR1(r11); \
+ mfspr r9,SPRN_SRR3; \
+ stw r1,0(r11); \
+ tovirt(r1,r11); \
+ rlwinm r9,r9,0,14,12; /* clear MSR_WE (necessary?) */\
+ stw r0,GPR0(r11); \
+ SAVE_4GPRS(3, r11); \
+ SAVE_2GPRS(7, r11)
+
+ /*
+ * State at this point:
+ * r9 saved in stack frame, now saved SRR3 & ~MSR_WE
+ * r10 saved in crit_r10 and in stack frame, trashed
+ * r11 saved in crit_r11 and in stack frame,
+ * now phys stack/exception frame pointer
+ * r12 saved in stack frame, now saved SRR2
+ * CR saved in stack frame, CR0.EQ = !SRR3.PR
+ * LR, DEAR, ESR in stack frame
+ * r1 saved in stack frame, now virt stack/excframe pointer
+ * r0, r3-r8 saved in stack frame
+ */
+
+/*
+ * Exception vectors.
+ */
+#define START_EXCEPTION(n, label) \
+ . = n; \
+label:
+
+#define EXCEPTION(n, label, hdlr, xfer) \
+ START_EXCEPTION(n, label); \
+ NORMAL_EXCEPTION_PROLOG; \
+ addi r3,r1,STACK_FRAME_OVERHEAD; \
+ xfer(n, hdlr)
+
+#define CRITICAL_EXCEPTION(n, label, hdlr) \
+ START_EXCEPTION(n, label); \
+ CRITICAL_EXCEPTION_PROLOG; \
+ addi r3,r1,STACK_FRAME_OVERHEAD; \
+ EXC_XFER_TEMPLATE(hdlr, n+2, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \
+ NOCOPY, crit_transfer_to_handler, \
+ ret_from_crit_exc)
+
+#define EXC_XFER_TEMPLATE(hdlr, trap, msr, copyee, tfer, ret) \
+ li r10,trap; \
+ stw r10,_TRAP(r11); \
+ lis r10,msr@h; \
+ ori r10,r10,msr@l; \
+ copyee(r10, r9); \
+ bl tfer; \
+ .long hdlr; \
+ .long ret
+
+#define COPY_EE(d, s) rlwimi d,s,0,16,16
+#define NOCOPY(d, s)
+
+#define EXC_XFER_STD(n, hdlr) \
+ EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, NOCOPY, transfer_to_handler_full, \
+ ret_from_except_full)
+
+#define EXC_XFER_LITE(n, hdlr) \
+ EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL, NOCOPY, transfer_to_handler, \
+ ret_from_except)
+
+#define EXC_XFER_EE(n, hdlr) \
+ EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, COPY_EE, transfer_to_handler_full, \
+ ret_from_except_full)
+
+#define EXC_XFER_EE_LITE(n, hdlr) \
+ EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL, COPY_EE, transfer_to_handler, \
+ ret_from_except)
+
+
+/*
+ * 0x0100 - Critical Interrupt Exception
+ */
+ CRITICAL_EXCEPTION(0x0100, CriticalInterrupt, unknown_exception)
+
+/*
+ * 0x0200 - Machine Check Exception
+ */
+ CRITICAL_EXCEPTION(0x0200, MachineCheck, machine_check_exception)
+
+/*
+ * 0x0300 - Data Storage Exception
+ * This happens for just a few reasons. U0 set (but we don't do that),
+ * or zone protection fault (user violation, write to protected page).
+ * If this is just an update of modified status, we do that quickly
+ * and exit. Otherwise, we call heavywight functions to do the work.
+ */
+ START_EXCEPTION(0x0300, DataStorage)
+ mtspr SPRN_SPRG0, r10 /* Save some working registers */
+ mtspr SPRN_SPRG1, r11
+#ifdef CONFIG_403GCX
+ stw r12, 0(r0)
+ stw r9, 4(r0)
+ mfcr r11
+ mfspr r12, SPRN_PID
+ stw r11, 8(r0)
+ stw r12, 12(r0)
+#else
+ mtspr SPRN_SPRG4, r12
+ mtspr SPRN_SPRG5, r9
+ mfcr r11
+ mfspr r12, SPRN_PID
+ mtspr SPRN_SPRG7, r11
+ mtspr SPRN_SPRG6, r12
+#endif
+
+ /* First, check if it was a zone fault (which means a user
+ * tried to access a kernel or read-protected page - always
+ * a SEGV). All other faults here must be stores, so no
+ * need to check ESR_DST as well. */
+ mfspr r10, SPRN_ESR
+ andis. r10, r10, ESR_DIZ@h
+ bne 2f
+
+ mfspr r10, SPRN_DEAR /* Get faulting address */
+
+ /* If we are faulting a kernel address, we have to use the
+ * kernel page tables.
+ */
+ lis r11, TASK_SIZE@h
+ cmplw r10, r11
+ blt+ 3f
+ lis r11, swapper_pg_dir@h
+ ori r11, r11, swapper_pg_dir@l
+ li r9, 0
+ mtspr SPRN_PID, r9 /* TLB will have 0 TID */
+ b 4f
+
+ /* Get the PGD for the current thread.
+ */
+3:
+ mfspr r11,SPRN_SPRG3
+ lwz r11,PGDIR(r11)
+4:
+ tophys(r11, r11)
+ rlwimi r11, r10, 12, 20, 29 /* Create L1 (pgdir/pmd) address */
+ lwz r11, 0(r11) /* Get L1 entry */
+ rlwinm. r12, r11, 0, 0, 19 /* Extract L2 (pte) base address */
+ beq 2f /* Bail if no table */
+
+ rlwimi r12, r10, 22, 20, 29 /* Compute PTE address */
+ lwz r11, 0(r12) /* Get Linux PTE */
+
+ andi. r9, r11, _PAGE_RW /* Is it writeable? */
+ beq 2f /* Bail if not */
+
+ /* Update 'changed'.
+ */
+ ori r11, r11, _PAGE_DIRTY|_PAGE_ACCESSED|_PAGE_HWWRITE
+ stw r11, 0(r12) /* Update Linux page table */
+
+ /* Most of the Linux PTE is ready to load into the TLB LO.
+ * We set ZSEL, where only the LS-bit determines user access.
+ * We set execute, because we don't have the granularity to
+ * properly set this at the page level (Linux problem).
+ * If shared is set, we cause a zero PID->TID load.
+ * Many of these bits are software only. Bits we don't set
+ * here we (properly should) assume have the appropriate value.
+ */
+ li r12, 0x0ce2
+ andc r11, r11, r12 /* Make sure 20, 21 are zero */
+
+ /* find the TLB index that caused the fault. It has to be here.
+ */
+ tlbsx r9, 0, r10
+
+ tlbwe r11, r9, TLB_DATA /* Load TLB LO */
+
+ /* Done...restore registers and get out of here.
+ */
+#ifdef CONFIG_403GCX
+ lwz r12, 12(r0)
+ lwz r11, 8(r0)
+ mtspr SPRN_PID, r12
+ mtcr r11
+ lwz r9, 4(r0)
+ lwz r12, 0(r0)
+#else
+ mfspr r12, SPRN_SPRG6
+ mfspr r11, SPRN_SPRG7
+ mtspr SPRN_PID, r12
+ mtcr r11
+ mfspr r9, SPRN_SPRG5
+ mfspr r12, SPRN_SPRG4
+#endif
+ mfspr r11, SPRN_SPRG1
+ mfspr r10, SPRN_SPRG0
+ PPC405_ERR77_SYNC
+ rfi /* Should sync shadow TLBs */
+ b . /* prevent prefetch past rfi */
+
+2:
+ /* The bailout. Restore registers to pre-exception conditions
+ * and call the heavyweights to help us out.
+ */
+#ifdef CONFIG_403GCX
+ lwz r12, 12(r0)
+ lwz r11, 8(r0)
+ mtspr SPRN_PID, r12
+ mtcr r11
+ lwz r9, 4(r0)
+ lwz r12, 0(r0)
+#else
+ mfspr r12, SPRN_SPRG6
+ mfspr r11, SPRN_SPRG7
+ mtspr SPRN_PID, r12
+ mtcr r11
+ mfspr r9, SPRN_SPRG5
+ mfspr r12, SPRN_SPRG4
+#endif
+ mfspr r11, SPRN_SPRG1
+ mfspr r10, SPRN_SPRG0
+ b DataAccess
+
+/*
+ * 0x0400 - Instruction Storage Exception
+ * This is caused by a fetch from non-execute or guarded pages.
+ */
+ START_EXCEPTION(0x0400, InstructionAccess)
+ NORMAL_EXCEPTION_PROLOG
+ mr r4,r12 /* Pass SRR0 as arg2 */
+ li r5,0 /* Pass zero as arg3 */
+ EXC_XFER_EE_LITE(0x400, handle_page_fault)
+
+/* 0x0500 - External Interrupt Exception */
+ EXCEPTION(0x0500, HardwareInterrupt, do_IRQ, EXC_XFER_LITE)
+
+/* 0x0600 - Alignment Exception */
+ START_EXCEPTION(0x0600, Alignment)
+ NORMAL_EXCEPTION_PROLOG
+ mfspr r4,SPRN_DEAR /* Grab the DEAR and save it */
+ stw r4,_DEAR(r11)
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ EXC_XFER_EE(0x600, alignment_exception)
+
+/* 0x0700 - Program Exception */
+ START_EXCEPTION(0x0700, ProgramCheck)
+ NORMAL_EXCEPTION_PROLOG
+ mfspr r4,SPRN_ESR /* Grab the ESR and save it */
+ stw r4,_ESR(r11)
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ EXC_XFER_STD(0x700, program_check_exception)
+
+ EXCEPTION(0x0800, Trap_08, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x0900, Trap_09, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x0A00, Trap_0A, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x0B00, Trap_0B, unknown_exception, EXC_XFER_EE)
+
+/* 0x0C00 - System Call Exception */
+ START_EXCEPTION(0x0C00, SystemCall)
+ NORMAL_EXCEPTION_PROLOG
+ EXC_XFER_EE_LITE(0xc00, DoSyscall)
+
+ EXCEPTION(0x0D00, Trap_0D, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x0E00, Trap_0E, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x0F00, Trap_0F, unknown_exception, EXC_XFER_EE)
+
+/* 0x1000 - Programmable Interval Timer (PIT) Exception */
+ START_EXCEPTION(0x1000, Decrementer)
+ NORMAL_EXCEPTION_PROLOG
+ lis r0,TSR_PIS@h
+ mtspr SPRN_TSR,r0 /* Clear the PIT exception */
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ EXC_XFER_LITE(0x1000, timer_interrupt)
+
+#if 0
+/* NOTE:
+ * FIT and WDT handlers are not implemented yet.
+ */
+
+/* 0x1010 - Fixed Interval Timer (FIT) Exception
+*/
+ STND_EXCEPTION(0x1010, FITException, unknown_exception)
+
+/* 0x1020 - Watchdog Timer (WDT) Exception
+*/
+#ifdef CONFIG_BOOKE_WDT
+ CRITICAL_EXCEPTION(0x1020, WDTException, WatchdogException)
+#else
+ CRITICAL_EXCEPTION(0x1020, WDTException, unknown_exception)
+#endif
+#endif
+
+/* 0x1100 - Data TLB Miss Exception
+ * As the name implies, translation is not in the MMU, so search the
+ * page tables and fix it. The only purpose of this function is to
+ * load TLB entries from the page table if they exist.
+ */
+ START_EXCEPTION(0x1100, DTLBMiss)
+ mtspr SPRN_SPRG0, r10 /* Save some working registers */
+ mtspr SPRN_SPRG1, r11
+#ifdef CONFIG_403GCX
+ stw r12, 0(r0)
+ stw r9, 4(r0)
+ mfcr r11
+ mfspr r12, SPRN_PID
+ stw r11, 8(r0)
+ stw r12, 12(r0)
+#else
+ mtspr SPRN_SPRG4, r12
+ mtspr SPRN_SPRG5, r9
+ mfcr r11
+ mfspr r12, SPRN_PID
+ mtspr SPRN_SPRG7, r11
+ mtspr SPRN_SPRG6, r12
+#endif
+ mfspr r10, SPRN_DEAR /* Get faulting address */
+
+ /* If we are faulting a kernel address, we have to use the
+ * kernel page tables.
+ */
+ lis r11, TASK_SIZE@h
+ cmplw r10, r11
+ blt+ 3f
+ lis r11, swapper_pg_dir@h
+ ori r11, r11, swapper_pg_dir@l
+ li r9, 0
+ mtspr SPRN_PID, r9 /* TLB will have 0 TID */
+ b 4f
+
+ /* Get the PGD for the current thread.
+ */
+3:
+ mfspr r11,SPRN_SPRG3
+ lwz r11,PGDIR(r11)
+4:
+ tophys(r11, r11)
+ rlwimi r11, r10, 12, 20, 29 /* Create L1 (pgdir/pmd) address */
+ lwz r12, 0(r11) /* Get L1 entry */
+ andi. r9, r12, _PMD_PRESENT /* Check if it points to a PTE page */
+ beq 2f /* Bail if no table */
+
+ rlwimi r12, r10, 22, 20, 29 /* Compute PTE address */
+ lwz r11, 0(r12) /* Get Linux PTE */
+ andi. r9, r11, _PAGE_PRESENT
+ beq 5f
+
+ ori r11, r11, _PAGE_ACCESSED
+ stw r11, 0(r12)
+
+ /* Create TLB tag. This is the faulting address plus a static
+ * set of bits. These are size, valid, E, U0.
+ */
+ li r12, 0x00c0
+ rlwimi r10, r12, 0, 20, 31
+
+ b finish_tlb_load
+
+2: /* Check for possible large-page pmd entry */
+ rlwinm. r9, r12, 2, 22, 24
+ beq 5f
+
+ /* Create TLB tag. This is the faulting address, plus a static
+ * set of bits (valid, E, U0) plus the size from the PMD.
+ */
+ ori r9, r9, 0x40
+ rlwimi r10, r9, 0, 20, 31
+ mr r11, r12
+
+ b finish_tlb_load
+
+5:
+ /* The bailout. Restore registers to pre-exception conditions
+ * and call the heavyweights to help us out.
+ */
+#ifdef CONFIG_403GCX
+ lwz r12, 12(r0)
+ lwz r11, 8(r0)
+ mtspr SPRN_PID, r12
+ mtcr r11
+ lwz r9, 4(r0)
+ lwz r12, 0(r0)
+#else
+ mfspr r12, SPRN_SPRG6
+ mfspr r11, SPRN_SPRG7
+ mtspr SPRN_PID, r12
+ mtcr r11
+ mfspr r9, SPRN_SPRG5
+ mfspr r12, SPRN_SPRG4
+#endif
+ mfspr r11, SPRN_SPRG1
+ mfspr r10, SPRN_SPRG0
+ b DataAccess
+
+/* 0x1200 - Instruction TLB Miss Exception
+ * Nearly the same as above, except we get our information from different
+ * registers and bailout to a different point.
+ */
+ START_EXCEPTION(0x1200, ITLBMiss)
+ mtspr SPRN_SPRG0, r10 /* Save some working registers */
+ mtspr SPRN_SPRG1, r11
+#ifdef CONFIG_403GCX
+ stw r12, 0(r0)
+ stw r9, 4(r0)
+ mfcr r11
+ mfspr r12, SPRN_PID
+ stw r11, 8(r0)
+ stw r12, 12(r0)
+#else
+ mtspr SPRN_SPRG4, r12
+ mtspr SPRN_SPRG5, r9
+ mfcr r11
+ mfspr r12, SPRN_PID
+ mtspr SPRN_SPRG7, r11
+ mtspr SPRN_SPRG6, r12
+#endif
+ mfspr r10, SPRN_SRR0 /* Get faulting address */
+
+ /* If we are faulting a kernel address, we have to use the
+ * kernel page tables.
+ */
+ lis r11, TASK_SIZE@h
+ cmplw r10, r11
+ blt+ 3f
+ lis r11, swapper_pg_dir@h
+ ori r11, r11, swapper_pg_dir@l
+ li r9, 0
+ mtspr SPRN_PID, r9 /* TLB will have 0 TID */
+ b 4f
+
+ /* Get the PGD for the current thread.
+ */
+3:
+ mfspr r11,SPRN_SPRG3
+ lwz r11,PGDIR(r11)
+4:
+ tophys(r11, r11)
+ rlwimi r11, r10, 12, 20, 29 /* Create L1 (pgdir/pmd) address */
+ lwz r12, 0(r11) /* Get L1 entry */
+ andi. r9, r12, _PMD_PRESENT /* Check if it points to a PTE page */
+ beq 2f /* Bail if no table */
+
+ rlwimi r12, r10, 22, 20, 29 /* Compute PTE address */
+ lwz r11, 0(r12) /* Get Linux PTE */
+ andi. r9, r11, _PAGE_PRESENT
+ beq 5f
+
+ ori r11, r11, _PAGE_ACCESSED
+ stw r11, 0(r12)
+
+ /* Create TLB tag. This is the faulting address plus a static
+ * set of bits. These are size, valid, E, U0.
+ */
+ li r12, 0x00c0
+ rlwimi r10, r12, 0, 20, 31
+
+ b finish_tlb_load
+
+2: /* Check for possible large-page pmd entry */
+ rlwinm. r9, r12, 2, 22, 24
+ beq 5f
+
+ /* Create TLB tag. This is the faulting address, plus a static
+ * set of bits (valid, E, U0) plus the size from the PMD.
+ */
+ ori r9, r9, 0x40
+ rlwimi r10, r9, 0, 20, 31
+ mr r11, r12
+
+ b finish_tlb_load
+
+5:
+ /* The bailout. Restore registers to pre-exception conditions
+ * and call the heavyweights to help us out.
+ */
+#ifdef CONFIG_403GCX
+ lwz r12, 12(r0)
+ lwz r11, 8(r0)
+ mtspr SPRN_PID, r12
+ mtcr r11
+ lwz r9, 4(r0)
+ lwz r12, 0(r0)
+#else
+ mfspr r12, SPRN_SPRG6
+ mfspr r11, SPRN_SPRG7
+ mtspr SPRN_PID, r12
+ mtcr r11
+ mfspr r9, SPRN_SPRG5
+ mfspr r12, SPRN_SPRG4
+#endif
+ mfspr r11, SPRN_SPRG1
+ mfspr r10, SPRN_SPRG0
+ b InstructionAccess
+
+ EXCEPTION(0x1300, Trap_13, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x1400, Trap_14, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x1500, Trap_15, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x1600, Trap_16, unknown_exception, EXC_XFER_EE)
+#ifdef CONFIG_IBM405_ERR51
+ /* 405GP errata 51 */
+ START_EXCEPTION(0x1700, Trap_17)
+ b DTLBMiss
+#else
+ EXCEPTION(0x1700, Trap_17, unknown_exception, EXC_XFER_EE)
+#endif
+ EXCEPTION(0x1800, Trap_18, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x1900, Trap_19, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x1A00, Trap_1A, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x1B00, Trap_1B, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x1C00, Trap_1C, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x1D00, Trap_1D, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x1E00, Trap_1E, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x1F00, Trap_1F, unknown_exception, EXC_XFER_EE)
+
+/* Check for a single step debug exception while in an exception
+ * handler before state has been saved. This is to catch the case
+ * where an instruction that we are trying to single step causes
+ * an exception (eg ITLB/DTLB miss) and thus the first instruction of
+ * the exception handler generates a single step debug exception.
+ *
+ * If we get a debug trap on the first instruction of an exception handler,
+ * we reset the MSR_DE in the _exception handler's_ MSR (the debug trap is
+ * a critical exception, so we are using SPRN_CSRR1 to manipulate the MSR).
+ * The exception handler was handling a non-critical interrupt, so it will
+ * save (and later restore) the MSR via SPRN_SRR1, which will still have
+ * the MSR_DE bit set.
+ */
+ /* 0x2000 - Debug Exception */
+ START_EXCEPTION(0x2000, DebugTrap)
+ CRITICAL_EXCEPTION_PROLOG
+
+ /*
+ * If this is a single step or branch-taken exception in an
+ * exception entry sequence, it was probably meant to apply to
+ * the code where the exception occurred (since exception entry
+ * doesn't turn off DE automatically). We simulate the effect
+ * of turning off DE on entry to an exception handler by turning
+ * off DE in the SRR3 value and clearing the debug status.
+ */
+ mfspr r10,SPRN_DBSR /* check single-step/branch taken */
+ andis. r10,r10,DBSR_IC@h
+ beq+ 2f
+
+ andi. r10,r9,MSR_IR|MSR_PR /* check supervisor + MMU off */
+ beq 1f /* branch and fix it up */
+
+ mfspr r10,SPRN_SRR2 /* Faulting instruction address */
+ cmplwi r10,0x2100
+ bgt+ 2f /* address above exception vectors */
+
+ /* here it looks like we got an inappropriate debug exception. */
+1: rlwinm r9,r9,0,~MSR_DE /* clear DE in the SRR3 value */
+ lis r10,DBSR_IC@h /* clear the IC event */
+ mtspr SPRN_DBSR,r10
+ /* restore state and get out */
+ lwz r10,_CCR(r11)
+ lwz r0,GPR0(r11)
+ lwz r1,GPR1(r11)
+ mtcrf 0x80,r10
+ mtspr SPRN_SRR2,r12
+ mtspr SPRN_SRR3,r9
+ lwz r9,GPR9(r11)
+ lwz r12,GPR12(r11)
+ lwz r10,crit_r10@l(0)
+ lwz r11,crit_r11@l(0)
+ PPC405_ERR77_SYNC
+ rfci
+ b .
+
+ /* continue normal handling for a critical exception... */
+2: mfspr r4,SPRN_DBSR
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ EXC_XFER_TEMPLATE(DebugException, 0x2002, \
+ (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \
+ NOCOPY, crit_transfer_to_handler, ret_from_crit_exc)
+
+/*
+ * The other Data TLB exceptions bail out to this point
+ * if they can't resolve the lightweight TLB fault.
+ */
+DataAccess:
+ NORMAL_EXCEPTION_PROLOG
+ mfspr r5,SPRN_ESR /* Grab the ESR, save it, pass arg3 */
+ stw r5,_ESR(r11)
+ mfspr r4,SPRN_DEAR /* Grab the DEAR, save it, pass arg2 */
+ EXC_XFER_EE_LITE(0x300, handle_page_fault)
+
+/* Other PowerPC processors, namely those derived from the 6xx-series
+ * have vectors from 0x2100 through 0x2F00 defined, but marked as reserved.
+ * However, for the 4xx-series processors these are neither defined nor
+ * reserved.
+ */
+
+ /* Damn, I came up one instruction too many to fit into the
+ * exception space :-). Both the instruction and data TLB
+ * miss get to this point to load the TLB.
+ * r10 - TLB_TAG value
+ * r11 - Linux PTE
+ * r12, r9 - avilable to use
+ * PID - loaded with proper value when we get here
+ * Upon exit, we reload everything and RFI.
+ * Actually, it will fit now, but oh well.....a common place
+ * to load the TLB.
+ */
+tlb_4xx_index:
+ .long 0
+finish_tlb_load:
+ /* load the next available TLB index.
+ */
+ lwz r9, tlb_4xx_index@l(0)
+ addi r9, r9, 1
+ andi. r9, r9, (PPC4XX_TLB_SIZE-1)
+ stw r9, tlb_4xx_index@l(0)
+
+6:
+ /*
+ * Clear out the software-only bits in the PTE to generate the
+ * TLB_DATA value. These are the bottom 2 bits of the RPM, the
+ * top 3 bits of the zone field, and M.
+ */
+ li r12, 0x0ce2
+ andc r11, r11, r12
+
+ tlbwe r11, r9, TLB_DATA /* Load TLB LO */
+ tlbwe r10, r9, TLB_TAG /* Load TLB HI */
+
+ /* Done...restore registers and get out of here.
+ */
+#ifdef CONFIG_403GCX
+ lwz r12, 12(r0)
+ lwz r11, 8(r0)
+ mtspr SPRN_PID, r12
+ mtcr r11
+ lwz r9, 4(r0)
+ lwz r12, 0(r0)
+#else
+ mfspr r12, SPRN_SPRG6
+ mfspr r11, SPRN_SPRG7
+ mtspr SPRN_PID, r12
+ mtcr r11
+ mfspr r9, SPRN_SPRG5
+ mfspr r12, SPRN_SPRG4
+#endif
+ mfspr r11, SPRN_SPRG1
+ mfspr r10, SPRN_SPRG0
+ PPC405_ERR77_SYNC
+ rfi /* Should sync shadow TLBs */
+ b . /* prevent prefetch past rfi */
+
+/* extern void giveup_fpu(struct task_struct *prev)
+ *
+ * The PowerPC 4xx family of processors do not have an FPU, so this just
+ * returns.
+ */
+_GLOBAL(giveup_fpu)
+ blr
+
+/* This is where the main kernel code starts.
+ */
+start_here:
+
+ /* ptr to current */
+ lis r2,init_task@h
+ ori r2,r2,init_task@l
+
+ /* ptr to phys current thread */
+ tophys(r4,r2)
+ addi r4,r4,THREAD /* init task's THREAD */
+ mtspr SPRN_SPRG3,r4
+
+ /* stack */
+ lis r1,init_thread_union@ha
+ addi r1,r1,init_thread_union@l
+ li r0,0
+ stwu r0,THREAD_SIZE-STACK_FRAME_OVERHEAD(r1)
+
+ bl early_init /* We have to do this with MMU on */
+
+/*
+ * Decide what sort of machine this is and initialize the MMU.
+ */
+ mr r3,r31
+ mr r4,r30
+ mr r5,r29
+ mr r6,r28
+ mr r7,r27
+ bl machine_init
+ bl MMU_init
+
+/* Go back to running unmapped so we can load up new values
+ * and change to using our exception vectors.
+ * On the 4xx, all we have to do is invalidate the TLB to clear
+ * the old 16M byte TLB mappings.
+ */
+ lis r4,2f@h
+ ori r4,r4,2f@l
+ tophys(r4,r4)
+ lis r3,(MSR_KERNEL & ~(MSR_IR|MSR_DR))@h
+ ori r3,r3,(MSR_KERNEL & ~(MSR_IR|MSR_DR))@l
+ mtspr SPRN_SRR0,r4
+ mtspr SPRN_SRR1,r3
+ rfi
+ b . /* prevent prefetch past rfi */
+
+/* Load up the kernel context */
+2:
+ sync /* Flush to memory before changing TLB */
+ tlbia
+ isync /* Flush shadow TLBs */
+
+ /* set up the PTE pointers for the Abatron bdiGDB.
+ */
+ lis r6, swapper_pg_dir@h
+ ori r6, r6, swapper_pg_dir@l
+ lis r5, abatron_pteptrs@h
+ ori r5, r5, abatron_pteptrs@l
+ stw r5, 0xf0(r0) /* Must match your Abatron config file */
+ tophys(r5,r5)
+ stw r6, 0(r5)
+
+/* Now turn on the MMU for real! */
+ lis r4,MSR_KERNEL@h
+ ori r4,r4,MSR_KERNEL@l
+ lis r3,start_kernel@h
+ ori r3,r3,start_kernel@l
+ mtspr SPRN_SRR0,r3
+ mtspr SPRN_SRR1,r4
+ rfi /* enable MMU and jump to start_kernel */
+ b . /* prevent prefetch past rfi */
+
+/* Set up the initial MMU state so we can do the first level of
+ * kernel initialization. This maps the first 16 MBytes of memory 1:1
+ * virtual to physical and more importantly sets the cache mode.
+ */
+initial_mmu:
+ tlbia /* Invalidate all TLB entries */
+ isync
+
+ /* We should still be executing code at physical address 0x0000xxxx
+ * at this point. However, start_here is at virtual address
+ * 0xC000xxxx. So, set up a TLB mapping to cover this once
+ * translation is enabled.
+ */
+
+ lis r3,KERNELBASE@h /* Load the kernel virtual address */
+ ori r3,r3,KERNELBASE@l
+ tophys(r4,r3) /* Load the kernel physical address */
+
+ iccci r0,r3 /* Invalidate the i-cache before use */
+
+ /* Load the kernel PID.
+ */
+ li r0,0
+ mtspr SPRN_PID,r0
+ sync
+
+ /* Configure and load two entries into TLB slots 62 and 63.
+ * In case we are pinning TLBs, these are reserved in by the
+ * other TLB functions. If not reserving, then it doesn't
+ * matter where they are loaded.
+ */
+ clrrwi r4,r4,10 /* Mask off the real page number */
+ ori r4,r4,(TLB_WR | TLB_EX) /* Set the write and execute bits */
+
+ clrrwi r3,r3,10 /* Mask off the effective page number */
+ ori r3,r3,(TLB_VALID | TLB_PAGESZ(PAGESZ_16M))
+
+ li r0,63 /* TLB slot 63 */
+
+ tlbwe r4,r0,TLB_DATA /* Load the data portion of the entry */
+ tlbwe r3,r0,TLB_TAG /* Load the tag portion of the entry */
+
+#if defined(CONFIG_SERIAL_TEXT_DEBUG) && defined(SERIAL_DEBUG_IO_BASE)
+
+ /* Load a TLB entry for the UART, so that ppc4xx_progress() can use
+ * the UARTs nice and early. We use a 4k real==virtual mapping. */
+
+ lis r3,SERIAL_DEBUG_IO_BASE@h
+ ori r3,r3,SERIAL_DEBUG_IO_BASE@l
+ mr r4,r3
+ clrrwi r4,r4,12
+ ori r4,r4,(TLB_WR|TLB_I|TLB_M|TLB_G)
+
+ clrrwi r3,r3,12
+ ori r3,r3,(TLB_VALID | TLB_PAGESZ(PAGESZ_4K))
+
+ li r0,0 /* TLB slot 0 */
+ tlbwe r4,r0,TLB_DATA
+ tlbwe r3,r0,TLB_TAG
+#endif /* CONFIG_SERIAL_DEBUG_TEXT && SERIAL_DEBUG_IO_BASE */
+
+ isync
+
+ /* Establish the exception vector base
+ */
+ lis r4,KERNELBASE@h /* EVPR only uses the high 16-bits */
+ tophys(r0,r4) /* Use the physical address */
+ mtspr SPRN_EVPR,r0
+
+ blr
+
+_GLOBAL(abort)
+ mfspr r13,SPRN_DBCR0
+ oris r13,r13,DBCR0_RST_SYSTEM@h
+ mtspr SPRN_DBCR0,r13
+
+_GLOBAL(set_context)
+
+#ifdef CONFIG_BDI_SWITCH
+ /* Context switch the PTE pointer for the Abatron BDI2000.
+ * The PGDIR is the second parameter.
+ */
+ lis r5, KERNELBASE@h
+ lwz r5, 0xf0(r5)
+ stw r4, 0x4(r5)
+#endif
+ sync
+ mtspr SPRN_PID,r3
+ isync /* Need an isync to flush shadow */
+ /* TLBs after changing PID */
+ blr
+
+/* We put a few things here that have to be page-aligned. This stuff
+ * goes at the beginning of the data segment, which is page-aligned.
+ */
+ .data
+ .align 12
+ .globl sdata
+sdata:
+ .globl empty_zero_page
+empty_zero_page:
+ .space 4096
+ .globl swapper_pg_dir
+swapper_pg_dir:
+ .space 4096
+
+
+/* Stack for handling critical exceptions from kernel mode */
+ .section .bss
+ .align 12
+exception_stack_bottom:
+ .space 4096
+critical_stack_top:
+ .globl exception_stack_top
+exception_stack_top:
+
+/* This space gets a copy of optional info passed to us by the bootstrap
+ * which is used to pass parameters into the kernel like root=/dev/sda1, etc.
+ */
+ .globl cmd_line
+cmd_line:
+ .space 512
+
+/* Room for two PTE pointers, usually the kernel and current user pointers
+ * to their respective root page table.
+ */
+abatron_pteptrs:
+ .space 8
--- linux-2.6.orig/arch/powerpc/kernel/head_4xx.S
+++ /dev/null
@@ -1,1021 +0,0 @@
-/*
- * Copyright (c) 1995-1996 Gary Thomas <gdt@linuxppc.org>
- * Initial PowerPC version.
- * Copyright (c) 1996 Cort Dougan <cort@cs.nmt.edu>
- * Rewritten for PReP
- * Copyright (c) 1996 Paul Mackerras <paulus@cs.anu.edu.au>
- * Low-level exception handers, MMU support, and rewrite.
- * Copyright (c) 1997 Dan Malek <dmalek@jlc.net>
- * PowerPC 8xx modifications.
- * Copyright (c) 1998-1999 TiVo, Inc.
- * PowerPC 403GCX modifications.
- * Copyright (c) 1999 Grant Erickson <grant@lcse.umn.edu>
- * PowerPC 403GCX/405GP modifications.
- * Copyright 2000 MontaVista Software Inc.
- * PPC405 modifications
- * PowerPC 403GCX/405GP modifications.
- * Author: MontaVista Software, Inc.
- * frank_rowand@mvista.com or source@mvista.com
- * debbie_chu@mvista.com
- *
- *
- * Module name: head_4xx.S
- *
- * Description:
- * Kernel execution entry point code.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
- */
-
-#include <asm/processor.h>
-#include <asm/page.h>
-#include <asm/mmu.h>
-#include <asm/pgtable.h>
-#include <asm/ibm4xx.h>
-#include <asm/cputable.h>
-#include <asm/thread_info.h>
-#include <asm/ppc_asm.h>
-#include <asm/asm-offsets.h>
-
-/* As with the other PowerPC ports, it is expected that when code
- * execution begins here, the following registers contain valid, yet
- * optional, information:
- *
- * r3 - Board info structure pointer (DRAM, frequency, MAC address, etc.)
- * r4 - Starting address of the init RAM disk
- * r5 - Ending address of the init RAM disk
- * r6 - Start of kernel command line string (e.g. "mem=96m")
- * r7 - End of kernel command line string
- *
- * This is all going to change RSN when we add bi_recs....... -- Dan
- */
- .text
-_GLOBAL(_stext)
-_GLOBAL(_start)
-
- /* Save parameters we are passed.
- */
- mr r31,r3
- mr r30,r4
- mr r29,r5
- mr r28,r6
- mr r27,r7
-
- /* We have to turn on the MMU right away so we get cache modes
- * set correctly.
- */
- bl initial_mmu
-
-/* We now have the lower 16 Meg mapped into TLB entries, and the caches
- * ready to work.
- */
-turn_on_mmu:
- lis r0,MSR_KERNEL@h
- ori r0,r0,MSR_KERNEL@l
- mtspr SPRN_SRR1,r0
- lis r0,start_here@h
- ori r0,r0,start_here@l
- mtspr SPRN_SRR0,r0
- SYNC
- rfi /* enables MMU */
- b . /* prevent prefetch past rfi */
-
-/*
- * This area is used for temporarily saving registers during the
- * critical exception prolog.
- */
- . = 0xc0
-crit_save:
-_GLOBAL(crit_r10)
- .space 4
-_GLOBAL(crit_r11)
- .space 4
-
-/*
- * Exception vector entry code. This code runs with address translation
- * turned off (i.e. using physical addresses). We assume SPRG3 has the
- * physical address of the current task thread_struct.
- * Note that we have to have decremented r1 before we write to any fields
- * of the exception frame, since a critical interrupt could occur at any
- * time, and it will write to the area immediately below the current r1.
- */
-#define NORMAL_EXCEPTION_PROLOG \
- mtspr SPRN_SPRG0,r10; /* save two registers to work with */\
- mtspr SPRN_SPRG1,r11; \
- mtspr SPRN_SPRG2,r1; \
- mfcr r10; /* save CR in r10 for now */\
- mfspr r11,SPRN_SRR1; /* check whether user or kernel */\
- andi. r11,r11,MSR_PR; \
- beq 1f; \
- mfspr r1,SPRN_SPRG3; /* if from user, start at top of */\
- lwz r1,THREAD_INFO-THREAD(r1); /* this thread's kernel stack */\
- addi r1,r1,THREAD_SIZE; \
-1: subi r1,r1,INT_FRAME_SIZE; /* Allocate an exception frame */\
- tophys(r11,r1); \
- stw r10,_CCR(r11); /* save various registers */\
- stw r12,GPR12(r11); \
- stw r9,GPR9(r11); \
- mfspr r10,SPRN_SPRG0; \
- stw r10,GPR10(r11); \
- mfspr r12,SPRN_SPRG1; \
- stw r12,GPR11(r11); \
- mflr r10; \
- stw r10,_LINK(r11); \
- mfspr r10,SPRN_SPRG2; \
- mfspr r12,SPRN_SRR0; \
- stw r10,GPR1(r11); \
- mfspr r9,SPRN_SRR1; \
- stw r10,0(r11); \
- rlwinm r9,r9,0,14,12; /* clear MSR_WE (necessary?) */\
- stw r0,GPR0(r11); \
- SAVE_4GPRS(3, r11); \
- SAVE_2GPRS(7, r11)
-
-/*
- * Exception prolog for critical exceptions. This is a little different
- * from the normal exception prolog above since a critical exception
- * can potentially occur at any point during normal exception processing.
- * Thus we cannot use the same SPRG registers as the normal prolog above.
- * Instead we use a couple of words of memory at low physical addresses.
- * This is OK since we don't support SMP on these processors.
- */
-#define CRITICAL_EXCEPTION_PROLOG \
- stw r10,crit_r10@l(0); /* save two registers to work with */\
- stw r11,crit_r11@l(0); \
- mfcr r10; /* save CR in r10 for now */\
- mfspr r11,SPRN_SRR3; /* check whether user or kernel */\
- andi. r11,r11,MSR_PR; \
- lis r11,critical_stack_top@h; \
- ori r11,r11,critical_stack_top@l; \
- beq 1f; \
- /* COMING FROM USER MODE */ \
- mfspr r11,SPRN_SPRG3; /* if from user, start at top of */\
- lwz r11,THREAD_INFO-THREAD(r11); /* this thread's kernel stack */\
- addi r11,r11,THREAD_SIZE; \
-1: subi r11,r11,INT_FRAME_SIZE; /* Allocate an exception frame */\
- tophys(r11,r11); \
- stw r10,_CCR(r11); /* save various registers */\
- stw r12,GPR12(r11); \
- stw r9,GPR9(r11); \
- mflr r10; \
- stw r10,_LINK(r11); \
- mfspr r12,SPRN_DEAR; /* save DEAR and ESR in the frame */\
- stw r12,_DEAR(r11); /* since they may have had stuff */\
- mfspr r9,SPRN_ESR; /* in them at the point where the */\
- stw r9,_ESR(r11); /* exception was taken */\
- mfspr r12,SPRN_SRR2; \
- stw r1,GPR1(r11); \
- mfspr r9,SPRN_SRR3; \
- stw r1,0(r11); \
- tovirt(r1,r11); \
- rlwinm r9,r9,0,14,12; /* clear MSR_WE (necessary?) */\
- stw r0,GPR0(r11); \
- SAVE_4GPRS(3, r11); \
- SAVE_2GPRS(7, r11)
-
- /*
- * State at this point:
- * r9 saved in stack frame, now saved SRR3 & ~MSR_WE
- * r10 saved in crit_r10 and in stack frame, trashed
- * r11 saved in crit_r11 and in stack frame,
- * now phys stack/exception frame pointer
- * r12 saved in stack frame, now saved SRR2
- * CR saved in stack frame, CR0.EQ = !SRR3.PR
- * LR, DEAR, ESR in stack frame
- * r1 saved in stack frame, now virt stack/excframe pointer
- * r0, r3-r8 saved in stack frame
- */
-
-/*
- * Exception vectors.
- */
-#define START_EXCEPTION(n, label) \
- . = n; \
-label:
-
-#define EXCEPTION(n, label, hdlr, xfer) \
- START_EXCEPTION(n, label); \
- NORMAL_EXCEPTION_PROLOG; \
- addi r3,r1,STACK_FRAME_OVERHEAD; \
- xfer(n, hdlr)
-
-#define CRITICAL_EXCEPTION(n, label, hdlr) \
- START_EXCEPTION(n, label); \
- CRITICAL_EXCEPTION_PROLOG; \
- addi r3,r1,STACK_FRAME_OVERHEAD; \
- EXC_XFER_TEMPLATE(hdlr, n+2, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \
- NOCOPY, crit_transfer_to_handler, \
- ret_from_crit_exc)
-
-#define EXC_XFER_TEMPLATE(hdlr, trap, msr, copyee, tfer, ret) \
- li r10,trap; \
- stw r10,_TRAP(r11); \
- lis r10,msr@h; \
- ori r10,r10,msr@l; \
- copyee(r10, r9); \
- bl tfer; \
- .long hdlr; \
- .long ret
-
-#define COPY_EE(d, s) rlwimi d,s,0,16,16
-#define NOCOPY(d, s)
-
-#define EXC_XFER_STD(n, hdlr) \
- EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, NOCOPY, transfer_to_handler_full, \
- ret_from_except_full)
-
-#define EXC_XFER_LITE(n, hdlr) \
- EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL, NOCOPY, transfer_to_handler, \
- ret_from_except)
-
-#define EXC_XFER_EE(n, hdlr) \
- EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, COPY_EE, transfer_to_handler_full, \
- ret_from_except_full)
-
-#define EXC_XFER_EE_LITE(n, hdlr) \
- EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL, COPY_EE, transfer_to_handler, \
- ret_from_except)
-
-
-/*
- * 0x0100 - Critical Interrupt Exception
- */
- CRITICAL_EXCEPTION(0x0100, CriticalInterrupt, unknown_exception)
-
-/*
- * 0x0200 - Machine Check Exception
- */
- CRITICAL_EXCEPTION(0x0200, MachineCheck, machine_check_exception)
-
-/*
- * 0x0300 - Data Storage Exception
- * This happens for just a few reasons. U0 set (but we don't do that),
- * or zone protection fault (user violation, write to protected page).
- * If this is just an update of modified status, we do that quickly
- * and exit. Otherwise, we call heavywight functions to do the work.
- */
- START_EXCEPTION(0x0300, DataStorage)
- mtspr SPRN_SPRG0, r10 /* Save some working registers */
- mtspr SPRN_SPRG1, r11
-#ifdef CONFIG_403GCX
- stw r12, 0(r0)
- stw r9, 4(r0)
- mfcr r11
- mfspr r12, SPRN_PID
- stw r11, 8(r0)
- stw r12, 12(r0)
-#else
- mtspr SPRN_SPRG4, r12
- mtspr SPRN_SPRG5, r9
- mfcr r11
- mfspr r12, SPRN_PID
- mtspr SPRN_SPRG7, r11
- mtspr SPRN_SPRG6, r12
-#endif
-
- /* First, check if it was a zone fault (which means a user
- * tried to access a kernel or read-protected page - always
- * a SEGV). All other faults here must be stores, so no
- * need to check ESR_DST as well. */
- mfspr r10, SPRN_ESR
- andis. r10, r10, ESR_DIZ@h
- bne 2f
-
- mfspr r10, SPRN_DEAR /* Get faulting address */
-
- /* If we are faulting a kernel address, we have to use the
- * kernel page tables.
- */
- lis r11, TASK_SIZE@h
- cmplw r10, r11
- blt+ 3f
- lis r11, swapper_pg_dir@h
- ori r11, r11, swapper_pg_dir@l
- li r9, 0
- mtspr SPRN_PID, r9 /* TLB will have 0 TID */
- b 4f
-
- /* Get the PGD for the current thread.
- */
-3:
- mfspr r11,SPRN_SPRG3
- lwz r11,PGDIR(r11)
-4:
- tophys(r11, r11)
- rlwimi r11, r10, 12, 20, 29 /* Create L1 (pgdir/pmd) address */
- lwz r11, 0(r11) /* Get L1 entry */
- rlwinm. r12, r11, 0, 0, 19 /* Extract L2 (pte) base address */
- beq 2f /* Bail if no table */
-
- rlwimi r12, r10, 22, 20, 29 /* Compute PTE address */
- lwz r11, 0(r12) /* Get Linux PTE */
-
- andi. r9, r11, _PAGE_RW /* Is it writeable? */
- beq 2f /* Bail if not */
-
- /* Update 'changed'.
- */
- ori r11, r11, _PAGE_DIRTY|_PAGE_ACCESSED|_PAGE_HWWRITE
- stw r11, 0(r12) /* Update Linux page table */
-
- /* Most of the Linux PTE is ready to load into the TLB LO.
- * We set ZSEL, where only the LS-bit determines user access.
- * We set execute, because we don't have the granularity to
- * properly set this at the page level (Linux problem).
- * If shared is set, we cause a zero PID->TID load.
- * Many of these bits are software only. Bits we don't set
- * here we (properly should) assume have the appropriate value.
- */
- li r12, 0x0ce2
- andc r11, r11, r12 /* Make sure 20, 21 are zero */
-
- /* find the TLB index that caused the fault. It has to be here.
- */
- tlbsx r9, 0, r10
-
- tlbwe r11, r9, TLB_DATA /* Load TLB LO */
-
- /* Done...restore registers and get out of here.
- */
-#ifdef CONFIG_403GCX
- lwz r12, 12(r0)
- lwz r11, 8(r0)
- mtspr SPRN_PID, r12
- mtcr r11
- lwz r9, 4(r0)
- lwz r12, 0(r0)
-#else
- mfspr r12, SPRN_SPRG6
- mfspr r11, SPRN_SPRG7
- mtspr SPRN_PID, r12
- mtcr r11
- mfspr r9, SPRN_SPRG5
- mfspr r12, SPRN_SPRG4
-#endif
- mfspr r11, SPRN_SPRG1
- mfspr r10, SPRN_SPRG0
- PPC405_ERR77_SYNC
- rfi /* Should sync shadow TLBs */
- b . /* prevent prefetch past rfi */
-
-2:
- /* The bailout. Restore registers to pre-exception conditions
- * and call the heavyweights to help us out.
- */
-#ifdef CONFIG_403GCX
- lwz r12, 12(r0)
- lwz r11, 8(r0)
- mtspr SPRN_PID, r12
- mtcr r11
- lwz r9, 4(r0)
- lwz r12, 0(r0)
-#else
- mfspr r12, SPRN_SPRG6
- mfspr r11, SPRN_SPRG7
- mtspr SPRN_PID, r12
- mtcr r11
- mfspr r9, SPRN_SPRG5
- mfspr r12, SPRN_SPRG4
-#endif
- mfspr r11, SPRN_SPRG1
- mfspr r10, SPRN_SPRG0
- b DataAccess
-
-/*
- * 0x0400 - Instruction Storage Exception
- * This is caused by a fetch from non-execute or guarded pages.
- */
- START_EXCEPTION(0x0400, InstructionAccess)
- NORMAL_EXCEPTION_PROLOG
- mr r4,r12 /* Pass SRR0 as arg2 */
- li r5,0 /* Pass zero as arg3 */
- EXC_XFER_EE_LITE(0x400, handle_page_fault)
-
-/* 0x0500 - External Interrupt Exception */
- EXCEPTION(0x0500, HardwareInterrupt, do_IRQ, EXC_XFER_LITE)
-
-/* 0x0600 - Alignment Exception */
- START_EXCEPTION(0x0600, Alignment)
- NORMAL_EXCEPTION_PROLOG
- mfspr r4,SPRN_DEAR /* Grab the DEAR and save it */
- stw r4,_DEAR(r11)
- addi r3,r1,STACK_FRAME_OVERHEAD
- EXC_XFER_EE(0x600, alignment_exception)
-
-/* 0x0700 - Program Exception */
- START_EXCEPTION(0x0700, ProgramCheck)
- NORMAL_EXCEPTION_PROLOG
- mfspr r4,SPRN_ESR /* Grab the ESR and save it */
- stw r4,_ESR(r11)
- addi r3,r1,STACK_FRAME_OVERHEAD
- EXC_XFER_STD(0x700, program_check_exception)
-
- EXCEPTION(0x0800, Trap_08, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x0900, Trap_09, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x0A00, Trap_0A, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x0B00, Trap_0B, unknown_exception, EXC_XFER_EE)
-
-/* 0x0C00 - System Call Exception */
- START_EXCEPTION(0x0C00, SystemCall)
- NORMAL_EXCEPTION_PROLOG
- EXC_XFER_EE_LITE(0xc00, DoSyscall)
-
- EXCEPTION(0x0D00, Trap_0D, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x0E00, Trap_0E, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x0F00, Trap_0F, unknown_exception, EXC_XFER_EE)
-
-/* 0x1000 - Programmable Interval Timer (PIT) Exception */
- START_EXCEPTION(0x1000, Decrementer)
- NORMAL_EXCEPTION_PROLOG
- lis r0,TSR_PIS@h
- mtspr SPRN_TSR,r0 /* Clear the PIT exception */
- addi r3,r1,STACK_FRAME_OVERHEAD
- EXC_XFER_LITE(0x1000, timer_interrupt)
-
-#if 0
-/* NOTE:
- * FIT and WDT handlers are not implemented yet.
- */
-
-/* 0x1010 - Fixed Interval Timer (FIT) Exception
-*/
- STND_EXCEPTION(0x1010, FITException, unknown_exception)
-
-/* 0x1020 - Watchdog Timer (WDT) Exception
-*/
-#ifdef CONFIG_BOOKE_WDT
- CRITICAL_EXCEPTION(0x1020, WDTException, WatchdogException)
-#else
- CRITICAL_EXCEPTION(0x1020, WDTException, unknown_exception)
-#endif
-#endif
-
-/* 0x1100 - Data TLB Miss Exception
- * As the name implies, translation is not in the MMU, so search the
- * page tables and fix it. The only purpose of this function is to
- * load TLB entries from the page table if they exist.
- */
- START_EXCEPTION(0x1100, DTLBMiss)
- mtspr SPRN_SPRG0, r10 /* Save some working registers */
- mtspr SPRN_SPRG1, r11
-#ifdef CONFIG_403GCX
- stw r12, 0(r0)
- stw r9, 4(r0)
- mfcr r11
- mfspr r12, SPRN_PID
- stw r11, 8(r0)
- stw r12, 12(r0)
-#else
- mtspr SPRN_SPRG4, r12
- mtspr SPRN_SPRG5, r9
- mfcr r11
- mfspr r12, SPRN_PID
- mtspr SPRN_SPRG7, r11
- mtspr SPRN_SPRG6, r12
-#endif
- mfspr r10, SPRN_DEAR /* Get faulting address */
-
- /* If we are faulting a kernel address, we have to use the
- * kernel page tables.
- */
- lis r11, TASK_SIZE@h
- cmplw r10, r11
- blt+ 3f
- lis r11, swapper_pg_dir@h
- ori r11, r11, swapper_pg_dir@l
- li r9, 0
- mtspr SPRN_PID, r9 /* TLB will have 0 TID */
- b 4f
-
- /* Get the PGD for the current thread.
- */
-3:
- mfspr r11,SPRN_SPRG3
- lwz r11,PGDIR(r11)
-4:
- tophys(r11, r11)
- rlwimi r11, r10, 12, 20, 29 /* Create L1 (pgdir/pmd) address */
- lwz r12, 0(r11) /* Get L1 entry */
- andi. r9, r12, _PMD_PRESENT /* Check if it points to a PTE page */
- beq 2f /* Bail if no table */
-
- rlwimi r12, r10, 22, 20, 29 /* Compute PTE address */
- lwz r11, 0(r12) /* Get Linux PTE */
- andi. r9, r11, _PAGE_PRESENT
- beq 5f
-
- ori r11, r11, _PAGE_ACCESSED
- stw r11, 0(r12)
-
- /* Create TLB tag. This is the faulting address plus a static
- * set of bits. These are size, valid, E, U0.
- */
- li r12, 0x00c0
- rlwimi r10, r12, 0, 20, 31
-
- b finish_tlb_load
-
-2: /* Check for possible large-page pmd entry */
- rlwinm. r9, r12, 2, 22, 24
- beq 5f
-
- /* Create TLB tag. This is the faulting address, plus a static
- * set of bits (valid, E, U0) plus the size from the PMD.
- */
- ori r9, r9, 0x40
- rlwimi r10, r9, 0, 20, 31
- mr r11, r12
-
- b finish_tlb_load
-
-5:
- /* The bailout. Restore registers to pre-exception conditions
- * and call the heavyweights to help us out.
- */
-#ifdef CONFIG_403GCX
- lwz r12, 12(r0)
- lwz r11, 8(r0)
- mtspr SPRN_PID, r12
- mtcr r11
- lwz r9, 4(r0)
- lwz r12, 0(r0)
-#else
- mfspr r12, SPRN_SPRG6
- mfspr r11, SPRN_SPRG7
- mtspr SPRN_PID, r12
- mtcr r11
- mfspr r9, SPRN_SPRG5
- mfspr r12, SPRN_SPRG4
-#endif
- mfspr r11, SPRN_SPRG1
- mfspr r10, SPRN_SPRG0
- b DataAccess
-
-/* 0x1200 - Instruction TLB Miss Exception
- * Nearly the same as above, except we get our information from different
- * registers and bailout to a different point.
- */
- START_EXCEPTION(0x1200, ITLBMiss)
- mtspr SPRN_SPRG0, r10 /* Save some working registers */
- mtspr SPRN_SPRG1, r11
-#ifdef CONFIG_403GCX
- stw r12, 0(r0)
- stw r9, 4(r0)
- mfcr r11
- mfspr r12, SPRN_PID
- stw r11, 8(r0)
- stw r12, 12(r0)
-#else
- mtspr SPRN_SPRG4, r12
- mtspr SPRN_SPRG5, r9
- mfcr r11
- mfspr r12, SPRN_PID
- mtspr SPRN_SPRG7, r11
- mtspr SPRN_SPRG6, r12
-#endif
- mfspr r10, SPRN_SRR0 /* Get faulting address */
-
- /* If we are faulting a kernel address, we have to use the
- * kernel page tables.
- */
- lis r11, TASK_SIZE@h
- cmplw r10, r11
- blt+ 3f
- lis r11, swapper_pg_dir@h
- ori r11, r11, swapper_pg_dir@l
- li r9, 0
- mtspr SPRN_PID, r9 /* TLB will have 0 TID */
- b 4f
-
- /* Get the PGD for the current thread.
- */
-3:
- mfspr r11,SPRN_SPRG3
- lwz r11,PGDIR(r11)
-4:
- tophys(r11, r11)
- rlwimi r11, r10, 12, 20, 29 /* Create L1 (pgdir/pmd) address */
- lwz r12, 0(r11) /* Get L1 entry */
- andi. r9, r12, _PMD_PRESENT /* Check if it points to a PTE page */
- beq 2f /* Bail if no table */
-
- rlwimi r12, r10, 22, 20, 29 /* Compute PTE address */
- lwz r11, 0(r12) /* Get Linux PTE */
- andi. r9, r11, _PAGE_PRESENT
- beq 5f
-
- ori r11, r11, _PAGE_ACCESSED
- stw r11, 0(r12)
-
- /* Create TLB tag. This is the faulting address plus a static
- * set of bits. These are size, valid, E, U0.
- */
- li r12, 0x00c0
- rlwimi r10, r12, 0, 20, 31
-
- b finish_tlb_load
-
-2: /* Check for possible large-page pmd entry */
- rlwinm. r9, r12, 2, 22, 24
- beq 5f
-
- /* Create TLB tag. This is the faulting address, plus a static
- * set of bits (valid, E, U0) plus the size from the PMD.
- */
- ori r9, r9, 0x40
- rlwimi r10, r9, 0, 20, 31
- mr r11, r12
-
- b finish_tlb_load
-
-5:
- /* The bailout. Restore registers to pre-exception conditions
- * and call the heavyweights to help us out.
- */
-#ifdef CONFIG_403GCX
- lwz r12, 12(r0)
- lwz r11, 8(r0)
- mtspr SPRN_PID, r12
- mtcr r11
- lwz r9, 4(r0)
- lwz r12, 0(r0)
-#else
- mfspr r12, SPRN_SPRG6
- mfspr r11, SPRN_SPRG7
- mtspr SPRN_PID, r12
- mtcr r11
- mfspr r9, SPRN_SPRG5
- mfspr r12, SPRN_SPRG4
-#endif
- mfspr r11, SPRN_SPRG1
- mfspr r10, SPRN_SPRG0
- b InstructionAccess
-
- EXCEPTION(0x1300, Trap_13, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x1400, Trap_14, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x1500, Trap_15, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x1600, Trap_16, unknown_exception, EXC_XFER_EE)
-#ifdef CONFIG_IBM405_ERR51
- /* 405GP errata 51 */
- START_EXCEPTION(0x1700, Trap_17)
- b DTLBMiss
-#else
- EXCEPTION(0x1700, Trap_17, unknown_exception, EXC_XFER_EE)
-#endif
- EXCEPTION(0x1800, Trap_18, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x1900, Trap_19, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x1A00, Trap_1A, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x1B00, Trap_1B, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x1C00, Trap_1C, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x1D00, Trap_1D, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x1E00, Trap_1E, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x1F00, Trap_1F, unknown_exception, EXC_XFER_EE)
-
-/* Check for a single step debug exception while in an exception
- * handler before state has been saved. This is to catch the case
- * where an instruction that we are trying to single step causes
- * an exception (eg ITLB/DTLB miss) and thus the first instruction of
- * the exception handler generates a single step debug exception.
- *
- * If we get a debug trap on the first instruction of an exception handler,
- * we reset the MSR_DE in the _exception handler's_ MSR (the debug trap is
- * a critical exception, so we are using SPRN_CSRR1 to manipulate the MSR).
- * The exception handler was handling a non-critical interrupt, so it will
- * save (and later restore) the MSR via SPRN_SRR1, which will still have
- * the MSR_DE bit set.
- */
- /* 0x2000 - Debug Exception */
- START_EXCEPTION(0x2000, DebugTrap)
- CRITICAL_EXCEPTION_PROLOG
-
- /*
- * If this is a single step or branch-taken exception in an
- * exception entry sequence, it was probably meant to apply to
- * the code where the exception occurred (since exception entry
- * doesn't turn off DE automatically). We simulate the effect
- * of turning off DE on entry to an exception handler by turning
- * off DE in the SRR3 value and clearing the debug status.
- */
- mfspr r10,SPRN_DBSR /* check single-step/branch taken */
- andis. r10,r10,DBSR_IC@h
- beq+ 2f
-
- andi. r10,r9,MSR_IR|MSR_PR /* check supervisor + MMU off */
- beq 1f /* branch and fix it up */
-
- mfspr r10,SPRN_SRR2 /* Faulting instruction address */
- cmplwi r10,0x2100
- bgt+ 2f /* address above exception vectors */
-
- /* here it looks like we got an inappropriate debug exception. */
-1: rlwinm r9,r9,0,~MSR_DE /* clear DE in the SRR3 value */
- lis r10,DBSR_IC@h /* clear the IC event */
- mtspr SPRN_DBSR,r10
- /* restore state and get out */
- lwz r10,_CCR(r11)
- lwz r0,GPR0(r11)
- lwz r1,GPR1(r11)
- mtcrf 0x80,r10
- mtspr SPRN_SRR2,r12
- mtspr SPRN_SRR3,r9
- lwz r9,GPR9(r11)
- lwz r12,GPR12(r11)
- lwz r10,crit_r10@l(0)
- lwz r11,crit_r11@l(0)
- PPC405_ERR77_SYNC
- rfci
- b .
-
- /* continue normal handling for a critical exception... */
-2: mfspr r4,SPRN_DBSR
- addi r3,r1,STACK_FRAME_OVERHEAD
- EXC_XFER_TEMPLATE(DebugException, 0x2002, \
- (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \
- NOCOPY, crit_transfer_to_handler, ret_from_crit_exc)
-
-/*
- * The other Data TLB exceptions bail out to this point
- * if they can't resolve the lightweight TLB fault.
- */
-DataAccess:
- NORMAL_EXCEPTION_PROLOG
- mfspr r5,SPRN_ESR /* Grab the ESR, save it, pass arg3 */
- stw r5,_ESR(r11)
- mfspr r4,SPRN_DEAR /* Grab the DEAR, save it, pass arg2 */
- EXC_XFER_EE_LITE(0x300, handle_page_fault)
-
-/* Other PowerPC processors, namely those derived from the 6xx-series
- * have vectors from 0x2100 through 0x2F00 defined, but marked as reserved.
- * However, for the 4xx-series processors these are neither defined nor
- * reserved.
- */
-
- /* Damn, I came up one instruction too many to fit into the
- * exception space :-). Both the instruction and data TLB
- * miss get to this point to load the TLB.
- * r10 - TLB_TAG value
- * r11 - Linux PTE
- * r12, r9 - avilable to use
- * PID - loaded with proper value when we get here
- * Upon exit, we reload everything and RFI.
- * Actually, it will fit now, but oh well.....a common place
- * to load the TLB.
- */
-tlb_4xx_index:
- .long 0
-finish_tlb_load:
- /* load the next available TLB index.
- */
- lwz r9, tlb_4xx_index@l(0)
- addi r9, r9, 1
- andi. r9, r9, (PPC4XX_TLB_SIZE-1)
- stw r9, tlb_4xx_index@l(0)
-
-6:
- /*
- * Clear out the software-only bits in the PTE to generate the
- * TLB_DATA value. These are the bottom 2 bits of the RPM, the
- * top 3 bits of the zone field, and M.
- */
- li r12, 0x0ce2
- andc r11, r11, r12
-
- tlbwe r11, r9, TLB_DATA /* Load TLB LO */
- tlbwe r10, r9, TLB_TAG /* Load TLB HI */
-
- /* Done...restore registers and get out of here.
- */
-#ifdef CONFIG_403GCX
- lwz r12, 12(r0)
- lwz r11, 8(r0)
- mtspr SPRN_PID, r12
- mtcr r11
- lwz r9, 4(r0)
- lwz r12, 0(r0)
-#else
- mfspr r12, SPRN_SPRG6
- mfspr r11, SPRN_SPRG7
- mtspr SPRN_PID, r12
- mtcr r11
- mfspr r9, SPRN_SPRG5
- mfspr r12, SPRN_SPRG4
-#endif
- mfspr r11, SPRN_SPRG1
- mfspr r10, SPRN_SPRG0
- PPC405_ERR77_SYNC
- rfi /* Should sync shadow TLBs */
- b . /* prevent prefetch past rfi */
-
-/* extern void giveup_fpu(struct task_struct *prev)
- *
- * The PowerPC 4xx family of processors do not have an FPU, so this just
- * returns.
- */
-_GLOBAL(giveup_fpu)
- blr
-
-/* This is where the main kernel code starts.
- */
-start_here:
-
- /* ptr to current */
- lis r2,init_task@h
- ori r2,r2,init_task@l
-
- /* ptr to phys current thread */
- tophys(r4,r2)
- addi r4,r4,THREAD /* init task's THREAD */
- mtspr SPRN_SPRG3,r4
-
- /* stack */
- lis r1,init_thread_union@ha
- addi r1,r1,init_thread_union@l
- li r0,0
- stwu r0,THREAD_SIZE-STACK_FRAME_OVERHEAD(r1)
-
- bl early_init /* We have to do this with MMU on */
-
-/*
- * Decide what sort of machine this is and initialize the MMU.
- */
- mr r3,r31
- mr r4,r30
- mr r5,r29
- mr r6,r28
- mr r7,r27
- bl machine_init
- bl MMU_init
-
-/* Go back to running unmapped so we can load up new values
- * and change to using our exception vectors.
- * On the 4xx, all we have to do is invalidate the TLB to clear
- * the old 16M byte TLB mappings.
- */
- lis r4,2f@h
- ori r4,r4,2f@l
- tophys(r4,r4)
- lis r3,(MSR_KERNEL & ~(MSR_IR|MSR_DR))@h
- ori r3,r3,(MSR_KERNEL & ~(MSR_IR|MSR_DR))@l
- mtspr SPRN_SRR0,r4
- mtspr SPRN_SRR1,r3
- rfi
- b . /* prevent prefetch past rfi */
-
-/* Load up the kernel context */
-2:
- sync /* Flush to memory before changing TLB */
- tlbia
- isync /* Flush shadow TLBs */
-
- /* set up the PTE pointers for the Abatron bdiGDB.
- */
- lis r6, swapper_pg_dir@h
- ori r6, r6, swapper_pg_dir@l
- lis r5, abatron_pteptrs@h
- ori r5, r5, abatron_pteptrs@l
- stw r5, 0xf0(r0) /* Must match your Abatron config file */
- tophys(r5,r5)
- stw r6, 0(r5)
-
-/* Now turn on the MMU for real! */
- lis r4,MSR_KERNEL@h
- ori r4,r4,MSR_KERNEL@l
- lis r3,start_kernel@h
- ori r3,r3,start_kernel@l
- mtspr SPRN_SRR0,r3
- mtspr SPRN_SRR1,r4
- rfi /* enable MMU and jump to start_kernel */
- b . /* prevent prefetch past rfi */
-
-/* Set up the initial MMU state so we can do the first level of
- * kernel initialization. This maps the first 16 MBytes of memory 1:1
- * virtual to physical and more importantly sets the cache mode.
- */
-initial_mmu:
- tlbia /* Invalidate all TLB entries */
- isync
-
- /* We should still be executing code at physical address 0x0000xxxx
- * at this point. However, start_here is at virtual address
- * 0xC000xxxx. So, set up a TLB mapping to cover this once
- * translation is enabled.
- */
-
- lis r3,KERNELBASE@h /* Load the kernel virtual address */
- ori r3,r3,KERNELBASE@l
- tophys(r4,r3) /* Load the kernel physical address */
-
- iccci r0,r3 /* Invalidate the i-cache before use */
-
- /* Load the kernel PID.
- */
- li r0,0
- mtspr SPRN_PID,r0
- sync
-
- /* Configure and load two entries into TLB slots 62 and 63.
- * In case we are pinning TLBs, these are reserved in by the
- * other TLB functions. If not reserving, then it doesn't
- * matter where they are loaded.
- */
- clrrwi r4,r4,10 /* Mask off the real page number */
- ori r4,r4,(TLB_WR | TLB_EX) /* Set the write and execute bits */
-
- clrrwi r3,r3,10 /* Mask off the effective page number */
- ori r3,r3,(TLB_VALID | TLB_PAGESZ(PAGESZ_16M))
-
- li r0,63 /* TLB slot 63 */
-
- tlbwe r4,r0,TLB_DATA /* Load the data portion of the entry */
- tlbwe r3,r0,TLB_TAG /* Load the tag portion of the entry */
-
-#if defined(CONFIG_SERIAL_TEXT_DEBUG) && defined(SERIAL_DEBUG_IO_BASE)
-
- /* Load a TLB entry for the UART, so that ppc4xx_progress() can use
- * the UARTs nice and early. We use a 4k real==virtual mapping. */
-
- lis r3,SERIAL_DEBUG_IO_BASE@h
- ori r3,r3,SERIAL_DEBUG_IO_BASE@l
- mr r4,r3
- clrrwi r4,r4,12
- ori r4,r4,(TLB_WR|TLB_I|TLB_M|TLB_G)
-
- clrrwi r3,r3,12
- ori r3,r3,(TLB_VALID | TLB_PAGESZ(PAGESZ_4K))
-
- li r0,0 /* TLB slot 0 */
- tlbwe r4,r0,TLB_DATA
- tlbwe r3,r0,TLB_TAG
-#endif /* CONFIG_SERIAL_DEBUG_TEXT && SERIAL_DEBUG_IO_BASE */
-
- isync
-
- /* Establish the exception vector base
- */
- lis r4,KERNELBASE@h /* EVPR only uses the high 16-bits */
- tophys(r0,r4) /* Use the physical address */
- mtspr SPRN_EVPR,r0
-
- blr
-
-_GLOBAL(abort)
- mfspr r13,SPRN_DBCR0
- oris r13,r13,DBCR0_RST_SYSTEM@h
- mtspr SPRN_DBCR0,r13
-
-_GLOBAL(set_context)
-
-#ifdef CONFIG_BDI_SWITCH
- /* Context switch the PTE pointer for the Abatron BDI2000.
- * The PGDIR is the second parameter.
- */
- lis r5, KERNELBASE@h
- lwz r5, 0xf0(r5)
- stw r4, 0x4(r5)
-#endif
- sync
- mtspr SPRN_PID,r3
- isync /* Need an isync to flush shadow */
- /* TLBs after changing PID */
- blr
-
-/* We put a few things here that have to be page-aligned. This stuff
- * goes at the beginning of the data segment, which is page-aligned.
- */
- .data
- .align 12
- .globl sdata
-sdata:
- .globl empty_zero_page
-empty_zero_page:
- .space 4096
- .globl swapper_pg_dir
-swapper_pg_dir:
- .space 4096
-
-
-/* Stack for handling critical exceptions from kernel mode */
- .section .bss
- .align 12
-exception_stack_bottom:
- .space 4096
-critical_stack_top:
- .globl exception_stack_top
-exception_stack_top:
-
-/* This space gets a copy of optional info passed to us by the bootstrap
- * which is used to pass parameters into the kernel like root=/dev/sda1, etc.
- */
- .globl cmd_line
-cmd_line:
- .space 512
-
-/* Room for two PTE pointers, usually the kernel and current user pointers
- * to their respective root page table.
- */
-abatron_pteptrs:
- .space 8
--- /dev/null
+++ linux-2.6/arch/powerpc/mm/40x_mmu.c
@@ -0,0 +1,135 @@
+/*
+ * This file contains the routines for initializing the MMU
+ * on the 4xx series of chips.
+ * -- paulus
+ *
+ * Derived from arch/ppc/mm/init.c:
+ * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *
+ * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
+ * and Cort Dougan (PReP) (cort@cs.nmt.edu)
+ * Copyright (C) 1996 Paul Mackerras
+ *
+ * Derived from "arch/i386/mm/init.c"
+ * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/ptrace.h>
+#include <linux/mman.h>
+#include <linux/mm.h>
+#include <linux/swap.h>
+#include <linux/stddef.h>
+#include <linux/vmalloc.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/highmem.h>
+
+#include <asm/pgalloc.h>
+#include <asm/prom.h>
+#include <asm/io.h>
+#include <asm/mmu_context.h>
+#include <asm/pgtable.h>
+#include <asm/mmu.h>
+#include <asm/uaccess.h>
+#include <asm/smp.h>
+#include <asm/bootx.h>
+#include <asm/machdep.h>
+#include <asm/setup.h>
+#include "mmu_decl.h"
+
+extern int __map_without_ltlbs;
+/*
+ * MMU_init_hw does the chip-specific initialization of the MMU hardware.
+ */
+void __init MMU_init_hw(void)
+{
+ /*
+ * The Zone Protection Register (ZPR) defines how protection will
+ * be applied to every page which is a member of a given zone. At
+ * present, we utilize only two of the 4xx's zones.
+ * The zone index bits (of ZSEL) in the PTE are used for software
+ * indicators, except the LSB. For user access, zone 1 is used,
+ * for kernel access, zone 0 is used. We set all but zone 1
+ * to zero, allowing only kernel access as indicated in the PTE.
+ * For zone 1, we set a 01 binary (a value of 10 will not work)
+ * to allow user access as indicated in the PTE. This also allows
+ * kernel access as indicated in the PTE.
+ */
+
+ mtspr(SPRN_ZPR, 0x10000000);
+
+ flush_instruction_cache();
+
+ /*
+ * Set up the real-mode cache parameters for the exception vector
+ * handlers (which are run in real-mode).
+ */
+
+ mtspr(SPRN_DCWR, 0x00000000); /* All caching is write-back */
+
+ /*
+ * Cache instruction and data space where the exception
+ * vectors and the kernel live in real-mode.
+ */
+
+ mtspr(SPRN_DCCR, 0xF0000000); /* 512 MB of data space at 0x0. */
+ mtspr(SPRN_ICCR, 0xF0000000); /* 512 MB of instr. space at 0x0. */
+}
+
+#define LARGE_PAGE_SIZE_16M (1<<24)
+#define LARGE_PAGE_SIZE_4M (1<<22)
+
+unsigned long __init mmu_mapin_ram(void)
+{
+ unsigned long v, s;
+ phys_addr_t p;
+
+ v = KERNELBASE;
+ p = PPC_MEMSTART;
+ s = 0;
+
+ if (__map_without_ltlbs) {
+ return s;
+ }
+
+ while (s <= (total_lowmem - LARGE_PAGE_SIZE_16M)) {
+ pmd_t *pmdp;
+ unsigned long val = p | _PMD_SIZE_16M | _PAGE_HWEXEC | _PAGE_HWWRITE;
+
+ pmdp = pmd_offset(pgd_offset_k(v), v);
+ pmd_val(*pmdp++) = val;
+ pmd_val(*pmdp++) = val;
+ pmd_val(*pmdp++) = val;
+ pmd_val(*pmdp++) = val;
+
+ v += LARGE_PAGE_SIZE_16M;
+ p += LARGE_PAGE_SIZE_16M;
+ s += LARGE_PAGE_SIZE_16M;
+ }
+
+ while (s <= (total_lowmem - LARGE_PAGE_SIZE_4M)) {
+ pmd_t *pmdp;
+ unsigned long val = p | _PMD_SIZE_4M | _PAGE_HWEXEC | _PAGE_HWWRITE;
+
+ pmdp = pmd_offset(pgd_offset_k(v), v);
+ pmd_val(*pmdp) = val;
+
+ v += LARGE_PAGE_SIZE_4M;
+ p += LARGE_PAGE_SIZE_4M;
+ s += LARGE_PAGE_SIZE_4M;
+ }
+
+ return s;
+}
--- linux-2.6.orig/arch/powerpc/mm/4xx_mmu.c
+++ /dev/null
@@ -1,135 +0,0 @@
-/*
- * This file contains the routines for initializing the MMU
- * on the 4xx series of chips.
- * -- paulus
- *
- * Derived from arch/ppc/mm/init.c:
- * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
- *
- * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
- * and Cort Dougan (PReP) (cort@cs.nmt.edu)
- * Copyright (C) 1996 Paul Mackerras
- *
- * Derived from "arch/i386/mm/init.c"
- * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
- */
-
-#include <linux/signal.h>
-#include <linux/sched.h>
-#include <linux/kernel.h>
-#include <linux/errno.h>
-#include <linux/string.h>
-#include <linux/types.h>
-#include <linux/ptrace.h>
-#include <linux/mman.h>
-#include <linux/mm.h>
-#include <linux/swap.h>
-#include <linux/stddef.h>
-#include <linux/vmalloc.h>
-#include <linux/init.h>
-#include <linux/delay.h>
-#include <linux/highmem.h>
-
-#include <asm/pgalloc.h>
-#include <asm/prom.h>
-#include <asm/io.h>
-#include <asm/mmu_context.h>
-#include <asm/pgtable.h>
-#include <asm/mmu.h>
-#include <asm/uaccess.h>
-#include <asm/smp.h>
-#include <asm/bootx.h>
-#include <asm/machdep.h>
-#include <asm/setup.h>
-#include "mmu_decl.h"
-
-extern int __map_without_ltlbs;
-/*
- * MMU_init_hw does the chip-specific initialization of the MMU hardware.
- */
-void __init MMU_init_hw(void)
-{
- /*
- * The Zone Protection Register (ZPR) defines how protection will
- * be applied to every page which is a member of a given zone. At
- * present, we utilize only two of the 4xx's zones.
- * The zone index bits (of ZSEL) in the PTE are used for software
- * indicators, except the LSB. For user access, zone 1 is used,
- * for kernel access, zone 0 is used. We set all but zone 1
- * to zero, allowing only kernel access as indicated in the PTE.
- * For zone 1, we set a 01 binary (a value of 10 will not work)
- * to allow user access as indicated in the PTE. This also allows
- * kernel access as indicated in the PTE.
- */
-
- mtspr(SPRN_ZPR, 0x10000000);
-
- flush_instruction_cache();
-
- /*
- * Set up the real-mode cache parameters for the exception vector
- * handlers (which are run in real-mode).
- */
-
- mtspr(SPRN_DCWR, 0x00000000); /* All caching is write-back */
-
- /*
- * Cache instruction and data space where the exception
- * vectors and the kernel live in real-mode.
- */
-
- mtspr(SPRN_DCCR, 0xF0000000); /* 512 MB of data space at 0x0. */
- mtspr(SPRN_ICCR, 0xF0000000); /* 512 MB of instr. space at 0x0. */
-}
-
-#define LARGE_PAGE_SIZE_16M (1<<24)
-#define LARGE_PAGE_SIZE_4M (1<<22)
-
-unsigned long __init mmu_mapin_ram(void)
-{
- unsigned long v, s;
- phys_addr_t p;
-
- v = KERNELBASE;
- p = PPC_MEMSTART;
- s = 0;
-
- if (__map_without_ltlbs) {
- return s;
- }
-
- while (s <= (total_lowmem - LARGE_PAGE_SIZE_16M)) {
- pmd_t *pmdp;
- unsigned long val = p | _PMD_SIZE_16M | _PAGE_HWEXEC | _PAGE_HWWRITE;
-
- pmdp = pmd_offset(pgd_offset_k(v), v);
- pmd_val(*pmdp++) = val;
- pmd_val(*pmdp++) = val;
- pmd_val(*pmdp++) = val;
- pmd_val(*pmdp++) = val;
-
- v += LARGE_PAGE_SIZE_16M;
- p += LARGE_PAGE_SIZE_16M;
- s += LARGE_PAGE_SIZE_16M;
- }
-
- while (s <= (total_lowmem - LARGE_PAGE_SIZE_4M)) {
- pmd_t *pmdp;
- unsigned long val = p | _PMD_SIZE_4M | _PAGE_HWEXEC | _PAGE_HWWRITE;
-
- pmdp = pmd_offset(pgd_offset_k(v), v);
- pmd_val(*pmdp) = val;
-
- v += LARGE_PAGE_SIZE_4M;
- p += LARGE_PAGE_SIZE_4M;
- s += LARGE_PAGE_SIZE_4M;
- }
-
- return s;
-}
--- linux-2.6.orig/arch/powerpc/mm/Makefile
+++ linux-2.6/arch/powerpc/mm/Makefile
@@ -13,7 +13,7 @@ obj-$(CONFIG_PPC64) += init_64.o pgtabl
hash_utils_64.o hash_low_64.o tlb_64.o \
slb_low.o slb.o stab.o mmap.o $(hash-y)
obj-$(CONFIG_PPC_STD_MMU_32) += ppc_mmu_32.o hash_low_32.o tlb_32.o
-obj-$(CONFIG_40x) += 4xx_mmu.o
+obj-$(CONFIG_40x) += 40x_mmu.o
obj-$(CONFIG_44x) += 44x_mmu.o
obj-$(CONFIG_FSL_BOOKE) += fsl_booke_mmu.o
obj-$(CONFIG_NEED_MULTIPLE_NODES) += numa.o
--- /dev/null
+++ linux-2.6/arch/powerpc/platforms/40x/Kconfig
@@ -0,0 +1,208 @@
+config 4xx
+ bool
+ depends on 40x || 44x
+ default y
+
+config BOOKE
+ bool
+ depends on 44x
+ default y
+
+menu "AMCC 40x options"
+ depends on 40x
+
+#config BUBINGA
+# bool "Bubinga"
+# depends on 40x
+# default n
+# select 405EP
+# help
+# This option enables support for the IBM 405EP evaluation board.
+
+#config CPCI405
+# bool "CPCI405"
+# depends on 40x
+# default n
+# select 405GP
+# help
+# This option enables support for the CPCI405 board.
+
+#config EP405
+# bool "EP405/EP405PC"
+# depends on 40x
+# default n
+# select 405GP
+# help
+# This option enables support for the EP405/EP405PC boards.
+
+#config EP405PC
+# bool "EP405PC Support"
+# depends on EP405
+# default y
+# help
+# This option enables support for the extra features of the EP405PC board.
+
+#config REDWOOD_5
+# bool "Redwood-5"
+# depends on 40x
+# default n
+# select STB03xxx
+# help
+# This option enables support for the IBM STB04 evaluation board.
+
+#config REDWOOD_6
+# bool "Redwood-6"
+# depends on 40x
+# default n
+# select STB03xxx
+# help
+# This option enables support for the IBM STBx25xx evaluation board.
+
+#config SYCAMORE
+# bool "Sycamore"
+# depends on 40x
+# default n
+# select 405GPR
+# help
+# This option enables support for the IBM PPC405GPr evaluation board.
+
+#config WALNUT
+# bool "Walnut"
+# depends on 40x
+# default y
+# select 405GP
+# help
+# This option enables support for the IBM PPC405GP evaluation board.
+
+#config XILINX_ML300
+# bool "Xilinx-ML300"
+# depends on 40x
+# default y
+# select VIRTEX_II_PRO
+# help
+# This option enables support for the Xilinx ML300 evaluation board.
+
+endmenu
+
+# 40x specific CPU modules, selected based on the board above.
+config NP405H
+ bool
+ #depends on ASH
+
+# OAK doesn't exist but wanted to keep this around for any future 403GCX boards
+config 403GCX
+ bool
+ #depends on OAK
+ select IBM405_ERR51
+
+config 405GP
+ bool
+ select IBM405_ERR77
+ select IBM405_ERR51
+
+config 405EP
+ bool
+
+config 405GPR
+ bool
+
+config VIRTEX_II_PRO
+ bool
+ select IBM405_ERR77
+ select IBM405_ERR51
+
+config STB03xxx
+ bool
+ select IBM405_ERR77
+ select IBM405_ERR51
+
+# 40x errata/workaround config symbols, selected by the CPU models above
+
+# All 405-based cores up until the 405GPR and 405EP have this errata.
+config IBM405_ERR77
+ bool
+
+# All 40x-based cores, up until the 405GPR and 405EP have this errata.
+config IBM405_ERR51
+ bool
+
+menu "AMCC 44x options"
+ depends on 44x
+
+#config BAMBOO
+# bool "Bamboo"
+# depends on 44x
+# default n
+# select 440EP
+# help
+# This option enables support for the IBM PPC440EP evaluation board.
+
+config EBONY
+ bool "Ebony"
+ depends on 44x
+ default y
+ select 440GP
+ help
+ This option enables support for the IBM PPC440GP evaluation board.
+
+#config LUAN
+# bool "Luan"
+# depends on 44x
+# default n
+# select 440SP
+# help
+# This option enables support for the IBM PPC440SP evaluation board.
+
+#config OCOTEA
+# bool "Ocotea"
+# depends on 44x
+# default n
+# select 440GX
+# help
+# This option enables support for the IBM PPC440GX evaluation board.
+
+endmenu
+
+# 44x specific CPU modules, selected based on the board above.
+config 440EP
+ bool
+ select PPC_FPU
+ select IBM440EP_ERR42
+
+config 440GP
+ bool
+ select IBM_NEW_EMAC_ZMII
+
+config 440GX
+ bool
+
+config 440SP
+ bool
+
+config 440A
+ bool
+ depends on 440GX
+ default y
+
+# 44x errata/workaround config symbols, selected by the CPU models above
+config IBM440EP_ERR42
+ bool
+
+#config XILINX_OCP
+# bool
+# depends on XILINX_ML300
+# default y
+
+#config BIOS_FIXUP
+# bool
+# depends on BUBINGA || EP405 || SYCAMORE || WALNUT
+# default y
+
+#config PPC4xx_DMA
+# bool "PPC4xx DMA controller support"
+# depends on 4xx
+
+#config PPC4xx_EDMA
+# bool
+# depends on !STB03xxx && PPC4xx_DMA
+# default y
--- /dev/null
+++ linux-2.6/arch/powerpc/platforms/40x/Makefile
@@ -0,0 +1 @@
+# empty makefile so make clean works
\ No newline at end of file
--- linux-2.6.orig/arch/powerpc/platforms/4xx/Kconfig
+++ /dev/null
@@ -1,208 +0,0 @@
-config 4xx
- bool
- depends on 40x || 44x
- default y
-
-config BOOKE
- bool
- depends on 44x
- default y
-
-menu "AMCC 40x options"
- depends on 40x
-
-#config BUBINGA
-# bool "Bubinga"
-# depends on 40x
-# default n
-# select 405EP
-# help
-# This option enables support for the IBM 405EP evaluation board.
-
-#config CPCI405
-# bool "CPCI405"
-# depends on 40x
-# default n
-# select 405GP
-# help
-# This option enables support for the CPCI405 board.
-
-#config EP405
-# bool "EP405/EP405PC"
-# depends on 40x
-# default n
-# select 405GP
-# help
-# This option enables support for the EP405/EP405PC boards.
-
-#config EP405PC
-# bool "EP405PC Support"
-# depends on EP405
-# default y
-# help
-# This option enables support for the extra features of the EP405PC board.
-
-#config REDWOOD_5
-# bool "Redwood-5"
-# depends on 40x
-# default n
-# select STB03xxx
-# help
-# This option enables support for the IBM STB04 evaluation board.
-
-#config REDWOOD_6
-# bool "Redwood-6"
-# depends on 40x
-# default n
-# select STB03xxx
-# help
-# This option enables support for the IBM STBx25xx evaluation board.
-
-#config SYCAMORE
-# bool "Sycamore"
-# depends on 40x
-# default n
-# select 405GPR
-# help
-# This option enables support for the IBM PPC405GPr evaluation board.
-
-#config WALNUT
-# bool "Walnut"
-# depends on 40x
-# default y
-# select 405GP
-# help
-# This option enables support for the IBM PPC405GP evaluation board.
-
-#config XILINX_ML300
-# bool "Xilinx-ML300"
-# depends on 40x
-# default y
-# select VIRTEX_II_PRO
-# help
-# This option enables support for the Xilinx ML300 evaluation board.
-
-endmenu
-
-# 40x specific CPU modules, selected based on the board above.
-config NP405H
- bool
- #depends on ASH
-
-# OAK doesn't exist but wanted to keep this around for any future 403GCX boards
-config 403GCX
- bool
- #depends on OAK
- select IBM405_ERR51
-
-config 405GP
- bool
- select IBM405_ERR77
- select IBM405_ERR51
-
-config 405EP
- bool
-
-config 405GPR
- bool
-
-config VIRTEX_II_PRO
- bool
- select IBM405_ERR77
- select IBM405_ERR51
-
-config STB03xxx
- bool
- select IBM405_ERR77
- select IBM405_ERR51
-
-# 40x errata/workaround config symbols, selected by the CPU models above
-
-# All 405-based cores up until the 405GPR and 405EP have this errata.
-config IBM405_ERR77
- bool
-
-# All 40x-based cores, up until the 405GPR and 405EP have this errata.
-config IBM405_ERR51
- bool
-
-menu "AMCC 44x options"
- depends on 44x
-
-#config BAMBOO
-# bool "Bamboo"
-# depends on 44x
-# default n
-# select 440EP
-# help
-# This option enables support for the IBM PPC440EP evaluation board.
-
-config EBONY
- bool "Ebony"
- depends on 44x
- default y
- select 440GP
- help
- This option enables support for the IBM PPC440GP evaluation board.
-
-#config LUAN
-# bool "Luan"
-# depends on 44x
-# default n
-# select 440SP
-# help
-# This option enables support for the IBM PPC440SP evaluation board.
-
-#config OCOTEA
-# bool "Ocotea"
-# depends on 44x
-# default n
-# select 440GX
-# help
-# This option enables support for the IBM PPC440GX evaluation board.
-
-endmenu
-
-# 44x specific CPU modules, selected based on the board above.
-config 440EP
- bool
- select PPC_FPU
- select IBM440EP_ERR42
-
-config 440GP
- bool
- select IBM_NEW_EMAC_ZMII
-
-config 440GX
- bool
-
-config 440SP
- bool
-
-config 440A
- bool
- depends on 440GX
- default y
-
-# 44x errata/workaround config symbols, selected by the CPU models above
-config IBM440EP_ERR42
- bool
-
-#config XILINX_OCP
-# bool
-# depends on XILINX_ML300
-# default y
-
-#config BIOS_FIXUP
-# bool
-# depends on BUBINGA || EP405 || SYCAMORE || WALNUT
-# default y
-
-#config PPC4xx_DMA
-# bool "PPC4xx DMA controller support"
-# depends on 4xx
-
-#config PPC4xx_EDMA
-# bool
-# depends on !STB03xxx && PPC4xx_DMA
-# default y
--- linux-2.6.orig/arch/powerpc/platforms/4xx/Makefile
+++ /dev/null
@@ -1 +0,0 @@
-# empty makefile so make clean works
\ No newline at end of file
--
^ permalink raw reply
* [patch 00/10] 4xx patch series for 2.6.24
From: Josh Boyer @ 2007-08-03 16:09 UTC (permalink / raw)
To: paulus; +Cc: linuxppc-dev
Here are the current 4xx patches I'd like to get in to 2.6.24. They include
some initial cleanups for 40x support as well as the AMCC Bamboo board support.
If we can get Bamboo merged, that will make it much easier for Sequoia (440EPx)
and Rainer (440GRx) to follow on.
Also, I've started a git tree for 4xx work. It contains these patches on the
for-2.6.24 branch. If you do
git pull \
git://git.infradead.org/users/jwboyer/powerpc.git for-2.6.24
You should get this entire series. It is currently based off of Paul's latest
tree.
josh
--
^ permalink raw reply
* Re: [PATCH 2/6] PowerPC 440EPx: Sequoia DTS
From: Sergei Shtylyov @ 2007-08-03 16:29 UTC (permalink / raw)
To: David Gibson; +Cc: linuxppc-dev
In-Reply-To: <20070801054751.GM31391@localhost.localdomain>
David Gibson wrote:
> Duh, forgot to attach the actual patch. Here it is:
> Index: working-2.6/Documentation/powerpc/booting-without-of.txt
> ===================================================================
> --- working-2.6.orig/Documentation/powerpc/booting-without-of.txt 2007-07-30 17:07:14.000000000 +1000
> +++ working-2.6/Documentation/powerpc/booting-without-of.txt 2007-07-30 17:07:14.000000000 +1000
> @@ -1757,45 +1757,23 @@ platforms are moved over to use the flat
> };
> };
>
> - j) Flash chip nodes
> + j) CFI or JEDEC memory-mapped NOR flash
>
> Flash chips (Memory Technology Devices) are often used for solid state
> file systems on embedded devices.
>
> - Required properties:
> + - compatible : should contain the specific model of flash chip(s) used
> + followed by either "cfi-flash" or "jedec-flash"
Duh, have nearly forgotten to complain about "-flash" suffix. Isn't it
superfluous?
WBR, Sergei
^ permalink raw reply
* Re: How can I boot powerpc kernel?
From: Sergei Shtylyov @ 2007-08-03 15:52 UTC (permalink / raw)
To: ssaravanan; +Cc: linuxppc-embedded
In-Reply-To: <380-22007853151115543@M2W019.mail2web.com>
Hello.
ssaravanan@arasor.in wrote:
> i am new to powerpc architecture.... i have a question that How i will
> boot powerpc kernel....? currently i am using MPC8555E network processor..
> using uboot and montavista kernel, using this i have tried to boot the
> kernel. After verification of images my board is hangs for ever....How can
> i solve this? Is that PowerPC required flat tree for booting linux kernel?
Yes, you must supply the tree. If you're using U-Boot, the following
sequence should boot you Linux:
=> tftpboot 800000 uImage
=> tftpboot e00000 mpc8555e.dtb
=> bootm 800000 - e00000
> thanks.
> Regards,
> Saravanan.S
WBR, Sergei
^ permalink raw reply
* How can I boot powerpc kernel?
From: ssaravanan @ 2007-08-03 15:11 UTC (permalink / raw)
To: linuxppc-embedded
hi all,
i am new to powerpc architecture=2E=2E=2E=2E i have a question that How=
i will
boot powerpc kernel=2E=2E=2E=2E=3F currently i am using MPC8555E network p=
rocessor=2E=2E
using uboot and montavista kernel, using this i have tried to boot the
kernel=2E After verification of images my board is hangs for ever=2E=2E=2E=
=2EHow can
i solve this=3F Is that PowerPC required flat tree for booting linux kerne=
l=3F
thanks=2E
Regards,
Saravanan=2ES
--------------------------------------------------------------------
mail2web=2Ecom - Microsoft=AE Exchange solutions from a leading provider -=
http://link=2Email2web=2Ecom/Business/Exchange
^ permalink raw reply
* Re: [PATCH 2/6] PowerPC 440EPx: Sequoia DTS
From: Sergei Shtylyov @ 2007-08-03 15:47 UTC (permalink / raw)
To: David Gibson; +Cc: linuxppc-dev
In-Reply-To: <20070803031349.GD6418@localhost.localdomain>
Hello.
David Gibson wrote:
>>>Index: working-2.6/Documentation/powerpc/booting-without-of.txt
>>>===================================================================
>>>--- working-2.6.orig/Documentation/powerpc/booting-without-of.txt 2007-07-30 17:07:14.000000000 +1000
>>>+++ working-2.6/Documentation/powerpc/booting-without-of.txt 2007-07-30 17:07:14.000000000 +1000
>>>@@ -1757,45 +1757,23 @@ platforms are moved over to use the flat
>>> };
>>> };
>>>
>>>- j) Flash chip nodes
>>>+ j) CFI or JEDEC memory-mapped NOR flash
>>>
>>> Flash chips (Memory Technology Devices) are often used for solid state
>>> file systems on embedded devices.
>>>- Required properties:
>>>+ - compatible : should contain the specific model of flash chip(s) used
>>>+ followed by either "cfi-flash" or "jedec-flash"
>> This "compatible" prop (and the node in whole) doesn't say a
>>thing about how the flash is mapped into the CPU address space. I
>>strongly disagree that this node provides enough info to select a
>>driver. :-/
> To be honest, I'm not sure that describing the mapping is really the
> job of the compatible property. That the flash is mapped into the
> address space is kind of implicit in the way the reg interacts with
> the parents' ranges property.
Ah, I keep forgetting about implied 1:1 parent/child address
correspondence... :-<
But does it really imply how the (low) address bits of the *child* bus
("ebc" in this case) are connected to the chip? I don't think so...
> Can you describe some of the options for *not* direct mapped flash
> chips - I can't reasonably come up with a way of describing the
> distinction when I've never seen NOR flash other than direct mapped.
You're lucky to live in the single-endian world. Once you're in bi-endian
one, all kinds of strange mappings become possible. I've seen the MIPS
mapping driver which required swapping flash bytes in s/w in BE mode, i.e.
couldn't be served by physmap, yet that's not all... here's the code of its
map read*() methods:
static __u8 xxx_map_read8(struct map_info *map, unsigned long offs)
{
u16 val;
val = readw(map->map_priv_1 + (offs & ~1));
if (offs & 1)
return ((val >> 8) & 0xff);
else
return (val & 0xff);
}
static __u16 bcm947xx_map_read16(struct map_info *map, unsigned long offs)
{
return readw(map->map_priv_1 + offs);
}
static __u32 bcm947xx_map_read32(struct map_info *map, unsigned long offs)
{
return readl(map->map_priv_1 + offs);
}
... while the simple map (used by physmap) has just __raw_read*() for those?
>>>+ - reg : Address range of the flash chip
>>>+ - bank-width : Width (in bytes) of the flash bank. Equal to the device width
>>>+ times the number of interleaved chips.
>>>+ - device-width : (optional) Width of a single flash chip. If omitted,
>>>+ assumed to be equal to 'bank-width'.
>> Why then not just introduce the "interleave" prop and obsolete the
>>"bank-width"?
> Because they're equivalent information, and bank-width is what the
> code ends up actually caring about. I don't see any reason to prefer
> giving the interleave.
Well, "device-width" is not the thing that we care about either. ;-)
>>>Index: working-2.6/drivers/mtd/maps/physmap_of.c
>>>===================================================================
>>>--- working-2.6.orig/drivers/mtd/maps/physmap_of.c 2007-07-30 17:07:11.000000000 +1000
>>>+++ working-2.6/drivers/mtd/maps/physmap_of.c 2007-07-30 17:07:14.000000000 +1000
[...]
>>>+ for (pp = dp->child, i = 0 ; pp; pp = pp->sibling, i++) {
>>>+ const u32 *reg;
>>>+ const char *name;
>>>+ const void *ro;
>>
>> We hardly need the above 2 variables.
> They're all used...
I meant that there's no necessity in them.
[...]
>> Oh, I see that the new partition representation have really simplified
>>parsing -- this function is hardly shorter than the old one... :-P
> They new format isn't supposed to be simpler to parse: it's supposed
> to be more flexible if we ever need to add more per-partition
> information than just the offset / size / read-only.
Well, if we ever need that indeed... :-)
[...]
>>>@@ -221,6 +297,14 @@ err_out:
>>>
>>> static struct of_device_id of_physmap_match[] = {
>>> {
>>>+ .compatible = "cfi-flash",
>>>+ .data = (void *)"cfi_probe",
>>>+ },
>>>+ {
>>>+ .compatible = "jedec-flash",
>>>+ .data = (void *)"jedec_probe",
>>>+ },
>>>+ {
>> This would also trigger on non-linearly mapped CFI or JEDEC
>>flashes which is not a good idea -- however, as we're using the MTD
>>probing code anyway, it will just fail, so it's not luckily is not a
>>fatal design flaw.
> Well, if there's nothing else to distinguish them. Which there isn't
> yet, but will need to be: see above about incomplete - helpful
> suggestions about how to describe the mapping would be more useful
> than merely pointing out the lack.
I was thinking about adding "direct-mapped" prop... but maybe adding
"ranges" to the parent node (that's "ebc") would indeed ensure that the flash
is mapped 1:1 to the EBC's parent bus also.
>>>Index: working-2.6/arch/powerpc/boot/dts/ebony.dts
>>>===================================================================
>>>--- working-2.6.orig/arch/powerpc/boot/dts/ebony.dts 2007-07-30 17:07:14.000000000 +1000
>>>+++ working-2.6/arch/powerpc/boot/dts/ebony.dts 2007-07-30 17:07:14.000000000 +1000
>>[...]
>>>@@ -158,14 +161,20 @@
>>> };
>>> large-flash@2,0 {
>> Hmm... what does @2,0 mean? :-O
> EBC chip select 2, offset 0.
Well, so this node is under some kind of local bus node -- that's good.
Didn't know that the spec allows commas after @...
>>>- device_type = "rom";
>>>- compatible = "direct-mapped";
>>>- probe-type = "JEDEC";
>>>+ compatible = "jedec-flash";
>>> bank-width = <1>;
>>>- partitions = <0 380000
>>>- 380000 80000>;
>>>- partition-names = "fs", "firmware";
>>>+// partitions = <0 380000
>>>+// 380000 80000>;
>>>+// partition-names = "fs", "firmware";
>>> reg = <2 0 400000>;
>>>+ #address-cells = <1>;
>>>+ #size-cells = <1>;
>> Heh...
> Yeah, that bit's a bit ugly, I'll grant you.
Don't we need "ranges" here, at least from the formal PoV -- as the parent
and child address spaces differ? I know the physmap_of parser doesn't care but
still...
>>>+ };
>>> };
>> So, I don't see what we're really winning with your changes...
> "direct-mapped" is simply not a sufficiently specific compatible
> property, no two ways about it.
Yes, for example "direct-mapped-cfi" and "direct-mapped-jedec" would have
been better...
> This spec still needs more specific
> description of some properties, at least for JEDEC flashes.
Yes, of course...
WBR, Sergei
^ permalink raw reply
* Re: [PATCH 3/3] First cut at PReP support for arch/powerpc
From: Jon Loeliger @ 2007-08-03 15:24 UTC (permalink / raw)
To: David Gibson; +Cc: linuxppc-dev@ozlabs.org, Paul Mackerras
In-Reply-To: <20070803063532.GB14456@localhost.localdomain>
On Fri, 2007-08-03 at 01:35, David Gibson wrote:
> > > + PIC8259: interrupt-controller {
> > > + device_type = "i8259";
> > >
> > > device_type = "interrupt-controller".
>
> Is that really right? The MPIC binding, at least, has device_type =
> "open-pic" rather than "interrupt-controller".
>
> > > + compatible = "prep,iic";
> > > + reg = < 00000001 00000020 00000002
> > > + 00000001 000000a0 00000002
> > > + 00000001 000004d0 00000002>;
> > > + interrupts = <00000000 00000003
> > > + 00000002 00000003>;
> > > + interrupt-parent = <&MPIC>;
> > > + };
> > > + };
> > > +
> > > + MPIC: interrupt-controller@d {
> > > + device_type = "open-pic";
> > >
> > > device_type = "interrupt-controller".
>
> Not according to the binding in booting-without-of.txt
My understanding here, though possibly flawed, is that the current
implementation has "open-pic" but _should_ have "interrupt-controller"
as that is the officially correct name.
I _think_ this means we need a transitional period where we update
the code to look for "interrupt-controller", and obsoletedly, looks
for the "open-pic", while we transition to the new, correct name.
I'm just betting that if I'm wrong Segher will tell me! :-)
jdl
^ permalink raw reply
* pci in arch/powerpc vs arch/ppc
From: Alexandros Kostopoulos @ 2007-08-03 14:58 UTC (permalink / raw)
To: linuxppc-dev
Hi all,
in the old arch/ppc tree, there was a function called pq2ads_setup_pci()
that set up PCI regs for 8272xx, in m82xx_pci.c. I was wandering, where
are these registers configured now in arch/powerpc? I can't seem to find
these code now.
Also, I can see that now bus 0, dev 0 (which I think represents the host
bridge, right?) is now excluded using pq2_pci_exclude_device, but it
wasn't in older code. Why is that?
thank you in advance...
^ permalink raw reply
* Re: [PATCH 1/1] lro: Generic Large Receive Offload for TCP traffic
From: Jörn Engel @ 2007-08-03 13:41 UTC (permalink / raw)
To: Jan-Bernd Themann
Cc: Thomas Klein, Jeff Garzik, Jan-Bernd Themann, netdev,
linux-kernel, linux-ppc, Christoph Raisch, Marcus Eder,
Andrew Gallatin, Stefan Roscher, David Miller
In-Reply-To: <200708031441.20632.ossthema@de.ibm.com>
On Fri, 3 August 2007 14:41:19 +0200, Jan-Bernd Themann wrote:
>
> This patch provides generic Large Receive Offload (LRO) functionality
> for IPv4/TCP traffic.
>
> LRO combines received tcp packets to a single larger tcp packet and
> passes them then to the network stack in order to increase performance
> (throughput). The interface supports two modes: Drivers can either pass
> SKBs or fragment lists to the LRO engine.
Maybe this is a stupid question, but why is LRO done at the device
driver level?
If it is a unversal performance benefit, I would have expected it to be
done generically, i.e. have all packets moved into network layer pass
through LRO instead.
> +void lro_flush_pkt(struct net_lro_mgr *lro_mgr,
> + struct iphdr *iph, struct tcphdr *tcph);
In particular this bit looks like it should be driven by a timeout,
which would be settable via /proc/sys/net/core/lro_timeout or similar.
Jörn
--
Rules of Optimization:
Rule 1: Don't do it.
Rule 2 (for experts only): Don't do it yet.
-- M.A. Jackson
^ permalink raw reply
* [PATCH] lro: eHEA example how to use LRO
From: Jan-Bernd Themann @ 2007-08-03 12:41 UTC (permalink / raw)
To: netdev
Cc: Thomas Klein, Jeff Garzik, Jan-Bernd Themann, linux-kernel,
linux-ppc, Christoph Raisch, Marcus Eder, Andrew Gallatin,
Stefan Roscher, David Miller
This patch shows how the generic LRO interface is used for SKB mode
Signed-off-by: Jan-Bernd Themann <themann@de.ibm.com>
---
drivers/net/Kconfig | 1 +
drivers/net/ehea/ehea.h | 9 ++++-
drivers/net/ehea/ehea_ethtool.c | 15 +++++++
drivers/net/ehea/ehea_main.c | 84 +++++++++++++++++++++++++++++++++++---
4 files changed, 101 insertions(+), 8 deletions(-)
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index f8a602c..fec4004 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -2399,6 +2399,7 @@ config CHELSIO_T3
config EHEA
tristate "eHEA Ethernet support"
depends on IBMEBUS
+ select INET_LRO
---help---
This driver supports the IBM pSeries eHEA ethernet adapter.
diff --git a/drivers/net/ehea/ehea.h b/drivers/net/ehea/ehea.h
index d67f97b..70e33fe 100644
--- a/drivers/net/ehea/ehea.h
+++ b/drivers/net/ehea/ehea.h
@@ -33,13 +33,14 @@
#include <linux/ethtool.h>
#include <linux/vmalloc.h>
#include <linux/if_vlan.h>
+#include <linux/inet_lro.h>
#include <asm/ibmebus.h>
#include <asm/abs_addr.h>
#include <asm/io.h>
#define DRV_NAME "ehea"
-#define DRV_VERSION "EHEA_0073"
+#define DRV_VERSION "EHEA_0074"
/* eHEA capability flags */
#define DLPAR_PORT_ADD_REM 1
@@ -58,6 +59,7 @@
#define EHEA_SMALL_QUEUES
#define EHEA_NUM_TX_QP 1
+#define EHEA_LRO_MAX_AGGR 64
#ifdef EHEA_SMALL_QUEUES
#define EHEA_MAX_CQE_COUNT 1023
@@ -84,6 +86,8 @@
#define EHEA_RQ2_PKT_SIZE 1522
#define EHEA_L_PKT_SIZE 256 /* low latency */
+#define MAX_LRO_DESCRIPTORS 8
+
/* Send completion signaling */
/* Protection Domain Identifier */
@@ -376,6 +380,8 @@ struct ehea_port_res {
u64 tx_packets;
u64 rx_packets;
u32 poll_counter;
+ struct net_lro_mgr lro_mgr;
+ struct net_lro_desc lro_desc[MAX_LRO_DESCRIPTORS];
};
@@ -427,6 +433,7 @@ struct ehea_port {
u32 msg_enable;
u32 sig_comp_iv;
u32 state;
+ u32 lro_max_aggr;
u8 full_duplex;
u8 autoneg;
u8 num_def_qps;
diff --git a/drivers/net/ehea/ehea_ethtool.c b/drivers/net/ehea/ehea_ethtool.c
index decec8c..29ef7a9 100644
--- a/drivers/net/ehea/ehea_ethtool.c
+++ b/drivers/net/ehea/ehea_ethtool.c
@@ -183,6 +183,9 @@ static char ehea_ethtool_stats_keys[][ETH_GSTRING_LEN] = {
{"PR5 free_swqes"},
{"PR6 free_swqes"},
{"PR7 free_swqes"},
+ {"LRO aggregated"},
+ {"LRO flushed"},
+ {"LRO no_desc"},
};
static void ehea_get_strings(struct net_device *dev, u32 stringset, u8 *data)
@@ -239,6 +242,18 @@ static void ehea_get_ethtool_stats(struct net_device *dev,
for (k = 0; k < 8; k++)
data[i++] = atomic_read(&port->port_res[k].swqe_avail);
+ for (k = 0, tmp = 0; k < EHEA_MAX_PORT_RES; k++)
+ tmp |= port->port_res[k].lro_mgr.stats.aggregated;
+ data[i++] = tmp;
+
+ for (k = 0, tmp = 0; k < EHEA_MAX_PORT_RES; k++)
+ tmp |= port->port_res[k].lro_mgr.stats.flushed;
+ data[i++] = tmp;
+
+ for (k = 0, tmp = 0; k < EHEA_MAX_PORT_RES; k++)
+ tmp |= port->port_res[k].lro_mgr.stats.no_desc;
+ data[i++] = tmp;
+
}
const struct ethtool_ops ehea_ethtool_ops = {
diff --git a/drivers/net/ehea/ehea_main.c b/drivers/net/ehea/ehea_main.c
index 9756211..fbaa395 100644
--- a/drivers/net/ehea/ehea_main.c
+++ b/drivers/net/ehea/ehea_main.c
@@ -52,6 +52,8 @@ static int rq2_entries = EHEA_DEF_ENTRIES_RQ2;
static int rq3_entries = EHEA_DEF_ENTRIES_RQ3;
static int sq_entries = EHEA_DEF_ENTRIES_SQ;
static int use_mcs = 0;
+static int use_lro = 0;
+static int lro_max_aggr = EHEA_LRO_MAX_AGGR;
static int num_tx_qps = EHEA_NUM_TX_QP;
module_param(msg_level, int, 0);
@@ -60,6 +62,8 @@ module_param(rq2_entries, int, 0);
module_param(rq3_entries, int, 0);
module_param(sq_entries, int, 0);
module_param(use_mcs, int, 0);
+module_param(use_lro, int, 0);
+module_param(lro_max_aggr, int, 0);
module_param(num_tx_qps, int, 0);
MODULE_PARM_DESC(num_tx_qps, "Number of TX-QPS");
@@ -77,6 +81,10 @@ MODULE_PARM_DESC(sq_entries, " Number of entries for the Send Queue "
"[2^x - 1], x = [6..14]. Default = "
__MODULE_STRING(EHEA_DEF_ENTRIES_SQ) ")");
MODULE_PARM_DESC(use_mcs, " 0:NAPI, 1:Multiple receive queues, Default = 1 ");
+MODULE_PARM_DESC(lro_max_aggr, " LRO: Max packets to be aggregated. Default = "
+ __MODULE_STRING(EHEA_LRO_MAX_AGGR));
+MODULE_PARM_DESC(use_lro, " Large Receive Offload, 1: enable, 0: disable, "
+ "Default = 0");
static int port_name_cnt = 0;
static LIST_HEAD(adapter_list);
@@ -389,6 +397,60 @@ static int ehea_treat_poll_error(struct ehea_port_res *pr, int rq,
return 0;
}
+static int get_skb_hdr(struct sk_buff *skb, void **iphdr,
+ void **tcph, u64 *hdr_flags, void *priv)
+{
+ struct ehea_cqe *cqe = priv;
+ unsigned int ip_len;
+ struct iphdr *iph;
+
+ /* non tcp/udp packets */
+ if (!cqe->header_length)
+ return -1;
+
+ /* non tcp packet */
+ skb_reset_network_header(skb);
+ iph = ip_hdr(skb);
+ if (iph->protocol != IPPROTO_TCP)
+ return -1;
+
+ ip_len = ip_hdrlen(skb);
+ skb_set_transport_header(skb, ip_len);
+ *tcph = tcp_hdr(skb);
+
+ /* check if ip header and tcp header are complete */
+ if (iph->tot_len < ip_len + tcp_hdrlen(skb))
+ return -1;
+
+ *hdr_flags = LRO_IPV4 | LRO_TCP;
+ *iphdr = iph;
+
+ return 0;
+}
+
+static void ehea_proc_skb(struct ehea_port_res *pr, struct ehea_cqe *cqe,
+ struct sk_buff *skb)
+{
+ int vlan_extracted = (cqe->status & EHEA_CQE_VLAN_TAG_XTRACT)
+ && pr->port->vgrp;
+
+ if (use_lro) {
+ if (vlan_extracted)
+ lro_vlan_hwaccel_receive_skb(&pr->lro_mgr, skb,
+ pr->port->vgrp,
+ cqe->vlan_tag,
+ cqe);
+ else
+ lro_receive_skb(&pr->lro_mgr, skb, cqe);
+ } else {
+ if (vlan_extracted)
+ vlan_hwaccel_receive_skb(skb, pr->port->vgrp,
+ cqe->vlan_tag);
+ else
+ netif_receive_skb(skb);
+ }
+}
+
static struct ehea_cqe *ehea_proc_rwqes(struct net_device *dev,
struct ehea_port_res *pr,
int *budget)
@@ -460,13 +522,7 @@ static struct ehea_cqe *ehea_proc_rwqes(struct net_device *dev,
processed_rq3++;
}
- if ((cqe->status & EHEA_CQE_VLAN_TAG_XTRACT)
- && port->vgrp)
- vlan_hwaccel_receive_skb(skb, port->vgrp,
- cqe->vlan_tag);
- else
- netif_receive_skb(skb);
-
+ ehea_proc_skb(pr, cqe, skb);
dev->last_rx = jiffies;
} else {
pr->p_stats.poll_receive_errors++;
@@ -478,6 +534,8 @@ static struct ehea_cqe *ehea_proc_rwqes(struct net_device *dev,
}
cqe = ehea_poll_rq1(qp, &wqe_index);
}
+ if (use_lro)
+ lro_flush_all(&pr->lro_mgr);
pr->rx_packets += processed;
*budget -= processed;
@@ -1233,6 +1291,15 @@ static int ehea_init_port_res(struct ehea_port *port, struct ehea_port_res *pr,
set_bit(__LINK_STATE_START, &pr->d_netdev->state);
strcpy(pr->d_netdev->name, port->netdev->name);
+ pr->lro_mgr.max_aggr = pr->port->lro_max_aggr;
+ pr->lro_mgr.max_desc = MAX_LRO_DESCRIPTORS;
+ pr->lro_mgr.lro_arr = pr->lro_desc;
+ pr->lro_mgr.get_skb_header = get_skb_hdr;
+ pr->lro_mgr.features = LRO_F_NAPI | LRO_F_EXTRACT_VLAN_ID;
+ pr->lro_mgr.dev = port->netdev;
+ pr->lro_mgr.ip_summed = CHECKSUM_UNNECESSARY;
+ pr->lro_mgr.ip_summed_aggr = CHECKSUM_UNNECESSARY;
+
ret = 0;
goto out;
@@ -1712,6 +1779,7 @@ static int ehea_change_mtu(struct net_device *dev, int new_mtu)
if ((new_mtu < 68) || (new_mtu > EHEA_MAX_PACKET_SIZE))
return -EINVAL;
dev->mtu = new_mtu;
+
return 0;
}
@@ -2669,6 +2737,8 @@ struct ehea_port *ehea_setup_single_port(struct ehea_adapter *adapter,
goto out_dereg_bc;
}
+ port->lro_max_aggr = lro_max_aggr;
+
ret = ehea_get_jumboframe_status(port, &jumbo);
if (ret)
ehea_error("failed determining jumbo frame status for %s",
--
1.5.2
^ permalink raw reply related
* [PATCH 1/1] lro: Generic Large Receive Offload for TCP traffic
From: Jan-Bernd Themann @ 2007-08-03 12:41 UTC (permalink / raw)
To: David Miller
Cc: Thomas Klein, Jeff Garzik, Jan-Bernd Themann, netdev,
linux-kernel, linux-ppc, Christoph Raisch, Marcus Eder,
Andrew Gallatin, Stefan Roscher
This patch provides generic Large Receive Offload (LRO) functionality
for IPv4/TCP traffic.
LRO combines received tcp packets to a single larger tcp packet and
passes them then to the network stack in order to increase performance
(throughput). The interface supports two modes: Drivers can either pass
SKBs or fragment lists to the LRO engine.
Signed-off-by: Jan-Bernd Themann <themann@de.ibm.com>
---
include/linux/inet_lro.h | 177 ++++++++++++++
net/ipv4/Kconfig | 8 +
net/ipv4/Makefile | 1 +
net/ipv4/inet_lro.c | 600 ++++++++++++++++++++++++++++++++++++++++++++++
4 files changed, 786 insertions(+), 0 deletions(-)
create mode 100644 include/linux/inet_lro.h
create mode 100644 net/ipv4/inet_lro.c
diff --git a/include/linux/inet_lro.h b/include/linux/inet_lro.h
new file mode 100644
index 0000000..e1fc1d1
--- /dev/null
+++ b/include/linux/inet_lro.h
@@ -0,0 +1,177 @@
+/*
+ * linux/include/linux/inet_lro.h
+ *
+ * Large Receive Offload (ipv4 / tcp)
+ *
+ * (C) Copyright IBM Corp. 2007
+ *
+ * Authors:
+ * Jan-Bernd Themann <themann@de.ibm.com>
+ * Christoph Raisch <raisch@de.ibm.com>
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef __INET_LRO_H_
+#define __INET_LRO_H_
+
+#include <net/ip.h>
+#include <net/tcp.h>
+
+/*
+ * LRO statistics
+ */
+
+struct net_lro_stats {
+ unsigned long aggregated;
+ unsigned long flushed;
+ unsigned long no_desc;
+};
+
+/*
+ * LRO descriptor for a tcp session
+ */
+struct net_lro_desc {
+ struct sk_buff *parent;
+ struct sk_buff *last_skb;
+ struct skb_frag_struct *next_frag;
+ struct iphdr *iph;
+ struct tcphdr *tcph;
+ struct vlan_group *vgrp;
+ __wsum data_csum;
+ u32 tcp_rcv_tsecr;
+ u32 tcp_rcv_tsval;
+ u32 tcp_ack;
+ u32 tcp_next_seq;
+ u32 skb_tot_frags_len;
+ u16 ip_tot_len;
+ u16 tcp_saw_tstamp; /* timestamps enabled */
+ u16 tcp_window;
+ u16 vlan_tag;
+ int pkt_aggr_cnt; /* counts aggregated packets */
+ int vlan_packet;
+ int mss;
+ int active;
+};
+
+/*
+ * Large Receive Offload (LRO) Manager
+ *
+ * Fields must be set by driver
+ */
+
+struct net_lro_mgr {
+ struct net_device *dev;
+ struct net_lro_stats stats;
+
+ /* LRO features */
+ unsigned long features;
+#define LRO_F_NAPI 1 /* Pass packets to stack via NAPI */
+#define LRO_F_EXTRACT_VLAN_ID 2 /* Set flag if VLAN IDs are extracted
+ from received packets and eth protocol
+ is still ETH_P_8021Q */
+
+ u32 ip_summed; /* Set in non generated SKBs in page mode */
+ u32 ip_summed_aggr; /* Set in aggregated SKBs: CHECKSUM_UNNECESSARY
+ * or CHECKSUM_NONE */
+
+ int max_desc; /* Max number of LRO descriptors */
+ int max_aggr; /* Max number of LRO packets to be aggregated */
+
+ struct net_lro_desc *lro_arr; /* Array of LRO descriptors */
+
+ /*
+ * Optimized driver functions
+ *
+ * get_skb_header: returns tcp and ip header for packet in SKB
+ */
+ int (*get_skb_header)(struct sk_buff *skb, void **ip_hdr,
+ void **tcpudp_hdr, u64 *hdr_flags, void *priv);
+
+ /* hdr_flags: */
+#define LRO_IPV4 1 /* ip_hdr is IPv4 header */
+#define LRO_TCP 2 /* tcpudp_hdr is TCP header */
+
+ /*
+ * get_frag_header: returns mac, tcp and ip header for packet in SKB
+ *
+ * @hdr_flags: Indicate what kind of LRO has to be done
+ * (IPv4/IPv6/TCP/UDP)
+ */
+ int (*get_frag_header)(struct skb_frag_struct *frag, void **mac_hdr,
+ void **ip_hdr, void **tcpudp_hdr, u64 *hdr_flags,
+ void *priv);
+};
+
+/*
+ * Processes a SKB
+ *
+ * @lro_mgr: LRO manager to use
+ * @skb: SKB to aggregate
+ * @priv: Private data that may be used by driver functions
+ * (for example get_tcp_ip_hdr)
+ */
+
+void lro_receive_skb(struct net_lro_mgr *lro_mgr,
+ struct sk_buff *skb,
+ void *priv);
+
+/*
+ * Processes a SKB with VLAN HW acceleration support
+ */
+
+void lro_vlan_hwaccel_receive_skb(struct net_lro_mgr *lro_mgr,
+ struct sk_buff *skb,
+ struct vlan_group *vgrp,
+ u16 vlan_tag,
+ void *priv);
+
+/*
+ * Processes a fragment list
+ *
+ * This functions aggregate fragments and generate SKBs do pass
+ * the packets to the stack.
+ *
+ * @lro_mgr: LRO manager to use
+ * @frags: Fragment to be processed. Must contain entire header in first
+ * element.
+ * @len: Length of received data
+ * @true_size: Actual size of memory the fragment is consuming
+ * @priv: Private data that may be used by driver functions
+ * (for example get_tcp_ip_hdr)
+ */
+
+void lro_receive_frags(struct net_lro_mgr *lro_mgr,
+ struct skb_frag_struct *frags,
+ int len, int true_size, void *priv, __wsum sum);
+
+void lro_vlan_hwaccel_receive_frags(struct net_lro_mgr *lro_mgr,
+ struct skb_frag_struct *frags,
+ int len, int true_size,
+ struct vlan_group *vgrp,
+ u16 vlan_tag,
+ void *priv, __wsum sum);
+
+/*
+ * Forward all aggregated SKBs held by lro_mgr to network stack
+ */
+
+void lro_flush_all(struct net_lro_mgr *lro_mgr);
+
+void lro_flush_pkt(struct net_lro_mgr *lro_mgr,
+ struct iphdr *iph, struct tcphdr *tcph);
+
+#endif
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index fb79097..d894f61 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -394,6 +394,14 @@ config INET_XFRM_MODE_BEET
If unsure, say Y.
+config INET_LRO
+ tristate "Large Receive Offload (ipv4/tcp)"
+
+ ---help---
+ Support for Large Receive Offload (ipv4/tcp).
+
+ If unsure, say Y.
+
config INET_DIAG
tristate "INET: socket monitoring interface"
default y
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index fbf1674..a02c36d 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -25,6 +25,7 @@ obj-$(CONFIG_INET_ESP) += esp4.o
obj-$(CONFIG_INET_IPCOMP) += ipcomp.o
obj-$(CONFIG_INET_XFRM_TUNNEL) += xfrm4_tunnel.o
obj-$(CONFIG_INET_XFRM_MODE_BEET) += xfrm4_mode_beet.o
+obj-$(CONFIG_INET_LRO) += inet_lro.o
obj-$(CONFIG_INET_TUNNEL) += tunnel4.o
obj-$(CONFIG_INET_XFRM_MODE_TRANSPORT) += xfrm4_mode_transport.o
obj-$(CONFIG_INET_XFRM_MODE_TUNNEL) += xfrm4_mode_tunnel.o
diff --git a/net/ipv4/inet_lro.c b/net/ipv4/inet_lro.c
new file mode 100644
index 0000000..3309adc
--- /dev/null
+++ b/net/ipv4/inet_lro.c
@@ -0,0 +1,600 @@
+/*
+ * linux/net/ipv4/inet_lro.c
+ *
+ * Large Receive Offload (ipv4 / tcp)
+ *
+ * (C) Copyright IBM Corp. 2007
+ *
+ * Authors:
+ * Jan-Bernd Themann <themann@de.ibm.com>
+ * Christoph Raisch <raisch@de.ibm.com>
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+
+#include <linux/module.h>
+#include <linux/if_vlan.h>
+#include <linux/inet_lro.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Jan-Bernd Themann <themann@de.ibm.com>");
+MODULE_DESCRIPTION("Large Receive Offload (ipv4 / tcp)");
+
+#define TCP_HDR_LEN(tcph) (tcph->doff << 2)
+#define IP_HDR_LEN(iph) (iph->ihl << 2)
+#define TCP_PAYLOAD_LENGTH(iph, tcph) \
+ (ntohs(iph->tot_len) - IP_HDR_LEN(iph) - TCP_HDR_LEN(tcph))
+
+#define IPH_LEN_WO_OPTIONS 5
+#define TCPH_LEN_WO_OPTIONS 5
+#define TCPH_LEN_W_TIMESTAMP 8
+
+#define LRO_MAX_PG_HLEN 64
+
+#define LRO_INC_STATS(lro_mgr, attr) { lro_mgr->stats.attr++; }
+
+/*
+ * Basic tcp checks whether packet is suitable for LRO
+ */
+
+static int lro_tcp_ip_check(struct iphdr *iph, struct tcphdr *tcph,
+ int len, struct net_lro_desc *lro_desc)
+{
+ /* check ip header: don't aggregate padded frames */
+ if (ntohs(iph->tot_len) != len)
+ return -1;
+
+ if (TCP_PAYLOAD_LENGTH(iph, tcph) == 0)
+ return -1;
+
+ if (iph->ihl != IPH_LEN_WO_OPTIONS)
+ return -1;
+
+ if (tcph->cwr || tcph->ece || tcph->urg || !tcph->ack
+ || tcph->rst || tcph->syn || tcph->fin)
+ return -1;
+
+ if (INET_ECN_is_ce(ipv4_get_dsfield(iph)))
+ return -1;
+
+ if (tcph->doff != TCPH_LEN_WO_OPTIONS
+ && tcph->doff != TCPH_LEN_W_TIMESTAMP)
+ return -1;
+
+ /* check tcp options (only timestamp allowed) */
+ if (tcph->doff == TCPH_LEN_W_TIMESTAMP) {
+ u32 *topt = (u32 *)(tcph + 1);
+
+ if (*topt != htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16)
+ | (TCPOPT_TIMESTAMP << 8)
+ | TCPOLEN_TIMESTAMP))
+ return -1;
+
+ /* timestamp should be in right order */
+ topt++;
+ if (lro_desc && after(ntohl(lro_desc->tcp_rcv_tsval),
+ ntohl(*topt)))
+ return -1;
+
+ /* timestamp reply should not be zero */
+ topt++;
+ if (*topt == 0)
+ return -1;
+ }
+
+ return 0;
+}
+
+static void lro_update_tcp_ip_header(struct net_lro_desc *lro_desc)
+{
+ struct iphdr *iph = lro_desc->iph;
+ struct tcphdr *tcph = lro_desc->tcph;
+ u32 *p;
+ __wsum tcp_hdr_csum;
+
+ tcph->ack_seq = lro_desc->tcp_ack;
+ tcph->window = lro_desc->tcp_window;
+
+ if (lro_desc->tcp_saw_tstamp) {
+ p = (u32 *)(tcph + 1);
+ *(p+2) = lro_desc->tcp_rcv_tsecr;
+ }
+
+ iph->tot_len = htons(lro_desc->ip_tot_len);
+
+ iph->check = 0;
+ iph->check = ip_fast_csum((u8 *)lro_desc->iph, iph->ihl);
+
+ tcph->check = 0;
+ tcp_hdr_csum = csum_partial((u8 *)tcph, TCP_HDR_LEN(tcph), 0);
+ lro_desc->data_csum = csum_add(lro_desc->data_csum, tcp_hdr_csum);
+ tcph->check = csum_tcpudp_magic(iph->saddr, iph->daddr,
+ lro_desc->ip_tot_len -
+ IP_HDR_LEN(iph), IPPROTO_TCP,
+ lro_desc->data_csum);
+}
+
+static __wsum lro_tcp_data_csum(struct iphdr *iph, struct tcphdr *tcph, int len)
+{
+ __wsum tcp_csum;
+ __wsum tcp_hdr_csum;
+ __wsum tcp_ps_hdr_csum;
+
+ tcp_csum = ~csum_unfold(tcph->check);
+ tcp_hdr_csum = csum_partial((u8 *)tcph, TCP_HDR_LEN(tcph), tcp_csum);
+
+ tcp_ps_hdr_csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
+ len + TCP_HDR_LEN(tcph),
+ IPPROTO_TCP, 0);
+
+ return csum_sub(csum_sub(tcp_csum, tcp_hdr_csum),
+ tcp_ps_hdr_csum);
+}
+
+static void lro_init_desc(struct net_lro_desc *lro_desc, struct sk_buff *skb,
+ struct iphdr *iph, struct tcphdr *tcph,
+ u16 vlan_tag, struct vlan_group *vgrp)
+{
+ int nr_frags;
+ u32 *ptr;
+ u32 tcp_data_len = TCP_PAYLOAD_LENGTH(iph, tcph);
+
+ nr_frags = skb_shinfo(skb)->nr_frags;
+ lro_desc->parent = skb;
+ lro_desc->next_frag = &(skb_shinfo(skb)->frags[nr_frags]);
+ lro_desc->iph = iph;
+ lro_desc->tcph = tcph;
+ lro_desc->tcp_next_seq = ntohl(tcph->seq) + tcp_data_len;
+ lro_desc->tcp_ack = ntohl(tcph->ack_seq);
+ lro_desc->tcp_window = tcph->window;
+
+ lro_desc->pkt_aggr_cnt = 1;
+ lro_desc->ip_tot_len = ntohs(iph->tot_len);
+
+ if (tcph->doff == 8) {
+ ptr = (u32 *)(tcph+1);
+ lro_desc->tcp_saw_tstamp = 1;
+ lro_desc->tcp_rcv_tsval = *(ptr+1);
+ lro_desc->tcp_rcv_tsecr = *(ptr+2);
+ }
+
+ lro_desc->mss = tcp_data_len;
+ lro_desc->vgrp = vgrp;
+ lro_desc->vlan_tag = vlan_tag;
+ lro_desc->active = 1;
+
+ lro_desc->data_csum = lro_tcp_data_csum(iph, tcph,
+ tcp_data_len);
+}
+
+static inline void lro_clear_desc(struct net_lro_desc *lro_desc)
+{
+ memset(lro_desc, 0, sizeof(struct net_lro_desc));
+}
+
+static void lro_add_common(struct net_lro_desc *lro_desc, struct iphdr *iph,
+ struct tcphdr *tcph, int tcp_data_len)
+{
+ struct sk_buff *parent = lro_desc->parent;
+ u32 *topt;
+
+ lro_desc->pkt_aggr_cnt++;
+ lro_desc->ip_tot_len += tcp_data_len;
+ lro_desc->tcp_next_seq += tcp_data_len;
+ lro_desc->tcp_window = tcph->window;
+ lro_desc->tcp_ack = tcph->ack_seq;
+
+ /* don't update tcp_rcv_tsval, would not work with PAWS */
+ if (lro_desc->tcp_saw_tstamp) {
+ topt = (u32 *) (tcph + 1);
+ lro_desc->tcp_rcv_tsecr = *(topt + 2);
+ }
+
+ lro_desc->data_csum = csum_block_add(lro_desc->data_csum,
+ lro_tcp_data_csum(iph, tcph,
+ tcp_data_len),
+ parent->len);
+
+ parent->len += tcp_data_len;
+ parent->data_len += tcp_data_len;
+ if (tcp_data_len > lro_desc->mss)
+ lro_desc->mss = tcp_data_len;
+}
+
+static void lro_add_packet(struct net_lro_desc *lro_desc, struct sk_buff *skb,
+ struct iphdr *iph, struct tcphdr *tcph)
+{
+ struct sk_buff *parent = lro_desc->parent;
+ int tcp_data_len = TCP_PAYLOAD_LENGTH(iph, tcph);
+
+ lro_add_common(lro_desc, iph, tcph, tcp_data_len);
+
+ skb_pull(skb, (skb->len - tcp_data_len));
+ parent->truesize += skb->truesize;
+
+ if (lro_desc->last_skb)
+ lro_desc->last_skb->next = skb;
+ else
+ skb_shinfo(parent)->frag_list = skb;
+
+ lro_desc->last_skb = skb;
+}
+
+static void lro_add_frags(struct net_lro_desc *lro_desc,
+ int len, int hlen, int truesize,
+ struct skb_frag_struct *skb_frags,
+ struct iphdr *iph, struct tcphdr *tcph)
+{
+ struct sk_buff *skb = lro_desc->parent;
+ int tcp_data_len = TCP_PAYLOAD_LENGTH(iph, tcph);
+
+ lro_add_common(lro_desc, iph, tcph, tcp_data_len);
+
+ skb->truesize += truesize;
+
+ skb_frags[0].page_offset += hlen;
+ skb_frags[0].size -= hlen;
+
+ while (tcp_data_len > 0) {
+ *(lro_desc->next_frag) = *skb_frags;
+ tcp_data_len -= skb_frags->size;
+ lro_desc->next_frag++;
+ skb_frags++;
+ skb_shinfo(skb)->nr_frags++;
+ }
+}
+
+static int lro_check_tcp_conn(struct net_lro_desc *lro_desc,
+ struct iphdr *iph,
+ struct tcphdr *tcph)
+{
+ if ((lro_desc->iph->saddr != iph->saddr)
+ || (lro_desc->iph->daddr != iph->daddr)
+ || (lro_desc->tcph->source != tcph->source)
+ || (lro_desc->tcph->dest != tcph->dest))
+ return -1;
+ return 0;
+}
+
+static struct net_lro_desc *lro_get_desc(struct net_lro_mgr *lro_mgr,
+ struct net_lro_desc *lro_arr,
+ struct iphdr *iph,
+ struct tcphdr *tcph)
+{
+ struct net_lro_desc *lro_desc = NULL;
+ struct net_lro_desc *tmp;
+ int max_desc = lro_mgr->max_desc;
+ int i;
+
+ for (i = 0; i < max_desc; i++) {
+ tmp = &lro_arr[i];
+ if (tmp->active)
+ if (!lro_check_tcp_conn(tmp, iph, tcph)) {
+ lro_desc = tmp;
+ goto out;
+ }
+ }
+
+ for (i = 0; i < max_desc; i++) {
+ if (!lro_arr[i].active) {
+ lro_desc = &lro_arr[i];
+ goto out;
+ }
+ }
+
+ LRO_INC_STATS(lro_mgr, no_desc);
+out:
+ return lro_desc;
+}
+
+static void lro_flush(struct net_lro_mgr *lro_mgr,
+ struct net_lro_desc *lro_desc)
+{
+ if (lro_desc->pkt_aggr_cnt > 1)
+ lro_update_tcp_ip_header(lro_desc);
+
+ skb_shinfo(lro_desc->parent)->gso_size = lro_desc->mss;
+
+ if (lro_desc->vgrp) {
+ if (test_bit(LRO_F_NAPI, &lro_mgr->features))
+ vlan_hwaccel_receive_skb(lro_desc->parent,
+ lro_desc->vgrp,
+ lro_desc->vlan_tag);
+ else
+ vlan_hwaccel_rx(lro_desc->parent,
+ lro_desc->vgrp,
+ lro_desc->vlan_tag);
+
+ } else {
+ if (test_bit(LRO_F_NAPI, &lro_mgr->features))
+ netif_receive_skb(lro_desc->parent);
+ else
+ netif_rx(lro_desc->parent);
+ }
+
+ LRO_INC_STATS(lro_mgr, flushed);
+ lro_clear_desc(lro_desc);
+}
+
+static int __lro_proc_skb(struct net_lro_mgr *lro_mgr, struct sk_buff *skb,
+ struct vlan_group *vgrp, u16 vlan_tag, void *priv)
+{
+ struct net_lro_desc *lro_desc;
+ struct iphdr *iph;
+ struct tcphdr *tcph;
+ u64 flags;
+ int vlan_hdr_len = 0;
+
+ if (!lro_mgr->get_skb_header
+ || lro_mgr->get_skb_header(skb, (void *)&iph, (void *)&tcph,
+ &flags, priv))
+ goto out;
+
+ if (!(flags & LRO_IPV4) || !(flags & LRO_TCP))
+ goto out;
+
+ lro_desc = lro_get_desc(lro_mgr, lro_mgr->lro_arr, iph, tcph);
+ if (!lro_desc)
+ goto out;
+
+ if ((skb->protocol == htons(ETH_P_8021Q))
+ && !test_bit(LRO_F_EXTRACT_VLAN_ID, &lro_mgr->features))
+ vlan_hdr_len = VLAN_HLEN;
+
+ if (!lro_desc->active) { /* start new lro session */
+ if (lro_tcp_ip_check(iph, tcph, skb->len - vlan_hdr_len, NULL))
+ goto out;
+
+ skb->ip_summed = lro_mgr->ip_summed_aggr;
+ lro_init_desc(lro_desc, skb, iph, tcph, vlan_tag, vgrp);
+ LRO_INC_STATS(lro_mgr, aggregated);
+ return 0;
+ }
+
+ if (lro_desc->tcp_next_seq != ntohl(tcph->seq))
+ goto out2;
+
+ if (lro_tcp_ip_check(iph, tcph, skb->len, lro_desc))
+ goto out2;
+
+ lro_add_packet(lro_desc, skb, iph, tcph);
+ LRO_INC_STATS(lro_mgr, aggregated);
+
+ if ((lro_desc->pkt_aggr_cnt >= lro_mgr->max_aggr) ||
+ lro_desc->parent->len > (0xFFFF - lro_mgr->dev->mtu))
+ lro_flush(lro_mgr, lro_desc);
+
+ return 0;
+
+out2: /* send aggregated SKBs to stack */
+ lro_flush(lro_mgr, lro_desc);
+
+out: /* Original SKB has to be posted to stack */
+ skb->ip_summed = lro_mgr->ip_summed;
+ return 1;
+}
+
+
+static struct sk_buff *lro_gen_skb(struct net_lro_mgr *lro_mgr,
+ struct skb_frag_struct *frags,
+ int len, int true_size,
+ void *mac_hdr,
+ int hlen, __wsum sum,
+ u32 ip_summed)
+{
+ struct sk_buff *skb;
+ struct skb_frag_struct *skb_frags;
+ int data_len = len;
+ int hdr_len = min(len, hlen);
+
+ skb = netdev_alloc_skb(lro_mgr->dev, hlen);
+ if (!skb)
+ return NULL;
+
+ skb->len = len;
+ skb->data_len = len - hdr_len;
+ skb->truesize += true_size;
+ skb->tail += hdr_len;
+
+ memcpy(skb->data, mac_hdr, hdr_len);
+
+ skb_frags = skb_shinfo(skb)->frags;
+ while (data_len > 0) {
+ *skb_frags = *frags;
+ data_len -= frags->size;
+ skb_frags++;
+ frags++;
+ skb_shinfo(skb)->nr_frags++;
+ }
+
+ skb_shinfo(skb)->frags[0].page_offset += hdr_len;
+ skb_shinfo(skb)->frags[0].size -= hdr_len;
+
+ skb->ip_summed = ip_summed;
+ skb->csum = sum;
+ skb->protocol = eth_type_trans(skb, lro_mgr->dev);
+ return skb;
+}
+
+static struct sk_buff *__lro_proc_segment(struct net_lro_mgr *lro_mgr,
+ struct skb_frag_struct *frags,
+ int len, int true_size,
+ struct vlan_group *vgrp,
+ u16 vlan_tag, void *priv, __wsum sum)
+{
+ struct net_lro_desc *lro_desc;
+ struct iphdr *iph;
+ struct tcphdr *tcph;
+ struct sk_buff *skb;
+ u64 flags;
+ void *mac_hdr;
+ int mac_hdr_len;
+ int hdr_len = LRO_MAX_PG_HLEN;
+ int vlan_hdr_len = 0;
+
+ if (!lro_mgr->get_frag_header
+ || lro_mgr->get_frag_header(frags, (void *)&mac_hdr, (void *)&iph,
+ (void *)&tcph, &flags, priv)) {
+ mac_hdr = page_address(frags->page) + frags->page_offset;
+ goto out1;
+ }
+
+ if (!(flags & LRO_IPV4) || !(flags & LRO_TCP))
+ goto out1;
+
+ hdr_len = (int)((void *)(tcph) + TCP_HDR_LEN(tcph) - mac_hdr);
+ mac_hdr_len = (int)((void *)(iph) - mac_hdr);
+
+ lro_desc = lro_get_desc(lro_mgr, lro_mgr->lro_arr, iph, tcph);
+ if (!lro_desc)
+ goto out1;
+
+ if (!lro_desc->active) { /* start new lro session */
+ if (lro_tcp_ip_check(iph, tcph, len - mac_hdr_len, NULL))
+ goto out1;
+
+ skb = lro_gen_skb(lro_mgr, frags, len, true_size, mac_hdr,
+ hdr_len, 0, lro_mgr->ip_summed_aggr);
+ if (!skb)
+ goto out;
+
+ if ((skb->protocol == htons(ETH_P_8021Q))
+ && !test_bit(LRO_F_EXTRACT_VLAN_ID, &lro_mgr->features))
+ vlan_hdr_len = VLAN_HLEN;
+
+ iph = (void *)(skb->data + vlan_hdr_len);
+ tcph = (void *)((u8 *)skb->data + vlan_hdr_len
+ + IP_HDR_LEN(iph));
+
+ lro_init_desc(lro_desc, skb, iph, tcph, 0, NULL);
+ LRO_INC_STATS(lro_mgr, aggregated);
+ return 0;
+ }
+
+ if (lro_desc->tcp_next_seq != ntohl(tcph->seq))
+ goto out2;
+
+ if (lro_tcp_ip_check(iph, tcph, len - mac_hdr_len, lro_desc))
+ goto out2;
+
+ lro_add_frags(lro_desc, len, hdr_len, true_size, frags, iph, tcph);
+ LRO_INC_STATS(lro_mgr, aggregated);
+
+ if ((skb_shinfo(lro_desc->parent)->nr_frags >= lro_mgr->max_aggr) ||
+ lro_desc->parent->len > (0xFFFF - lro_mgr->dev->mtu))
+ lro_flush(lro_mgr, lro_desc);
+
+ return NULL;
+
+out2: /* send aggregated packets to the stack */
+ lro_flush(lro_mgr, lro_desc);
+
+out1: /* Original packet has to be posted to the stack */
+ skb = lro_gen_skb(lro_mgr, frags, len, true_size, mac_hdr,
+ hdr_len, sum, lro_mgr->ip_summed);
+out:
+ return skb;
+}
+
+void lro_receive_skb(struct net_lro_mgr *lro_mgr,
+ struct sk_buff *skb,
+ void *priv)
+{
+ if (__lro_proc_skb(lro_mgr, skb, NULL, 0, priv)) {
+ if (test_bit(LRO_F_NAPI, &lro_mgr->features))
+ netif_receive_skb(skb);
+ else
+ netif_rx(skb);
+ }
+}
+EXPORT_SYMBOL(lro_receive_skb);
+
+void lro_vlan_hwaccel_receive_skb(struct net_lro_mgr *lro_mgr,
+ struct sk_buff *skb,
+ struct vlan_group *vgrp,
+ u16 vlan_tag,
+ void *priv)
+{
+ if (__lro_proc_skb(lro_mgr, skb, vgrp, vlan_tag, priv)) {
+ if (test_bit(LRO_F_NAPI, &lro_mgr->features))
+ vlan_hwaccel_receive_skb(skb, vgrp, vlan_tag);
+ else
+ vlan_hwaccel_rx(skb, vgrp, vlan_tag);
+ }
+}
+EXPORT_SYMBOL(lro_vlan_hwaccel_receive_skb);
+
+void lro_receive_frags(struct net_lro_mgr *lro_mgr,
+ struct skb_frag_struct *frags,
+ int len, int true_size, void *priv, __wsum sum)
+{
+ struct sk_buff *skb;
+
+ skb = __lro_proc_segment(lro_mgr, frags, len, true_size, NULL, 0,
+ priv, sum);
+ if (!skb)
+ return;
+
+ if (test_bit(LRO_F_NAPI, &lro_mgr->features))
+ netif_receive_skb(skb);
+ else
+ netif_rx(skb);
+}
+EXPORT_SYMBOL(lro_receive_frags);
+
+void lro_vlan_hwaccel_receive_frags(struct net_lro_mgr *lro_mgr,
+ struct skb_frag_struct *frags,
+ int len, int true_size,
+ struct vlan_group *vgrp,
+ u16 vlan_tag, void *priv, __wsum sum)
+{
+ struct sk_buff *skb;
+
+ skb = __lro_proc_segment(lro_mgr, frags, len, true_size, vgrp,
+ vlan_tag, priv, sum);
+ if (!skb)
+ return;
+
+ if (test_bit(LRO_F_NAPI, &lro_mgr->features))
+ vlan_hwaccel_receive_skb(skb, vgrp, vlan_tag);
+ else
+ vlan_hwaccel_rx(skb, vgrp, vlan_tag);
+}
+EXPORT_SYMBOL(lro_vlan_hwaccel_receive_frags);
+
+void lro_flush_all(struct net_lro_mgr *lro_mgr)
+{
+ int i;
+ struct net_lro_desc *lro_desc = lro_mgr->lro_arr;
+
+ for (i = 0; i < lro_mgr->max_desc; i++) {
+ if (lro_desc[i].active)
+ lro_flush(lro_mgr, &lro_desc[i]);
+ }
+}
+EXPORT_SYMBOL(lro_flush_all);
+
+void lro_flush_pkt(struct net_lro_mgr *lro_mgr,
+ struct iphdr *iph, struct tcphdr *tcph)
+{
+ struct net_lro_desc *lro_desc;
+
+ lro_desc = lro_get_desc(lro_mgr, lro_mgr->lro_arr, iph, tcph);
+ if (lro_desc->active)
+ lro_flush(lro_mgr, lro_desc);
+}
+EXPORT_SYMBOL(lro_flush_pkt);
--
1.5.2
^ permalink raw reply related
* [PATCH 0/1] lro: Generic Large Receive Offload for TCP traffic
From: Jan-Bernd Themann @ 2007-08-03 12:41 UTC (permalink / raw)
To: David Miller
Cc: Thomas Klein, Jeff Garzik, Jan-Bernd Themann, netdev,
linux-kernel, linux-ppc, Christoph Raisch, Marcus Eder,
Andrew Gallatin, Stefan Roscher
Hi,
I think this patch could be the final version for now. It has been tested
on two platforms (power and x86_64) and works very well.
Apart from David Miller and Evgeniy Polaykov, we'd like to thank especially
Andrew Gallatin for his great reviews and help to make that happen.
After some discussion we decided to post the LRO patch separately from the
driver patches. Our final driver patches for LRO will be posted later with
some additional fixes for upstream inclusion to the netdev git.
However, I'll also post our LRO patch for the driver today as an example
of how to use this interface.
Thanks a lot,
Jan-Bernd
[PATCH 1/1] lro: Generic Large Receive Offload for TCP traffic
Changes to http://www.spinics.net/lists/netdev/msg37084.html
1) Fixed the LRO_MAX_PG_HLEN bug
2) skb->ip_summed can now be defined by driver for aggregated packets
3) The problem that the "ramp up" for tcp connections between machines
with different MTU size (1500 vs 9000) is very slow has been fixed
by setting skb->gso_size.
4) Checksum problem for little endian machines has been fixed
5) missing additon of vlan_hdr_len for TCP header determination has been added.
^ permalink raw reply
* Re: [PATCH 4/6] PowerPC 440EPx: Sequoia bootwrapper
From: Josh Boyer @ 2007-08-03 12:59 UTC (permalink / raw)
To: Stefan Roese; +Cc: linuxppc-dev
In-Reply-To: <200708030838.25574.sr@denx.de>
On Fri, 3 Aug 2007 08:38:25 +0200
Stefan Roese <sr@denx.de> wrote:
> > > +static void ibm440epx_fixup_memsize(void)
> >
> > And we should move this at the same time. Isn't denali used by another
> > CPU as well?
>
> Right now the Denali DDR(2) core is only used by the 440EPx/440GRx. But other
> AMCC PPC's using it will follow soon.
So that would be a yes :) Good. Then we should add the function to
4xx.c and call it ibm4xx_denali_fixup_memsize.
josh
^ permalink raw reply
* Re: [PATCH 5/6] PowerPC 440EPx: Sequoia board support
From: Josh Boyer @ 2007-08-03 12:57 UTC (permalink / raw)
To: Kumar Gala; +Cc: linuxppc-dev, David Gibson
In-Reply-To: <B53DD351-58BF-4D89-9EEB-D0A3D260E2DA@kernel.crashing.org>
On Fri, 3 Aug 2007 03:39:23 -0500
Kumar Gala <galak@kernel.crashing.org> wrote:
>
> On Aug 2, 2007, at 3:32 PM, Josh Boyer wrote:
>
> > On Wed, 1 Aug 2007 15:05:42 +1000
> > David Gibson <david@gibson.dropbear.id.au> wrote:
> >
> >> On Wed, Aug 01, 2007 at 07:01:17AM +0200, Segher Boessenkool wrote:
> >>>>> + { /* 440EPX - without Security/Kasumi */
> >>>>> + .pvr_mask = 0xf0000fff,
> >>>>> + .pvr_value = 0x200008D4,
> >>>>> + .cpu_name = "440EPX - no Security/Kasumi",
> >>>>> + .cpu_features = CPU_FTRS_44X,
> >>>>> + .cpu_user_features = COMMON_USER_BOOKE |
> >>>>> PPC_FEATURE_HAS_FPU, /*
> >>>>> 440EPX has an FPU */
> >>>>> + .icache_bsize = 32,
> >>>>> + .dcache_bsize = 32,
> >>>>> + },
> >>>>
> >>>> Since the with/without Security/Kasumi versions have no
> >>>> differences in
> >>>> their cputable entry other than the PVR, couldn't you just
> >>>> remove the
> >>>> relevant PVR bit from the mask and use a single entry?
> >>>
> >>> And get rid of the stupid "has an FPU" comment at the same time
> >>> please :-)
> >>
> >> Actually that comment may be worthwhile if expanded a little. I
> >> think
> >> the point is that 440EPx *unlike most other 4xx chips* has an
> >> FPU. So
> >> the point of the comment is not explaining the feature bit, which is
> >> obvious, but as a "no, really, it does".
> >
> > Right. 440EP(x) are the only currently available 44x chips that
> > contain an FPU, so I also think the comment can stay.
>
> I agree w/Segher the comment is redundant. Just make a note of the
> fact that we really have FPU in the commit message.
Fine. I don't really care either way because in the grand scheme of
things, it has no significant impact either way. It's just a comment.
josh
^ permalink raw reply
* Re: [PATCH 5/6] PowerPC 440EPx: Sequoia board support
From: Benjamin Herrenschmidt @ 2007-08-03 12:35 UTC (permalink / raw)
To: Stefan Roese; +Cc: linuxppc-dev
In-Reply-To: <200708031425.19475.sr@denx.de>
> Depends on interpretation. IIRC currently the same die is used for 440EPx and
> 440GRx. I could be wrong here though and it is just a bug in the chip. But
> anyway we should support this somehow. Could be that I missed this in the
> current 440GRx (Rainier) arch/ppc support too. I have to admit, that no
> clever solution comes to my mind right away though.
We can always come up with some kind of runtime detection, by turning on
MSR:FP, issuing an fp instruction and catching the illegal instruction
fault if any :-)
Ben.
^ permalink raw reply
* Re: 22.1-rt8 : BUG: scheduling while atomic: stress/0x00000002/739, CPU#0
From: Benjamin Herrenschmidt @ 2007-08-03 12:34 UTC (permalink / raw)
To: Pradyumna Sampath; +Cc: linuxppc-dev
In-Reply-To: <f87351060708030450u7fbd38e1o8ee491e523b30e3f@mail.gmail.com>
On Fri, 2007-08-03 at 17:20 +0530, Pradyumna Sampath wrote:
> Hi,
> I am running linux-2.6.22.1-rt8 on mpc5200 and on running stress
>
> #./stress --cpu 8 --io 4 --vm 2 --vm-bytes 2M --timeout 300s
>
> I get the following stack.But this does not happen with non -rt kernel.
>
> LR = 0xff51fcc
> BUG: scheduling while atomic: stress/0x00000002/739, CPU#0
> Call Trace:
> [c04fbb80] [c0008650] show_stack+0x50/0x190 (unreliable)
> [c04fbbb0] [c0016fc0] __schedule_bug+0x38/0x48
> [c04fbbc0] [c01d4f7c] __schedule+0x3dc/0x450
> [c04fbbf0] [c01d56c8] schedule+0x54/0xf0
> [c04fbc00] [c01d6390] rt_spin_lock_slowlock+0x100/0x224
> [c04fbc60] [c0052198] get_zone_pcp+0x30/0x58
> [c04fbc80] [c0053078] free_hot_cold_page+0x114/0x1fc
> [c04fbcb0] [c0056424] put_page+0x50/0x15c
> [c04fbcd0] [c0069854] free_page_and_swap_cache+0x44/0x80
> [c04fbce0] [c005d760] unmap_vmas+0x2e0/0x788
> [c04fbd70] [c006292c] exit_mmap+0x74/0x108
> [c04fbda0] [c001adc0] mmput+0x54/0xf0
> [c04fbdc0] [c001ecfc] exit_mm+0xc0/0x124
> [c04fbde0] [c001ffd0] do_exit+0x164/0x8bc
> [c04fbe20] [c0020760] do_group_exit+0x38/0x90
> [c04fbe40] [c002ba48] get_signal_to_deliver+0x2ac/0x3c0
> [c04fbe70] [c000739c] do_signal+0x50/0x60c
> [c04fbf40] [c000fec0] do_user_signal+0x74/0xc4
> --- Exception: c00 at 0xfeab1dc
> LR = 0xff51fcc
>
> I have also tested the same case on x86 and that does not throw up
> anything. Another observation is the fact is that this happens when
> --vm is > 1. So I am guessing its a very PowerPC specific problem.
> Maybe some part of the vm code for powerpc. This happens for all
> powerpc -rt kernels I have used.
Weird. unmap_vmas calls to free_page_and_swap_cache() should be
happening within a get/put_cpu section, thus with preempt disabled, thus
I fail to see how rt_spin_lock would be allowed to preempt.
^ permalink raw reply
* Re: [PATCH 5/6] PowerPC 440EPx: Sequoia board support
From: Stefan Roese @ 2007-08-03 12:25 UTC (permalink / raw)
To: Valentine Barshak; +Cc: linuxppc-dev
In-Reply-To: <46B31352.1060503@ru.mvista.com>
On Friday 03 August 2007, Valentine Barshak wrote:
> >> Should the 440GRX PVR additions be done in a separate patch? Or is the
> >> PVR and cpu features truly the only difference between 440EPx and
> >> 440GRx?
> >
> > I think it makes sense to add the 440GRx with this patchset too. The
> > 440GRx is a subset of the 440EPx, missing some stuff like USB, FPU. And
> > the AMCC Rainier 440GRx eval board is a subset of the Sequoia eval board.
> > So no new board specific sources should be necessary to support the
> > Rainier, just a different defconfig file.
> >
> > Best regards,
> > Stefan
>
> I have a Rainier 440GRx board and the PVR is equal to the 440EPx one
> (0x200008D0). This has to be handled somehow, since the
> PPC_FEATURE_HAS_FPU flag should *not* be set for 440GRx.
> I'm really not sure how though. Any ideas are greatly appreciated :)
> Is it a h/w bug?
Depends on interpretation. IIRC currently the same die is used for 440EPx and
440GRx. I could be wrong here though and it is just a bug in the chip. But
anyway we should support this somehow. Could be that I missed this in the
current 440GRx (Rainier) arch/ppc support too. I have to admit, that no
clever solution comes to my mind right away though.
Best regards,
Stefan
^ permalink raw reply
* 22.1-rt8: BUG: scheduling while atomic: stress/0x00000002/739, CPU#0
From: Pradyumna Sampath @ 2007-08-03 11:53 UTC (permalink / raw)
To: linuxppc-embedded
Hi,
First of all Im sorry to crosspost between linuxppc-dev and linuxppc-embedded.
I am running linux-2.6.22.1-rt8 on mpc5200 and on running stress
#./stress --cpu 8 --io 4 --vm 2 --vm-bytes 2M --timeout 300s
I get the following stack.But this does not happen with non -rt kernel.
LR = 0xff51fcc
BUG: scheduling while atomic: stress/0x00000002/739, CPU#0
Call Trace:
[c04fbb80] [c0008650] show_stack+0x50/0x190 (unreliable)
[c04fbbb0] [c0016fc0] __schedule_bug+0x38/0x48
[c04fbbc0] [c01d4f7c] __schedule+0x3dc/0x450
[c04fbbf0] [c01d56c8] schedule+0x54/0xf0
[c04fbc00] [c01d6390] rt_spin_lock_slowlock+0x100/0x224
[c04fbc60] [c0052198] get_zone_pcp+0x30/0x58
[c04fbc80] [c0053078] free_hot_cold_page+0x114/0x1fc
[c04fbcb0] [c0056424] put_page+0x50/0x15c
[c04fbcd0] [c0069854] free_page_and_swap_cache+0x44/0x80
[c04fbce0] [c005d760] unmap_vmas+0x2e0/0x788
[c04fbd70] [c006292c] exit_mmap+0x74/0x108
[c04fbda0] [c001adc0] mmput+0x54/0xf0
[c04fbdc0] [c001ecfc] exit_mm+0xc0/0x124
[c04fbde0] [c001ffd0] do_exit+0x164/0x8bc
[c04fbe20] [c0020760] do_group_exit+0x38/0x90
[c04fbe40] [c002ba48] get_signal_to_deliver+0x2ac/0x3c0
[c04fbe70] [c000739c] do_signal+0x50/0x60c
[c04fbf40] [c000fec0] do_user_signal+0x74/0xc4
--- Exception: c00 at 0xfeab1dc
LR = 0xff51fcc
I have also tested the same case on x86 and that does not throw up
anything. Another observation is the fact is that this happens when
--vm is > 1. So I am guessing its a very PowerPC specific problem.
Maybe some part of the vm code for powerpc. This happens for all
powerpc -rt kernels I have used.
Link to stress is : http://weather.ou.edu/~apw/projects/stress/
A friend of mine has already posted this problem to the rt-preempt
mailing list and I have posted this on the linuxppc-dev list as well.
Any pointers suggestions as to where the problem could
be is most welcome.
Thanks in advance
regards
/prady
--
htp://prady.livejournal.com
^ permalink raw reply
* Re: [PATCH] Fix special PTE code for secondary hash bucket
From: Benjamin Herrenschmidt @ 2007-08-03 11:51 UTC (permalink / raw)
To: Paul Mackerras; +Cc: linuxppc-dev
In-Reply-To: <18098.61003.38084.554299@cargo.ozlabs.ibm.com>
On Fri, 2007-08-03 at 18:58 +1000, Paul Mackerras wrote:
> The code for mapping special 4k pages on kernels using a 64kB base
> page size was missing the code for doing the RPN (real page number)
> manipulation when inserting the hardware PTE in the secondary hash
> bucket. It needs the same code as has already been added to the
> code that inserts the HPTE in the primary hash bucket. This adds it.
>
> Spotted by Ben Herrenschmidt.
>
> Signed-off-by: Paul Mackerras <paulus@samba.org>
Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
> ---
> diff --git a/arch/powerpc/mm/hash_low_64.S b/arch/powerpc/mm/hash_low_64.S
> index 4762ff7..35eabfb 100644
> --- a/arch/powerpc/mm/hash_low_64.S
> +++ b/arch/powerpc/mm/hash_low_64.S
> @@ -472,10 +472,12 @@ _GLOBAL(htab_call_hpte_insert1)
> /* Now try secondary slot */
>
> /* real page number in r5, PTE RPN value + index */
> - rldicl r5,r31,64-PTE_RPN_SHIFT,PTE_RPN_SHIFT
> + andis. r0,r31,_PAGE_4K_PFN@h
> + srdi r5,r31,PTE_RPN_SHIFT
> + bne- 3f
> sldi r5,r5,PAGE_SHIFT-HW_PAGE_SHIFT
> add r5,r5,r25
> - sldi r5,r5,HW_PAGE_SHIFT
> +3: sldi r5,r5,HW_PAGE_SHIFT
>
> /* Calculate secondary group hash */
> andc r0,r27,r28
> _______________________________________________
> Linuxppc-dev mailing list
> Linuxppc-dev@ozlabs.org
> https://ozlabs.org/mailman/listinfo/linuxppc-dev
^ permalink raw reply
* 22.1-rt8 : BUG: scheduling while atomic: stress/0x00000002/739, CPU#0
From: Pradyumna Sampath @ 2007-08-03 11:50 UTC (permalink / raw)
To: linuxppc-dev
Hi,
I am running linux-2.6.22.1-rt8 on mpc5200 and on running stress
#./stress --cpu 8 --io 4 --vm 2 --vm-bytes 2M --timeout 300s
I get the following stack.But this does not happen with non -rt kernel.
LR = 0xff51fcc
BUG: scheduling while atomic: stress/0x00000002/739, CPU#0
Call Trace:
[c04fbb80] [c0008650] show_stack+0x50/0x190 (unreliable)
[c04fbbb0] [c0016fc0] __schedule_bug+0x38/0x48
[c04fbbc0] [c01d4f7c] __schedule+0x3dc/0x450
[c04fbbf0] [c01d56c8] schedule+0x54/0xf0
[c04fbc00] [c01d6390] rt_spin_lock_slowlock+0x100/0x224
[c04fbc60] [c0052198] get_zone_pcp+0x30/0x58
[c04fbc80] [c0053078] free_hot_cold_page+0x114/0x1fc
[c04fbcb0] [c0056424] put_page+0x50/0x15c
[c04fbcd0] [c0069854] free_page_and_swap_cache+0x44/0x80
[c04fbce0] [c005d760] unmap_vmas+0x2e0/0x788
[c04fbd70] [c006292c] exit_mmap+0x74/0x108
[c04fbda0] [c001adc0] mmput+0x54/0xf0
[c04fbdc0] [c001ecfc] exit_mm+0xc0/0x124
[c04fbde0] [c001ffd0] do_exit+0x164/0x8bc
[c04fbe20] [c0020760] do_group_exit+0x38/0x90
[c04fbe40] [c002ba48] get_signal_to_deliver+0x2ac/0x3c0
[c04fbe70] [c000739c] do_signal+0x50/0x60c
[c04fbf40] [c000fec0] do_user_signal+0x74/0xc4
--- Exception: c00 at 0xfeab1dc
LR = 0xff51fcc
I have also tested the same case on x86 and that does not throw up
anything. Another observation is the fact is that this happens when
--vm is > 1. So I am guessing its a very PowerPC specific problem.
Maybe some part of the vm code for powerpc. This happens for all
powerpc -rt kernels I have used.
Link to stress is : http://weather.ou.edu/~apw/projects/stress/
A friend of mine has already posted this problem to the rt-preempt
mailing list. Any pointers suggestions as to where the problem could
be is most welcome.
Thanks in advance
regards
/prady
--
htp://prady.livejournal.com
^ permalink raw reply
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox