* Re: [PATCH 18/18] cpufreq: powerpc/platforms/cell: move cpufreq driver to drivers/cpufreq
From: Viresh Kumar @ 2013-04-04 13:50 UTC (permalink / raw)
To: Arnd Bergmann
Cc: robin.randhawa, linux-pm, patches, Liviu.Dudau, linux-kernel,
cpufreq, rjw, Steve.Bannister, arvind.chauhan, linuxppc-dev,
linaro-kernel, charles.garcia-tobin
In-Reply-To: <201304041302.09995.arnd@arndb.de>
[-- Attachment #1: Type: text/plain, Size: 2756 bytes --]
On 4 April 2013 18:32, Arnd Bergmann <arnd@arndb.de> wrote:
> Moving the files is good, but I don't see a reason to make the cbe_cpufreq.h
> header globally visible in include/linux/. Isn't it just used as the
> interface between ppc_cbe_cpufreq_pmi.c and ppc_cbe_cpufreq.c?
When I moved it initially, I thought it was also used by
arch/powerpc/platforms/cell/cpufreq_spudemand.c and hence moved it
to include/linux; I never realized I was wrong.
Here is the fixup (the complete patch, original + fixup, is also attached
for Rafael to apply):
Compile tested only.
---
drivers/cpufreq/ppc_cbe_cpufreq.c | 3 ++-
{include/linux => drivers/cpufreq}/ppc_cbe_cpufreq.h | 0
drivers/cpufreq/ppc_cbe_cpufreq_pervasive.c | 3 ++-
drivers/cpufreq/ppc_cbe_cpufreq_pmi.c | 3 ++-
4 files changed, 6 insertions(+), 3 deletions(-)
rename {include/linux => drivers/cpufreq}/ppc_cbe_cpufreq.h (100%)
diff --git a/drivers/cpufreq/ppc_cbe_cpufreq.c
b/drivers/cpufreq/ppc_cbe_cpufreq.c
index 138d88c..56e0c17 100644
--- a/drivers/cpufreq/ppc_cbe_cpufreq.c
+++ b/drivers/cpufreq/ppc_cbe_cpufreq.c
@@ -23,12 +23,13 @@
#include <linux/cpufreq.h>
#include <linux/module.h>
#include <linux/of_platform.h>
-#include <linux/ppc_cbe_cpufreq.h>
#include <asm/machdep.h>
#include <asm/prom.h>
#include <asm/cell-regs.h>
+#include "ppc_cbe_cpufreq.h"
+
static DEFINE_MUTEX(cbe_switch_mutex);
diff --git a/include/linux/ppc_cbe_cpufreq.h b/drivers/cpufreq/ppc_cbe_cpufreq.h
similarity index 100%
rename from include/linux/ppc_cbe_cpufreq.h
rename to drivers/cpufreq/ppc_cbe_cpufreq.h
diff --git a/drivers/cpufreq/ppc_cbe_cpufreq_pervasive.c
b/drivers/cpufreq/ppc_cbe_cpufreq_pervasive.c
index 6708710..84d2f2c 100644
--- a/drivers/cpufreq/ppc_cbe_cpufreq_pervasive.c
+++ b/drivers/cpufreq/ppc_cbe_cpufreq_pervasive.c
@@ -25,12 +25,13 @@
#include <linux/io.h>
#include <linux/kernel.h>
-#include <linux/ppc_cbe_cpufreq.h>
#include <linux/time.h>
#include <asm/machdep.h>
#include <asm/hw_irq.h>
#include <asm/cell-regs.h>
+#include "ppc_cbe_cpufreq.h"
+
/* to write to MIC register */
static u64 MIC_Slow_Fast_Timer_table[] = {
[0 ... 7] = 0x007fc00000000000ull,
diff --git a/drivers/cpufreq/ppc_cbe_cpufreq_pmi.c
b/drivers/cpufreq/ppc_cbe_cpufreq_pmi.c
index 448b888..d29e8da 100644
--- a/drivers/cpufreq/ppc_cbe_cpufreq_pmi.c
+++ b/drivers/cpufreq/ppc_cbe_cpufreq_pmi.c
@@ -25,7 +25,6 @@
#include <linux/timer.h>
#include <linux/module.h>
#include <linux/of_platform.h>
-#include <linux/ppc_cbe_cpufreq.h>
#include <asm/processor.h>
#include <asm/prom.h>
@@ -36,6 +35,8 @@
#include <asm/time.h>
#endif
+#include "ppc_cbe_cpufreq.h"
+
static u8 pmi_slow_mode_limit[MAX_CBE];
bool cbe_cpufreq_has_pmi = false;
[-- Attachment #2: 0001-cpufreq-powerpc-platforms-cell-move-cpufreq-driver-t.patch --]
[-- Type: application/octet-stream, Size: 7490 bytes --]
From 47275a6b874359f1b42fbea3e9d10372049ee6c6 Mon Sep 17 00:00:00 2001
Message-Id: <47275a6b874359f1b42fbea3e9d10372049ee6c6.1365083336.git.viresh.kumar@linaro.org>
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Mon, 25 Mar 2013 11:20:23 +0530
Subject: [PATCH] cpufreq: powerpc/platforms/cell: move cpufreq driver to
drivers/cpufreq
This patch moves the cpufreq driver for powerpc platforms/cell to drivers/cpufreq.
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: linuxppc-dev@lists.ozlabs.org
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
---
arch/powerpc/platforms/cell/Kconfig | 26 +---------------------
arch/powerpc/platforms/cell/Makefile | 3 ---
drivers/cpufreq/Kconfig.powerpc | 18 +++++++++++++++
drivers/cpufreq/Makefile | 3 +++
.../cpufreq/ppc_cbe_cpufreq.c | 3 ++-
.../cpufreq/ppc_cbe_cpufreq.h | 4 ++--
.../cpufreq/ppc_cbe_cpufreq_pervasive.c | 2 +-
.../cpufreq/ppc_cbe_cpufreq_pmi.c | 2 +-
8 files changed, 28 insertions(+), 33 deletions(-)
rename arch/powerpc/platforms/cell/cbe_cpufreq.c => drivers/cpufreq/ppc_cbe_cpufreq.c (99%)
rename arch/powerpc/platforms/cell/cbe_cpufreq.h => drivers/cpufreq/ppc_cbe_cpufreq.h (82%)
rename arch/powerpc/platforms/cell/cbe_cpufreq_pervasive.c => drivers/cpufreq/ppc_cbe_cpufreq_pervasive.c (99%)
rename arch/powerpc/platforms/cell/cbe_cpufreq_pmi.c => drivers/cpufreq/ppc_cbe_cpufreq_pmi.c (99%)
diff --git a/arch/powerpc/platforms/cell/Kconfig b/arch/powerpc/platforms/cell/Kconfig
index 53aaefe..9978f59 100644
--- a/arch/powerpc/platforms/cell/Kconfig
+++ b/arch/powerpc/platforms/cell/Kconfig
@@ -113,34 +113,10 @@ config CBE_THERM
default m
depends on CBE_RAS && SPU_BASE
-config CBE_CPUFREQ
- tristate "CBE frequency scaling"
- depends on CBE_RAS && CPU_FREQ
- default m
- help
- This adds the cpufreq driver for Cell BE processors.
- For details, take a look at <file:Documentation/cpu-freq/>.
- If you don't have such processor, say N
-
-config CBE_CPUFREQ_PMI_ENABLE
- bool "CBE frequency scaling using PMI interface"
- depends on CBE_CPUFREQ
- default n
- help
- Select this, if you want to use the PMI interface
- to switch frequencies. Using PMI, the
- processor will not only be able to run at lower speed,
- but also at lower core voltage.
-
-config CBE_CPUFREQ_PMI
- tristate
- depends on CBE_CPUFREQ_PMI_ENABLE
- default CBE_CPUFREQ
-
config PPC_PMI
tristate
default y
- depends on CBE_CPUFREQ_PMI || PPC_IBM_CELL_POWERBUTTON
+ depends on CPU_FREQ_CBE_PMI || PPC_IBM_CELL_POWERBUTTON
help
PMI (Platform Management Interrupt) is a way to
communicate with the BMC (Baseboard Management Controller).
diff --git a/arch/powerpc/platforms/cell/Makefile b/arch/powerpc/platforms/cell/Makefile
index a4a8935..fe053e7 100644
--- a/arch/powerpc/platforms/cell/Makefile
+++ b/arch/powerpc/platforms/cell/Makefile
@@ -5,9 +5,6 @@ obj-$(CONFIG_PPC_CELL_NATIVE) += iommu.o setup.o spider-pic.o \
obj-$(CONFIG_CBE_RAS) += ras.o
obj-$(CONFIG_CBE_THERM) += cbe_thermal.o
-obj-$(CONFIG_CBE_CPUFREQ_PMI) += cbe_cpufreq_pmi.o
-obj-$(CONFIG_CBE_CPUFREQ) += cbe-cpufreq.o
-cbe-cpufreq-y += cbe_cpufreq_pervasive.o cbe_cpufreq.o
obj-$(CONFIG_CBE_CPUFREQ_SPU_GOVERNOR) += cpufreq_spudemand.o
obj-$(CONFIG_PPC_IBM_CELL_POWERBUTTON) += cbe_powerbutton.o
diff --git a/drivers/cpufreq/Kconfig.powerpc b/drivers/cpufreq/Kconfig.powerpc
index 2e5a007..9e1ae94 100644
--- a/drivers/cpufreq/Kconfig.powerpc
+++ b/drivers/cpufreq/Kconfig.powerpc
@@ -1,3 +1,21 @@
+config CPU_FREQ_CBE
+ tristate "CBE frequency scaling"
+ depends on CBE_RAS && PPC_CELL
+ default m
+ help
+ This adds the cpufreq driver for Cell BE processors.
+ For details, take a look at <file:Documentation/cpu-freq/>.
+ If you don't have such processor, say N
+
+config CPU_FREQ_CBE_PMI
+ bool "CBE frequency scaling using PMI interface"
+ depends on CPU_FREQ_CBE
+ default n
+ help
+ Select this, if you want to use the PMI interface to switch
+ frequencies. Using PMI, the processor will not only be able to run at
+ lower speed, but also at lower core voltage.
+
config CPU_FREQ_MAPLE
bool "Support for Maple 970FX Evaluation Board"
depends on PPC_MAPLE
diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile
index 7b16092f..bfefa22 100644
--- a/drivers/cpufreq/Makefile
+++ b/drivers/cpufreq/Makefile
@@ -78,6 +78,9 @@ obj-$(CONFIG_ARCH_TEGRA) += tegra-cpufreq.o
##################################################################################
# PowerPC platform drivers
+obj-$(CONFIG_CPU_FREQ_CBE) += ppc-cbe-cpufreq.o
+ppc-cbe-cpufreq-y += ppc_cbe_cpufreq_pervasive.o ppc_cbe_cpufreq.o
+obj-$(CONFIG_CPU_FREQ_CBE_PMI) += ppc_cbe_cpufreq_pmi.o
obj-$(CONFIG_CPU_FREQ_MAPLE) += maple-cpufreq.o
obj-$(CONFIG_CPU_FREQ_PMAC) += pmac32-cpufreq.o
obj-$(CONFIG_CPU_FREQ_PMAC64) += pmac64-cpufreq.o
diff --git a/arch/powerpc/platforms/cell/cbe_cpufreq.c b/drivers/cpufreq/ppc_cbe_cpufreq.c
similarity index 99%
rename from arch/powerpc/platforms/cell/cbe_cpufreq.c
rename to drivers/cpufreq/ppc_cbe_cpufreq.c
index 718c6a3..e577a1d 100644
--- a/arch/powerpc/platforms/cell/cbe_cpufreq.c
+++ b/drivers/cpufreq/ppc_cbe_cpufreq.c
@@ -27,7 +27,8 @@
#include <asm/machdep.h>
#include <asm/prom.h>
#include <asm/cell-regs.h>
-#include "cbe_cpufreq.h"
+
+#include "ppc_cbe_cpufreq.h"
static DEFINE_MUTEX(cbe_switch_mutex);
diff --git a/arch/powerpc/platforms/cell/cbe_cpufreq.h b/drivers/cpufreq/ppc_cbe_cpufreq.h
similarity index 82%
rename from arch/powerpc/platforms/cell/cbe_cpufreq.h
rename to drivers/cpufreq/ppc_cbe_cpufreq.h
index c1d86bf..b4c00a5 100644
--- a/arch/powerpc/platforms/cell/cbe_cpufreq.h
+++ b/drivers/cpufreq/ppc_cbe_cpufreq.h
@@ -1,5 +1,5 @@
/*
- * cbe_cpufreq.h
+ * ppc_cbe_cpufreq.h
*
* This file contains the definitions used by the cbe_cpufreq driver.
*
@@ -17,7 +17,7 @@ int cbe_cpufreq_get_pmode(int cpu);
int cbe_cpufreq_set_pmode_pmi(int cpu, unsigned int pmode);
-#if defined(CONFIG_CBE_CPUFREQ_PMI) || defined(CONFIG_CBE_CPUFREQ_PMI_MODULE)
+#if defined(CONFIG_CPU_FREQ_CBE_PMI) || defined(CONFIG_CPU_FREQ_CBE_PMI_MODULE)
extern bool cbe_cpufreq_has_pmi;
#else
#define cbe_cpufreq_has_pmi (0)
diff --git a/arch/powerpc/platforms/cell/cbe_cpufreq_pervasive.c b/drivers/cpufreq/ppc_cbe_cpufreq_pervasive.c
similarity index 99%
rename from arch/powerpc/platforms/cell/cbe_cpufreq_pervasive.c
rename to drivers/cpufreq/ppc_cbe_cpufreq_pervasive.c
index 20472e4..84d2f2c 100644
--- a/arch/powerpc/platforms/cell/cbe_cpufreq_pervasive.c
+++ b/drivers/cpufreq/ppc_cbe_cpufreq_pervasive.c
@@ -30,7 +30,7 @@
#include <asm/hw_irq.h>
#include <asm/cell-regs.h>
-#include "cbe_cpufreq.h"
+#include "ppc_cbe_cpufreq.h"
/* to write to MIC register */
static u64 MIC_Slow_Fast_Timer_table[] = {
diff --git a/arch/powerpc/platforms/cell/cbe_cpufreq_pmi.c b/drivers/cpufreq/ppc_cbe_cpufreq_pmi.c
similarity index 99%
rename from arch/powerpc/platforms/cell/cbe_cpufreq_pmi.c
rename to drivers/cpufreq/ppc_cbe_cpufreq_pmi.c
index 60a07a4..d29e8da 100644
--- a/arch/powerpc/platforms/cell/cbe_cpufreq_pmi.c
+++ b/drivers/cpufreq/ppc_cbe_cpufreq_pmi.c
@@ -35,7 +35,7 @@
#include <asm/time.h>
#endif
-#include "cbe_cpufreq.h"
+#include "ppc_cbe_cpufreq.h"
static u8 pmi_slow_mode_limit[MAX_CBE];
--
1.7.12.rc2.18.g61b472e
^ permalink raw reply related
* Re: [PATCH] bookehv: Handle debug exception on guest exit
From: Alexander Graf @ 2013-04-04 13:25 UTC (permalink / raw)
To: Bharat Bhushan; +Cc: scottwood, Bharat Bhushan, linuxppc-dev, kvm-ppc, kvm
In-Reply-To: <1363801557-27436-1-git-send-email-Bharat.Bhushan@freescale.com>
On 20.03.2013, at 18:45, Bharat Bhushan wrote:
> EPCR.DUVD controls whether the debug events can come in
> hypervisor mode or not. When KVM guest is using the debug
> resource then we do not want debug events to be captured
> in guest entry/exit path. So we set EPCR.DUVD when entering
> and clear EPCR.DUVD when exiting from guest.
>
> Debug instruction complete is a post-completion debug
> exception but debug event gets posted on the basis of MSR
> before the instruction is executed. Now if the instruction
> switches the context from guest mode (MSR.GS = 1) to hypervisor
> mode (MSR.GS = 0) then the xSRR0 points to first instruction of
> KVM handler and xSRR1 points that MSR.GS is clear
> (hypervisor context). Now as xSRR1.GS is used to decide whether
> KVM handler will be invoked to handle the exception or
> host kernel debug handler will be invoked to handle the exception.
> This leads to host kernel debug handler handling the exception
> which should instead be handled by KVM.
>
> This is tested on e500mc in 32 bit mode
>
> Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
> ---
> v0:
> - Do not apply this change for debug_crit as we do not know whether those chips have the issue or not.
> - corrected 64bit case branching
>=20
> arch/powerpc/kernel/exceptions-64e.S | 29 =
++++++++++++++++++++++++++++-
> arch/powerpc/kernel/head_booke.h | 26 ++++++++++++++++++++++++++
> 2 files changed, 54 insertions(+), 1 deletions(-)
>=20
> diff --git a/arch/powerpc/kernel/exceptions-64e.S =
b/arch/powerpc/kernel/exceptions-64e.S
> index 4684e33..8b26294 100644
> --- a/arch/powerpc/kernel/exceptions-64e.S
> +++ b/arch/powerpc/kernel/exceptions-64e.S
> @@ -516,6 +516,33 @@ kernel_dbg_exc:
> andis. r15,r14,DBSR_IC@h
> beq+ 1f
>=20
> +#ifdef CONFIG_KVM_BOOKE_HV
> + /*
> + * EPCR.DUVD controls whether the debug events can come in
> + * hypervisor mode or not. When KVM guest is using the debug
> + * resource then we do not want debug events to be captured
> + * in guest entry/exit path. So we set EPCR.DUVD when entering
> + * and clears EPCR.DUVD when exiting from guest.
> + * Debug instruction complete is a post-completion debug
> + * exception but debug event gets posted on the basis of MSR
> + * before the instruction is executed. Now if the instruction
> + * switches the context from guest mode (MSR.GS =3D 1) to =
hypervisor
> + * mode (MSR.GS =3D 0) then the xSRR0 points to first =
instruction of
Can't we just execute that code path with MSR.DE=0?
Alex
> + * KVM handler and xSRR1 points that MSR.GS is clear
> + * (hypervisor context). Now as xSRR1.GS is used to decide =
whether
> + * KVM handler will be invoked to handle the exception or host
> + * host kernel debug handler will be invoked to handle the =
exception.
> + * This leads to host kernel debug handler handling the =
exception
> + * which should either be handled by KVM.
> + */
> + mfspr r10, SPRN_EPCR
> + andis. r10,r10,SPRN_EPCR_DUVD@h
> + beq+ 2f
> +
> + andis. r10,r9,MSR_GS@h
> + beq+ 3f
> +2:
> +#endif
> LOAD_REG_IMMEDIATE(r14,interrupt_base_book3e)
> LOAD_REG_IMMEDIATE(r15,interrupt_end_book3e)
> cmpld cr0,r10,r14
> @@ -523,7 +550,7 @@ kernel_dbg_exc:
> blt+ cr0,1f
> bge+ cr1,1f
>=20
> - /* here it looks like we got an inappropriate debug exception. =
*/
> +3: /* here it looks like we got an inappropriate debug exception. =
*/
> lis r14,DBSR_IC@h /* clear the IC event */
> rlwinm r11,r11,0,~MSR_DE /* clear DE in the DSRR1 value =
*/
> mtspr SPRN_DBSR,r14
> diff --git a/arch/powerpc/kernel/head_booke.h =
b/arch/powerpc/kernel/head_booke.h
> index 5f051ee..edc6a3b 100644
> --- a/arch/powerpc/kernel/head_booke.h
> +++ b/arch/powerpc/kernel/head_booke.h
> @@ -285,7 +285,33 @@ label:
> mfspr r10,SPRN_DBSR; /* check single-step/branch =
taken */ \
> andis. r10,r10,(DBSR_IC|DBSR_BT)@h; =
\
> beq+ 2f; =
\
> +#ifdef CONFIG_KVM_BOOKE_HV =
\
> + /* =
\
> + * EPCR.DUVD controls whether the debug events can come in =
\
> + * hypervisor mode or not. When KVM guest is using the debug =
\
> + * resource then we do not want debug events to be captured =
\
> + * in guest entry/exit path. So we set EPCR.DUVD when entering =
\
> + * and clears EPCR.DUVD when exiting from guest. =
\
> + * Debug instruction complete is a post-completion debug =
\
> + * exception but debug event gets posted on the basis of MSR =
\
> + * before the instruction is executed. Now if the instruction =
\
> + * switches the context from guest mode (MSR.GS =3D 1) to =
hypervisor \
> + * mode (MSR.GS =3D 0) then the xSRR0 points to first =
instruction of \
> + * KVM handler and xSRR1 points that MSR.GS is clear =
\
> + * (hypervisor context). Now as xSRR1.GS is used to decide =
whether \
> + * KVM handler will be invoked to handle the exception or host =
\
> + * host kernel debug handler will be invoked to handle the =
exception. \
> + * This leads to host kernel debug handler handling the =
exception \
> + * which should either be handled by KVM. =
\
> + */ =
\
> + mfspr r10, SPRN_EPCR; =
\
> + andis. r10,r10,SPRN_EPCR_DUVD@h; =
\
> + beq+ 3f; =
\
> =
\
> + andis. r10,r9,MSR_GS@h; =
\
> + beq+ 1f; =
\
> +3: =
\
> +#endif =
\
> lis r10,KERNELBASE@h; /* check if exception in vectors =
*/ \
> ori r10,r10,KERNELBASE@l; =
\
> cmplw r12,r10; =
\
> --=20
> 1.7.0.4
>=20
>=20
> --
> To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply
* Re: [PATCH 18/18] cpufreq: powerpc/platforms/cell: move cpufreq driver to drivers/cpufreq
From: Arnd Bergmann @ 2013-04-04 13:02 UTC (permalink / raw)
To: Viresh Kumar
Cc: robin.randhawa, linux-pm, patches, Liviu.Dudau, linux-kernel,
cpufreq, rjw, Steve.Bannister, arvind.chauhan, linuxppc-dev,
linaro-kernel, charles.garcia-tobin
In-Reply-To: <0f989533f2e47d4d632710029bf0273082bafee3.1365079581.git.viresh.kumar@linaro.org>
On Thursday 04 April 2013, Viresh Kumar wrote:
> This patch moves cpufreq driver of powerpc platforms/cell to drivers/cpufreq.
>
> Cc: Arnd Bergmann <arnd@arndb.de>
> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
> Cc: linuxppc-dev@lists.ozlabs.org
> Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
> ---
> Compile Tested only.
>
> arch/powerpc/platforms/cell/Kconfig | 26 +---------------------
> arch/powerpc/platforms/cell/Makefile | 3 ---
> drivers/cpufreq/Kconfig.powerpc | 18 +++++++++++++++
> drivers/cpufreq/Makefile | 3 +++
> .../cpufreq/ppc_cbe_cpufreq.c | 2 +-
> .../cpufreq/ppc_cbe_cpufreq_pervasive.c | 3 +--
> .../cpufreq/ppc_cbe_cpufreq_pmi.c | 3 +--
> .../linux/ppc_cbe_cpufreq.h | 4 ++--
Moving the files is good, but I don't see a reason to make the cbe_cpufreq.h
header globally visible in include/linux/. Isn't it just used as the
interface between ppc_cbe_cpufreq_pmi.c and ppc_cbe_cpufreq.c?
Arnd
^ permalink raw reply
* RE: [PATCH 5/5 v11] iommu/fsl: Freescale PAMU driver and iommu implementation.
From: Sethi Varun-B16395 @ 2013-04-04 13:00 UTC (permalink / raw)
To: Alex Williamson, Joerg Roedel
Cc: Wood Scott-B07421, linux-kernel@vger.kernel.org,
Yoder Stuart-B08248, iommu@lists.linux-foundation.org,
linuxppc-dev@lists.ozlabs.org
In-Reply-To: <1365012091.2882.252.camel@bling.home>
> -----Original Message-----
> From: Alex Williamson [mailto:alex.williamson@redhat.com]
> Sent: Wednesday, April 03, 2013 11:32 PM
> To: Joerg Roedel
> Cc: Sethi Varun-B16395; Yoder Stuart-B08248; Wood Scott-B07421;
> iommu@lists.linux-foundation.org; linuxppc-dev@lists.ozlabs.org; linux-
> kernel@vger.kernel.org; galak@kernel.crashing.org;
> benh@kernel.crashing.org
> Subject: Re: [PATCH 5/5 v11] iommu/fsl: Freescale PAMU driver and iommu
> implementation.
>
> On Tue, 2013-04-02 at 18:18 +0200, Joerg Roedel wrote:
> > Cc'ing Alex Williamson
> >
> > Alex, can you please review the iommu-group part of this patch?
>
> Sure, it looks pretty reasonable.  AIUI, all PCI devices are below some
> kind of host bridge that is either new and supports partitioning or old
> and doesn't.  I don't know if that's a visibility or isolation
> requirement, perhaps PCI ACS-ish.  In the new host bridge case, each
> device gets a group.  This seems not to have any quirks for multifunction
> devices though.  On AMD and Intel IOMMUs we test multifunction device ACS
> support to determine whether all the functions should be in the same
> group.  Is there any reason to trust multifunction devices on PAMU?
>
[Sethi Varun-B16395] In the case where we can partition endpoints we can distinguish transactions based on the bus,device,function number combination. This support is available in the PCIe controller (host bridge).
> I also find it curious what happens to the iommu group of the host
> bridge.  In the partitionable case the host bridge group is removed, in
> the non-partitionable case the host bridge group becomes the group for
> the children, removing the host bridge.  It's unique to PAMU so far that
> these host bridges are even in an iommu group (x86 only adds pci
> devices), but I don't see it as necessarily wrong leaving it in either
> scenario.  Does it solve some problem to remove them from the groups?
> Thanks,
[Sethi Varun-B16395] The PCIe controller isn't a partitionable entity, it would always be owned by the host.
-Varun
^ permalink raw reply
* [PATCH 18/18] cpufreq: powerpc/platforms/cell: move cpufreq driver to drivers/cpufreq
From: Viresh Kumar @ 2013-04-04 12:54 UTC (permalink / raw)
To: rjw
Cc: robin.randhawa, Arnd Bergmann, linux-pm, Viresh Kumar, patches,
Liviu.Dudau, linux-kernel, cpufreq, Steve.Bannister,
arvind.chauhan, linuxppc-dev, linaro-kernel, charles.garcia-tobin
In-Reply-To: <cover.1365079581.git.viresh.kumar@linaro.org>
This patch moves the cpufreq driver for powerpc platforms/cell to drivers/cpufreq.
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: linuxppc-dev@lists.ozlabs.org
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
---
Compile Tested only.
arch/powerpc/platforms/cell/Kconfig | 26 +---------------------
arch/powerpc/platforms/cell/Makefile | 3 ---
drivers/cpufreq/Kconfig.powerpc | 18 +++++++++++++++
drivers/cpufreq/Makefile | 3 +++
.../cpufreq/ppc_cbe_cpufreq.c | 2 +-
.../cpufreq/ppc_cbe_cpufreq_pervasive.c | 3 +--
.../cpufreq/ppc_cbe_cpufreq_pmi.c | 3 +--
.../linux/ppc_cbe_cpufreq.h | 4 ++--
8 files changed, 27 insertions(+), 35 deletions(-)
rename arch/powerpc/platforms/cell/cbe_cpufreq.c => drivers/cpufreq/ppc_cbe_cpufreq.c (99%)
rename arch/powerpc/platforms/cell/cbe_cpufreq_pervasive.c => drivers/cpufreq/ppc_cbe_cpufreq_pervasive.c (98%)
rename arch/powerpc/platforms/cell/cbe_cpufreq_pmi.c => drivers/cpufreq/ppc_cbe_cpufreq_pmi.c (99%)
rename arch/powerpc/platforms/cell/cbe_cpufreq.h => include/linux/ppc_cbe_cpufreq.h (82%)
diff --git a/arch/powerpc/platforms/cell/Kconfig b/arch/powerpc/platforms/cell/Kconfig
index 53aaefe..9978f59 100644
--- a/arch/powerpc/platforms/cell/Kconfig
+++ b/arch/powerpc/platforms/cell/Kconfig
@@ -113,34 +113,10 @@ config CBE_THERM
default m
depends on CBE_RAS && SPU_BASE
-config CBE_CPUFREQ
- tristate "CBE frequency scaling"
- depends on CBE_RAS && CPU_FREQ
- default m
- help
- This adds the cpufreq driver for Cell BE processors.
- For details, take a look at <file:Documentation/cpu-freq/>.
- If you don't have such processor, say N
-
-config CBE_CPUFREQ_PMI_ENABLE
- bool "CBE frequency scaling using PMI interface"
- depends on CBE_CPUFREQ
- default n
- help
- Select this, if you want to use the PMI interface
- to switch frequencies. Using PMI, the
- processor will not only be able to run at lower speed,
- but also at lower core voltage.
-
-config CBE_CPUFREQ_PMI
- tristate
- depends on CBE_CPUFREQ_PMI_ENABLE
- default CBE_CPUFREQ
-
config PPC_PMI
tristate
default y
- depends on CBE_CPUFREQ_PMI || PPC_IBM_CELL_POWERBUTTON
+ depends on CPU_FREQ_CBE_PMI || PPC_IBM_CELL_POWERBUTTON
help
PMI (Platform Management Interrupt) is a way to
communicate with the BMC (Baseboard Management Controller).
diff --git a/arch/powerpc/platforms/cell/Makefile b/arch/powerpc/platforms/cell/Makefile
index a4a8935..fe053e7 100644
--- a/arch/powerpc/platforms/cell/Makefile
+++ b/arch/powerpc/platforms/cell/Makefile
@@ -5,9 +5,6 @@ obj-$(CONFIG_PPC_CELL_NATIVE) += iommu.o setup.o spider-pic.o \
obj-$(CONFIG_CBE_RAS) += ras.o
obj-$(CONFIG_CBE_THERM) += cbe_thermal.o
-obj-$(CONFIG_CBE_CPUFREQ_PMI) += cbe_cpufreq_pmi.o
-obj-$(CONFIG_CBE_CPUFREQ) += cbe-cpufreq.o
-cbe-cpufreq-y += cbe_cpufreq_pervasive.o cbe_cpufreq.o
obj-$(CONFIG_CBE_CPUFREQ_SPU_GOVERNOR) += cpufreq_spudemand.o
obj-$(CONFIG_PPC_IBM_CELL_POWERBUTTON) += cbe_powerbutton.o
diff --git a/drivers/cpufreq/Kconfig.powerpc b/drivers/cpufreq/Kconfig.powerpc
index 2e5a007..9e1ae94 100644
--- a/drivers/cpufreq/Kconfig.powerpc
+++ b/drivers/cpufreq/Kconfig.powerpc
@@ -1,3 +1,21 @@
+config CPU_FREQ_CBE
+ tristate "CBE frequency scaling"
+ depends on CBE_RAS && PPC_CELL
+ default m
+ help
+ This adds the cpufreq driver for Cell BE processors.
+ For details, take a look at <file:Documentation/cpu-freq/>.
+ If you don't have such processor, say N
+
+config CPU_FREQ_CBE_PMI
+ bool "CBE frequency scaling using PMI interface"
+ depends on CPU_FREQ_CBE
+ default n
+ help
+ Select this, if you want to use the PMI interface to switch
+ frequencies. Using PMI, the processor will not only be able to run at
+ lower speed, but also at lower core voltage.
+
config CPU_FREQ_MAPLE
bool "Support for Maple 970FX Evaluation Board"
depends on PPC_MAPLE
diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile
index 7b16092f..bfefa22 100644
--- a/drivers/cpufreq/Makefile
+++ b/drivers/cpufreq/Makefile
@@ -78,6 +78,9 @@ obj-$(CONFIG_ARCH_TEGRA) += tegra-cpufreq.o
##################################################################################
# PowerPC platform drivers
+obj-$(CONFIG_CPU_FREQ_CBE) += ppc-cbe-cpufreq.o
+ppc-cbe-cpufreq-y += ppc_cbe_cpufreq_pervasive.o ppc_cbe_cpufreq.o
+obj-$(CONFIG_CPU_FREQ_CBE_PMI) += ppc_cbe_cpufreq_pmi.o
obj-$(CONFIG_CPU_FREQ_MAPLE) += maple-cpufreq.o
obj-$(CONFIG_CPU_FREQ_PMAC) += pmac32-cpufreq.o
obj-$(CONFIG_CPU_FREQ_PMAC64) += pmac64-cpufreq.o
diff --git a/arch/powerpc/platforms/cell/cbe_cpufreq.c b/drivers/cpufreq/ppc_cbe_cpufreq.c
similarity index 99%
rename from arch/powerpc/platforms/cell/cbe_cpufreq.c
rename to drivers/cpufreq/ppc_cbe_cpufreq.c
index 718c6a3..6d7a51d 100644
--- a/arch/powerpc/platforms/cell/cbe_cpufreq.c
+++ b/drivers/cpufreq/ppc_cbe_cpufreq.c
@@ -23,11 +23,11 @@
#include <linux/cpufreq.h>
#include <linux/module.h>
#include <linux/of_platform.h>
+#include <linux/ppc_cbe_cpufreq.h>
#include <asm/machdep.h>
#include <asm/prom.h>
#include <asm/cell-regs.h>
-#include "cbe_cpufreq.h"
static DEFINE_MUTEX(cbe_switch_mutex);
diff --git a/arch/powerpc/platforms/cell/cbe_cpufreq_pervasive.c b/drivers/cpufreq/ppc_cbe_cpufreq_pervasive.c
similarity index 98%
rename from arch/powerpc/platforms/cell/cbe_cpufreq_pervasive.c
rename to drivers/cpufreq/ppc_cbe_cpufreq_pervasive.c
index 20472e4..6708710 100644
--- a/arch/powerpc/platforms/cell/cbe_cpufreq_pervasive.c
+++ b/drivers/cpufreq/ppc_cbe_cpufreq_pervasive.c
@@ -25,13 +25,12 @@
#include <linux/io.h>
#include <linux/kernel.h>
+#include <linux/ppc_cbe_cpufreq.h>
#include <linux/time.h>
#include <asm/machdep.h>
#include <asm/hw_irq.h>
#include <asm/cell-regs.h>
-#include "cbe_cpufreq.h"
-
/* to write to MIC register */
static u64 MIC_Slow_Fast_Timer_table[] = {
[0 ... 7] = 0x007fc00000000000ull,
diff --git a/arch/powerpc/platforms/cell/cbe_cpufreq_pmi.c b/drivers/cpufreq/ppc_cbe_cpufreq_pmi.c
similarity index 99%
rename from arch/powerpc/platforms/cell/cbe_cpufreq_pmi.c
rename to drivers/cpufreq/ppc_cbe_cpufreq_pmi.c
index 60a07a4..448b888 100644
--- a/arch/powerpc/platforms/cell/cbe_cpufreq_pmi.c
+++ b/drivers/cpufreq/ppc_cbe_cpufreq_pmi.c
@@ -25,6 +25,7 @@
#include <linux/timer.h>
#include <linux/module.h>
#include <linux/of_platform.h>
+#include <linux/ppc_cbe_cpufreq.h>
#include <asm/processor.h>
#include <asm/prom.h>
@@ -35,8 +36,6 @@
#include <asm/time.h>
#endif
-#include "cbe_cpufreq.h"
-
static u8 pmi_slow_mode_limit[MAX_CBE];
bool cbe_cpufreq_has_pmi = false;
diff --git a/arch/powerpc/platforms/cell/cbe_cpufreq.h b/include/linux/ppc_cbe_cpufreq.h
similarity index 82%
rename from arch/powerpc/platforms/cell/cbe_cpufreq.h
rename to include/linux/ppc_cbe_cpufreq.h
index c1d86bf..b4c00a5 100644
--- a/arch/powerpc/platforms/cell/cbe_cpufreq.h
+++ b/include/linux/ppc_cbe_cpufreq.h
@@ -1,5 +1,5 @@
/*
- * cbe_cpufreq.h
+ * ppc_cbe_cpufreq.h
*
* This file contains the definitions used by the cbe_cpufreq driver.
*
@@ -17,7 +17,7 @@ int cbe_cpufreq_get_pmode(int cpu);
int cbe_cpufreq_set_pmode_pmi(int cpu, unsigned int pmode);
-#if defined(CONFIG_CBE_CPUFREQ_PMI) || defined(CONFIG_CBE_CPUFREQ_PMI_MODULE)
+#if defined(CONFIG_CPU_FREQ_CBE_PMI) || defined(CONFIG_CPU_FREQ_CBE_PMI_MODULE)
extern bool cbe_cpufreq_has_pmi;
#else
#define cbe_cpufreq_has_pmi (0)
--
1.7.12.rc2.18.g61b472e
^ permalink raw reply related
* [PATCH 17/18] cpufreq: powerpc: move cpufreq driver to drivers/cpufreq
From: Viresh Kumar @ 2013-04-04 12:54 UTC (permalink / raw)
To: rjw
Cc: robin.randhawa, linux-pm, Viresh Kumar, patches, Liviu.Dudau,
linux-kernel, cpufreq, Steve.Bannister, Paul Mackerras,
Olof Johansson, arvind.chauhan, linuxppc-dev, linaro-kernel,
charles.garcia-tobin
In-Reply-To: <cover.1365079581.git.viresh.kumar@linaro.org>
This patch moves the cpufreq drivers for the powerpc pasemi and powermac platforms to drivers/cpufreq.
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Olof Johansson <olof@lixom.net>
Cc: linuxppc-dev@lists.ozlabs.org
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
---
Compile Tested only.
arch/powerpc/platforms/Kconfig | 31 ----------------------
arch/powerpc/platforms/pasemi/Makefile | 1 -
arch/powerpc/platforms/powermac/Makefile | 2 --
drivers/cpufreq/Kconfig.powerpc | 26 ++++++++++++++++++
drivers/cpufreq/Makefile | 3 +++
.../cpufreq.c => drivers/cpufreq/pasemi-cpufreq.c | 0
.../cpufreq/pmac32-cpufreq.c | 0
.../cpufreq/pmac64-cpufreq.c | 0
8 files changed, 29 insertions(+), 34 deletions(-)
rename arch/powerpc/platforms/pasemi/cpufreq.c => drivers/cpufreq/pasemi-cpufreq.c (100%)
rename arch/powerpc/platforms/powermac/cpufreq_32.c => drivers/cpufreq/pmac32-cpufreq.c (100%)
rename arch/powerpc/platforms/powermac/cpufreq_64.c => drivers/cpufreq/pmac64-cpufreq.c (100%)
diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig
index 52de8bc..46a223f 100644
--- a/arch/powerpc/platforms/Kconfig
+++ b/arch/powerpc/platforms/Kconfig
@@ -194,37 +194,6 @@ config PPC_IO_WORKAROUNDS
source "drivers/cpufreq/Kconfig"
-menu "CPU Frequency drivers"
- depends on CPU_FREQ
-
-config CPU_FREQ_PMAC
- bool "Support for Apple PowerBooks"
- depends on ADB_PMU && PPC32
- select CPU_FREQ_TABLE
- help
- This adds support for frequency switching on Apple PowerBooks,
- this currently includes some models of iBook & Titanium
- PowerBook.
-
-config CPU_FREQ_PMAC64
- bool "Support for some Apple G5s"
- depends on PPC_PMAC && PPC64
- select CPU_FREQ_TABLE
- help
- This adds support for frequency switching on Apple iMac G5,
- and some of the more recent desktop G5 machines as well.
-
-config PPC_PASEMI_CPUFREQ
- bool "Support for PA Semi PWRficient"
- depends on PPC_PASEMI
- default y
- select CPU_FREQ_TABLE
- help
- This adds the support for frequency switching on PA Semi
- PWRficient processors.
-
-endmenu
-
menu "CPUIdle driver"
source "drivers/cpuidle/Kconfig"
diff --git a/arch/powerpc/platforms/pasemi/Makefile b/arch/powerpc/platforms/pasemi/Makefile
index ce6d789..8e8d4ca 100644
--- a/arch/powerpc/platforms/pasemi/Makefile
+++ b/arch/powerpc/platforms/pasemi/Makefile
@@ -1,3 +1,2 @@
obj-y += setup.o pci.o time.o idle.o powersave.o iommu.o dma_lib.o misc.o
obj-$(CONFIG_PPC_PASEMI_MDIO) += gpio_mdio.o
-obj-$(CONFIG_PPC_PASEMI_CPUFREQ) += cpufreq.o
diff --git a/arch/powerpc/platforms/powermac/Makefile b/arch/powerpc/platforms/powermac/Makefile
index ea47df6..52c6ce1 100644
--- a/arch/powerpc/platforms/powermac/Makefile
+++ b/arch/powerpc/platforms/powermac/Makefile
@@ -9,8 +9,6 @@ obj-y += pic.o setup.o time.o feature.o pci.o \
sleep.o low_i2c.o cache.o pfunc_core.o \
pfunc_base.o udbg_scc.o udbg_adb.o
obj-$(CONFIG_PMAC_BACKLIGHT) += backlight.o
-obj-$(CONFIG_CPU_FREQ_PMAC) += cpufreq_32.o
-obj-$(CONFIG_CPU_FREQ_PMAC64) += cpufreq_64.o
# CONFIG_NVRAM is an arch. independent tristate symbol, for pmac32 we really
# need this to be a bool. Cheat here and pretend CONFIG_NVRAM=m is really
# CONFIG_NVRAM=y
diff --git a/drivers/cpufreq/Kconfig.powerpc b/drivers/cpufreq/Kconfig.powerpc
index e76992f..2e5a007 100644
--- a/drivers/cpufreq/Kconfig.powerpc
+++ b/drivers/cpufreq/Kconfig.powerpc
@@ -5,3 +5,29 @@ config CPU_FREQ_MAPLE
help
This adds support for frequency switching on Maple 970FX
Evaluation Board and compatible boards (IBM JS2x blades).
+
+config CPU_FREQ_PMAC
+ bool "Support for Apple PowerBooks"
+ depends on ADB_PMU && PPC32
+ select CPU_FREQ_TABLE
+ help
+ This adds support for frequency switching on Apple PowerBooks,
+ this currently includes some models of iBook & Titanium
+ PowerBook.
+
+config CPU_FREQ_PMAC64
+ bool "Support for some Apple G5s"
+ depends on PPC_PMAC && PPC64
+ select CPU_FREQ_TABLE
+ help
+ This adds support for frequency switching on Apple iMac G5,
+ and some of the more recent desktop G5 machines as well.
+
+config PPC_PASEMI_CPUFREQ
+ bool "Support for PA Semi PWRficient"
+ depends on PPC_PASEMI
+ select CPU_FREQ_TABLE
+ default y
+ help
+ This adds the support for frequency switching on PA Semi
+ PWRficient processors.
diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile
index db96d74..7b16092f 100644
--- a/drivers/cpufreq/Makefile
+++ b/drivers/cpufreq/Makefile
@@ -79,6 +79,9 @@ obj-$(CONFIG_ARCH_TEGRA) += tegra-cpufreq.o
##################################################################################
# PowerPC platform drivers
obj-$(CONFIG_CPU_FREQ_MAPLE) += maple-cpufreq.o
+obj-$(CONFIG_CPU_FREQ_PMAC) += pmac32-cpufreq.o
+obj-$(CONFIG_CPU_FREQ_PMAC64) += pmac64-cpufreq.o
+obj-$(CONFIG_PPC_PASEMI_CPUFREQ) += pasemi-cpufreq.o
##################################################################################
# Other platform drivers
diff --git a/arch/powerpc/platforms/pasemi/cpufreq.c b/drivers/cpufreq/pasemi-cpufreq.c
similarity index 100%
rename from arch/powerpc/platforms/pasemi/cpufreq.c
rename to drivers/cpufreq/pasemi-cpufreq.c
diff --git a/arch/powerpc/platforms/powermac/cpufreq_32.c b/drivers/cpufreq/pmac32-cpufreq.c
similarity index 100%
rename from arch/powerpc/platforms/powermac/cpufreq_32.c
rename to drivers/cpufreq/pmac32-cpufreq.c
diff --git a/arch/powerpc/platforms/powermac/cpufreq_64.c b/drivers/cpufreq/pmac64-cpufreq.c
similarity index 100%
rename from arch/powerpc/platforms/powermac/cpufreq_64.c
rename to drivers/cpufreq/pmac64-cpufreq.c
--
1.7.12.rc2.18.g61b472e
^ permalink raw reply related
* Re: [PATCH -V5 00/25] THP support for PPC64
From: Aneesh Kumar K.V @ 2013-04-04 8:38 UTC (permalink / raw)
To: Simon Jeons; +Cc: paulus, linuxppc-dev, linux-mm
In-Reply-To: <515D1A2C.1000606@gmail.com>
Simon Jeons <simon.jeons@gmail.com> writes:
> Hi Aneesh,
> On 04/04/2013 01:57 PM, Aneesh Kumar K.V wrote:
>> Hi,
>>
>> This patchset adds transparent hugepage support for PPC64.
>>
>> TODO:
>> * hash preload support in update_mmu_cache_pmd (we don't do that for hugetlb)
>>
>> Some numbers:
>>
>> The latency measurements code from Anton found at
>> http://ozlabs.org/~anton/junkcode/latency2001.c
>>
>> THP disabled 64K page size
>> ------------------------
>> [root@llmp24l02 ~]# ./latency2001 8G
>> 8589934592 731.73 cycles 205.77 ns
>> [root@llmp24l02 ~]# ./latency2001 8G
>> 8589934592 743.39 cycles 209.05 ns
>
> Could you explain what's the meaning of result?
>
Those are the total memory range, the cycles taken to access an address, and
the time taken to access it. The numbers show the overhead of a TLB miss.
You can find the source at http://ozlabs.org/~anton/junkcode/latency2001.c
-aneesh
^ permalink raw reply
* Re: [PATCH v2 2/11] Add PRRN Event Handler
From: Benjamin Herrenschmidt @ 2013-04-04 7:16 UTC (permalink / raw)
To: Paul Mackerras; +Cc: Nathan Fontenot, linuxppc-dev
In-Reply-To: <20130404033436.GC19443@drongo>
On Thu, 2013-04-04 at 14:34 +1100, Paul Mackerras wrote:
> Also, rtasd isn't actually a task, it's just a function that gets run
> via schedule_delayed_work_on() and re-schedules itself each time it
> runs. Is there any deadlock possibility in calling flush_work from a
> work function?
There used to be, but I'm not familiar with the "new" implementation of
the work queue stuff.
Cheers,
Ben.
^ permalink raw reply
* RE: [PATCH 4/5] powerpc/fsl-booke: Add B4_QDS board support
From: Leekha Shaveta-B20052 @ 2013-04-04 7:10 UTC (permalink / raw)
To: Kumar Gala; +Cc: linuxppc-dev@lists.ozlabs.org
In-Reply-To: <13F7FFD3-2CFB-463F-84D2-2613D2B697E1@kernel.crashing.org>
-----Original Message-----
From: Kumar Gala [mailto:galak@kernel.crashing.org]
Sent: Wednesday, April 03, 2013 10:12 PM
To: Leekha Shaveta-B20052
Cc: linuxppc-dev@lists.ozlabs.org
Subject: Re: [PATCH 4/5] powerpc/fsl-booke: Add B4_QDS board support
On Apr 2, 2013, at 2:16 AM, Shaveta Leekha wrote:
> - Add support for B4 board in board file b4_qds.c, It is common for=20
> B4860, B4420 and B4220QDS as they share same QDS board
> - Add B4QDS support in Kconfig and Makefile
>=20
> B4860QDS is a high-performance computing evaluation, development and=20
> test platform supporting the B4860 QorIQ Power Architecture processor,=20
> with following major features:
>=20
> - Four dual-threaded e6500 Power Architecture processors
> organized in one cluster-each core runs up to 1.8 GHz
> - Two DDR3/3L controllers for high-speed memory interface each
> runs at up to 1866.67 MHz
> - CoreNet fabric that fully supports coherency using MESI protocol
> between the e6500 cores, SC3900 FVP cores, memories and
> external interfaces.
> - Data Path Acceleration Architecture having FMAN, QMan, BMan, SEC 5.3 and RMAN
> - Large internal cache memory with snooping and stashing capabilities
> - Sixteen 10-GHz SerDes lanes that serve:
> - Two SRIO interfaces. Each supports up to 4 lanes and
> a total of up to 8 lanes
> - Up to 8-lanes Common Public Radio Interface (CPRI) controller
> for glue-less antenna connection
> - Two 10-Gbit Ethernet controllers (10GEC)
> - Six 1G/2.5-Gbit Ethernet controllers for network communications
> - PCI Express controller
> - Debug (Aurora)
> - Various system peripherals
>=20
> B4420 and B4220 have some differences in comparison to B4860 with
> fewer core/clusters(both SC3900 and e6500), fewer DDR controllers, fewer serdes lanes, fewer SGMII interfaces and reduced target frequencies.
>=20
> Key differences between B4860 and B4420:
> B4420 has:
> - Fewer e6500 cores:
> 1 cluster with 2 e6500 cores
> - Fewer SC3900 cores/clusters:
> 1 cluster with 2 SC3900 cores per cluster
> - Single DDRC @ 1.6GHz
> - 2 X 4 lane serdes
> - 3 SGMII interfaces
> - no sRIO
> - no 10G
>=20
> Key differences between B4860 and B4220:
> B4220 has:
> - Fewer e6500 cores:
> 1 cluster with 1 e6500 core
> - Fewer SC3900 cores/clusters:
> 1 cluster with 2 SC3900 cores per cluster
> - Single DDRC @ 1.33GHz
> - 2 X 2 lane serdes
> - 2 SGMII interfaces
> - no sRIO
> - no 10G
>=20
> Signed-off-by: Shaveta Leekha <shaveta@freescale.com>
> ---
> arch/powerpc/platforms/85xx/Kconfig | 17 ++++++
> arch/powerpc/platforms/85xx/Makefile | 1 +
> arch/powerpc/platforms/85xx/b4_qds.c | 102=20
> ++++++++++++++++++++++++++++++++++
> 3 files changed, 120 insertions(+), 0 deletions(-) create mode 100644=20
> arch/powerpc/platforms/85xx/b4_qds.c
commit messages should line wrap at 75 chars.
- k
[SL] Ok, will do that.
Regards,
Shaveta
^ permalink raw reply
* RE: [PATCH 2/5] powerpc/fsl-booke: Add initial silicon device tree files for B4860 and B4420
From: Leekha Shaveta-B20052 @ 2013-04-04 7:10 UTC (permalink / raw)
To: Kumar Gala
Cc: Li Yang-R58472, Zhao Chenhui-B35336, Mehresh Ramneek-B31383,
Garg Vakul-B16394, Lian Minghuan-B31939, Tang Yuantian-B29983,
Fleming Andy-AFLEMING, Sethi Varun-B16395,
linuxppc-dev@lists.ozlabs.org
In-Reply-To: <2E7EAABC-5A34-46D2-B4C8-80081AC1563F@kernel.crashing.org>
-----Original Message-----
From: Kumar Gala [mailto:galak@kernel.crashing.org]
Sent: Wednesday, April 03, 2013 10:10 PM
To: Leekha Shaveta-B20052
Cc: linuxppc-dev@lists.ozlabs.org; Zhao Chenhui-B35336; Li Yang-R58472; Tang Yuantian-B29983; Sethi Varun-B16395; Lian Minghuan-B31939; Mehresh Ramneek-B31383; Fleming Andy-AFLEMING; Garg Vakul-B16394
Subject: Re: [PATCH 2/5] powerpc/fsl-booke: Add initial silicon device tree files for B4860 and B4420
On Apr 2, 2013, at 2:16 AM, Shaveta Leekha wrote:
> B4860 and B4420 are similar that share some commonalities
>=20
> * common features have been added in b4si-pre.dtsi and b4si-post.dtsi
> * differences are added in respective silicon files of B4860 and B4420
What are the differences between B4860 & B4420, beyond # of cores?
[SL] I have detailed the differences in the board support patch sent in this patch set.
Do I need to mention the differences here also?
>=20
> There are several things missing from the device trees of B4860 and B4420:
>=20
> * DPAA related nodes (Qman, Bman, Fman, Rman)
> * DSP related nodes/information
What about:
serdes, sfp [security fuse processor], thermal, gpio, maple, cpri, quad timers,
[SL] I would prefer to list in the patch description what has been added to the
device tree so far, as that is clear to me.
But as you suggested, I mentioned some of the nodes/things that are missing,
though the list is not exhaustive. Also, I am not sure what will be
added/required in the future in these device tree files.
Anyway, I can add all the things you have mentioned above.
Please tell me if anything else is missing.
Regards,
Shaveta
>=20
> Signed-off-by: Shaveta Leekha <shaveta@freescale.com>
> Signed-off-by: Zhao Chenhui <chenhui.zhao@freescale.com>
> Signed-off-by: Li Yang <leoli@freescale.com>
> Signed-off-by: Tang Yuantian <Yuantian.Tang@freescale.com>
> Signed-off-by: Varun Sethi <Varun.Sethi@freescale.com>
> Signed-off-by: Minghuan Lian <Minghuan.Lian@freescale.com>
> Signed-off-by: Ramneek Mehresh <ramneek.mehresh@freescale.com>
> Signed-off-by: Kumar Gala <galak@kernel.crashing.org>
> Signed-off-by: Andy Fleming <afleming@freescale.com>
> Signed-off-by: Vakul Garg <vakul@freescale.com>
> ---
> arch/powerpc/boot/dts/fsl/b4420si-post.dtsi | 94 ++++++++++
> arch/powerpc/boot/dts/fsl/b4420si-pre.dtsi | 49 +++++
> arch/powerpc/boot/dts/fsl/b4860si-post.dtsi | 138 ++++++++++++++
> arch/powerpc/boot/dts/fsl/b4860si-pre.dtsi | 59 ++++++
> arch/powerpc/boot/dts/fsl/b4si-post.dtsi | 262 ++++++++++++++++++++++=
+++++
> arch/powerpc/boot/dts/fsl/b4si-pre.dtsi | 65 +++++++
Remove b4si-pre.dtsi, there isn't enough here to warrant not just merging it into b4420si-pre.dtsi & b4860si-pre.dtsi
> 6 files changed, 667 insertions(+), 0 deletions(-) create mode 100644=20
> arch/powerpc/boot/dts/fsl/b4420si-post.dtsi
> create mode 100644 arch/powerpc/boot/dts/fsl/b4420si-pre.dtsi
> create mode 100644 arch/powerpc/boot/dts/fsl/b4860si-post.dtsi
> create mode 100644 arch/powerpc/boot/dts/fsl/b4860si-pre.dtsi
> create mode 100644 arch/powerpc/boot/dts/fsl/b4si-post.dtsi
> create mode 100644 arch/powerpc/boot/dts/fsl/b4si-pre.dtsi
>=20
> diff --git a/arch/powerpc/boot/dts/fsl/b4420si-post.dtsi=20
> b/arch/powerpc/boot/dts/fsl/b4420si-post.dtsi
> new file mode 100644
> index 0000000..bba0c03
> --- /dev/null
> +++ b/arch/powerpc/boot/dts/fsl/b4420si-post.dtsi
> @@ -0,0 +1,94 @@
> +/*
> + * B4420 Silicon/SoC Device Tree Source (post include)
> + *
> + * Copyright 2012 Freescale Semiconductor, Inc.
> + *
> + * Redistribution and use in source and binary forms, with or without
> + * modification, are permitted provided that the following conditions ar=
e met:
> + * * Redistributions of source code must retain the above copyright
> + * notice, this list of conditions and the following disclaimer.
> + * * Redistributions in binary form must reproduce the above copyrig=
ht
> + * notice, this list of conditions and the following disclaimer in=
the
> + * documentation and/or other materials provided with the distribu=
tion.
> + * * Neither the name of Freescale Semiconductor nor the
> + * names of its contributors may be used to endorse or promote pro=
ducts
> + * derived from this software without specific prior written permi=
ssion.
> + *
> + *
> + * ALTERNATIVELY, this software may be distributed under the terms of=20
> +the
> + * GNU General Public License ("GPL") as published by the Free=20
> +Software
> + * Foundation, either version 2 of that License or (at your option)=20
> +any
> + * later version.
> + *
> + * This software is provided by Freescale Semiconductor "as is" and=20
> +any
> + * express or implied warranties, including, but not limited to, the=20
> +implied
> + * warranties of merchantability and fitness for a particular purpose=20
> +are
> + * disclaimed. In no event shall Freescale Semiconductor be liable=20
> +for any
> + * direct, indirect, incidental, special, exemplary, or consequential=20
> +damages
> + * (including, but not limited to, procurement of substitute goods or=20
> +services;
> + * loss of use, data, or profits; or business interruption) however=20
> +caused and
> + * on any theory of liability, whether in contract, strict liability,=20
> +or tort
> + * (including negligence or otherwise) arising in any way out of the=20
> +use of
> + * this software, even if advised of the possibility of such damage.
> + */
> +
> +/include/ "b4si-post.dtsi"
> +
> +/* controller at 0x200000 */
> +&pci0 {
> + compatible =3D "fsl,b4420-pcie", "fsl,qoriq-pcie-v2.4"; };
> +
> +&dcsr {
> + dcsr-epu@0 {
> + compatible =3D "fsl,b4420-dcsr-epu", "fsl,dcsr-epu";
> + };
> + dcsr-npc {
> + compatible =3D "fsl,b4420-dcsr-cnpc", "fsl,dcsr-cnpc";
> + };
> + dcsr-dpaa@9000 {
> + compatible =3D "fsl,b4420-dcsr-dpaa", "fsl,dcsr-dpaa";
> + };
> + dcsr-ocn@11000 {
> + compatible =3D "fsl,b4420-dcsr-ocn", "fsl,dcsr-ocn";
> + };
> + dcsr-nal@18000 {
> + compatible =3D "fsl,b4420-dcsr-nal", "fsl,dcsr-nal";
> + };
> + dcsr-rcpm@22000 {
> + compatible =3D "fsl,b4420-dcsr-rcpm", "fsl,dcsr-rcpm";
> + };
> + dcsr-snpc@30000 {
> + compatible =3D "fsl,b4420-dcsr-snpc", "fsl,dcsr-snpc";
> + };
> + dcsr-snpc@31000 {
> + compatible =3D "fsl,b4420-dcsr-snpc", "fsl,dcsr-snpc";
> + };
> + dcsr-cpu-sb-proxy@108000 {
> + compatible =3D "fsl,dcsr-e6500-sb-proxy", "fsl,dcsr-cpu-sb-proxy";
> + cpu-handle =3D <&cpu1>;
> + reg =3D <0x108000 0x1000 0x109000 0x1000>;
> + };
> +};
> +
> +&soc {
> + cpc: l3-cache-controller@10000 {
> + compatible =3D "fsl,b4420-l3-cache-controller", "cache";
> + };
> +
> + corenet-cf@18000 {
> + compatible =3D "fsl,b4420-corenet-cf";
> + };
> +
> + guts: global-utilities@e0000 {
> + compatible =3D "fsl,b4420-device-config", "fsl,qoriq-device-config-2.0=
";
> + };
> +
> + clockgen: global-utilities@e1000 {
> + compatible =3D "fsl,b4420-clockgen", "fsl,qoriq-clockgen-2";
> + };
> +
> + L2: l2-cache-controller@c20000 {
> + compatible =3D "fsl,b4420-l2-cache-controller";
> + };
> +};
> diff --git a/arch/powerpc/boot/dts/fsl/b4420si-pre.dtsi=20
> b/arch/powerpc/boot/dts/fsl/b4420si-pre.dtsi
> new file mode 100644
> index 0000000..555b0e4
> --- /dev/null
> +++ b/arch/powerpc/boot/dts/fsl/b4420si-pre.dtsi
> @@ -0,0 +1,49 @@
> +/*
> + * B4420 Silicon/SoC Device Tree Source (pre include)
> + *
> + * Copyright 2012 Freescale Semiconductor, Inc.
> + *
> + * Redistribution and use in source and binary forms, with or without
> + * modification, are permitted provided that the following conditions ar=
e met:
> + * * Redistributions of source code must retain the above copyright
> + * notice, this list of conditions and the following disclaimer.
> + * * Redistributions in binary form must reproduce the above copyrig=
ht
> + * notice, this list of conditions and the following disclaimer in=
the
> + * documentation and/or other materials provided with the distribu=
tion.
> + * * Neither the name of Freescale Semiconductor nor the
> + * names of its contributors may be used to endorse or promote pro=
ducts
> + * derived from this software without specific prior written permi=
ssion.
> + *
> + *
> + * ALTERNATIVELY, this software may be distributed under the terms of=20
> +the
> + * GNU General Public License ("GPL") as published by the Free=20
> +Software
> + * Foundation, either version 2 of that License or (at your option)=20
> +any
> + * later version.
> + *
> + * This software is provided by Freescale Semiconductor "as is" and=20
> +any
> + * express or implied warranties, including, but not limited to, the=20
> +implied
> + * warranties of merchantability and fitness for a particular purpose=20
> +are
> + * disclaimed. In no event shall Freescale Semiconductor be liable=20
> +for any
> + * direct, indirect, incidental, special, exemplary, or consequential=20
> +damages
> + * (including, but not limited to, procurement of substitute goods or=20
> +services;
> + * loss of use, data, or profits; or business interruption) however=20
> +caused and
> + * on any theory of liability, whether in contract, strict liability,=20
> +or tort
> + * (including negligence or otherwise) arising in any way out of the=20
> +use of
> + * this software, even if advised of the possibility of such damage.
> + */
> +
> +/dts-v1/;
> +
> +/include/ "b4si-pre.dtsi"
> +
> +/ {
> + compatible =3D "fsl,B4420";
> +
> + cpus {
> + cpu1: PowerPC,e6500@1 {
> + device_type =3D "cpu";
> + reg =3D <2 3>;
> + next-level-cache =3D <&L2>;
> + };
> + };
> +};
> diff --git a/arch/powerpc/boot/dts/fsl/b4860si-post.dtsi=20
> b/arch/powerpc/boot/dts/fsl/b4860si-post.dtsi
> new file mode 100644
> index 0000000..f43910f
> --- /dev/null
> +++ b/arch/powerpc/boot/dts/fsl/b4860si-post.dtsi
> @@ -0,0 +1,138 @@
> +/*
> + * B4860 Silicon/SoC Device Tree Source (post include)
> + *
> + * Copyright 2012 Freescale Semiconductor Inc.
> + *
> + * Redistribution and use in source and binary forms, with or without
> + * modification, are permitted provided that the following conditions ar=
e met:
> + * * Redistributions of source code must retain the above copyright
> + * notice, this list of conditions and the following disclaimer.
> + * * Redistributions in binary form must reproduce the above copyrig=
ht
> + * notice, this list of conditions and the following disclaimer in=
the
> + * documentation and/or other materials provided with the distribu=
tion.
> + * * Neither the name of Freescale Semiconductor nor the
> + * names of its contributors may be used to endorse or promote pro=
ducts
> + * derived from this software without specific prior written permi=
ssion.
> + *
> + *
> + * ALTERNATIVELY, this software may be distributed under the terms of=20
> +the
> + * GNU General Public License ("GPL") as published by the Free=20
> +Software
> + * Foundation, either version 2 of that License or (at your option)=20
> +any
> + * later version.
> + *
> + * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND=20
> +ANY
> + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE=20
> +IMPLIED
> + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE=20
> +ARE
> + * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE=20
> +FOR ANY
> + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL=20
> +DAMAGES
> + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR=20
> +SERVICES;
> + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER=20
> +CAUSED AND
> + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,=20
> +OR TORT
> + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE=20
> +USE OF THIS
> + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> + */
> +
> +/include/ "b4si-post.dtsi"
> +
> +/* controller at 0x200000 */
> +&pci0 {
> + compatible =3D "fsl,b4860-pcie", "fsl,qoriq-pcie-v2.4"; };
> +
> +&rio {
> + compatible =3D "fsl,srio";
> + interrupts =3D <16 2 1 11>;
> + #address-cells =3D <2>;
> + #size-cells =3D <2>;
> + fsl,iommu-parent =3D <&pamu0>;
> + ranges;
> +
> + port1 {
> + #address-cells =3D <2>;
> + #size-cells =3D <2>;
> + cell-index =3D <1>;
> + fsl,liodn-reg =3D <&guts 0x510>; /* RIO1LIODNR */
> + };
> +
> + port2 {
> + #address-cells =3D <2>;
> + #size-cells =3D <2>;
> + cell-index =3D <2>;
> + fsl,liodn-reg =3D <&guts 0x514>; /* RIO2LIODNR */
> + };
> +};
> +
> +&dcsr {
> + dcsr-epu@0 {
> + compatible =3D "fsl,b4860-dcsr-epu", "fsl,dcsr-epu";
> + };
> + dcsr-npc {
> + compatible =3D "fsl,b4860-dcsr-cnpc", "fsl,dcsr-cnpc";
> + };
> + dcsr-dpaa@9000 {
> + compatible =3D "fsl,b4860-dcsr-dpaa", "fsl,dcsr-dpaa";
> + };
> + dcsr-ocn@11000 {
> + compatible =3D "fsl,b4860-dcsr-ocn", "fsl,dcsr-ocn";
> + };
> + dcsr-ddr@13000 {
> + compatible =3D "fsl,dcsr-ddr";
> + dev-handle =3D <&ddr2>;
> + reg =3D <0x13000 0x1000>;
> + };
> + dcsr-nal@18000 {
> + compatible =3D "fsl,b4860-dcsr-nal", "fsl,dcsr-nal";
> + };
> + dcsr-rcpm@22000 {
> + compatible =3D "fsl,b4860-dcsr-rcpm", "fsl,dcsr-rcpm";
> + };
> + dcsr-snpc@30000 {
> + compatible =3D "fsl,b4860-dcsr-snpc", "fsl,dcsr-snpc";
> + };
> + dcsr-snpc@31000 {
> + compatible =3D "fsl,b4860-dcsr-snpc", "fsl,dcsr-snpc";
> + };
> + dcsr-cpu-sb-proxy@108000 {
> + compatible =3D "fsl,dcsr-e6500-sb-proxy", "fsl,dcsr-cpu-sb-proxy";
> + cpu-handle =3D <&cpu1>;
> + reg =3D <0x108000 0x1000 0x109000 0x1000>;
> + };
> + dcsr-cpu-sb-proxy@110000 {
> + compatible =3D "fsl,dcsr-e6500-sb-proxy", "fsl,dcsr-cpu-sb-proxy";
> + cpu-handle =3D <&cpu2>;
> + reg =3D <0x110000 0x1000 0x111000 0x1000>;
> + };
> + dcsr-cpu-sb-proxy@118000 {
> + compatible =3D "fsl,dcsr-e6500-sb-proxy", "fsl,dcsr-cpu-sb-proxy";
> + cpu-handle =3D <&cpu3>;
> + reg =3D <0x118000 0x1000 0x119000 0x1000>;
> + };
> +};
> +
> +&soc {
> + ddr2: memory-controller@9000 {
> + compatible =3D "fsl,qoriq-memory-controller-v4.5", "fsl,qoriq-memory-c=
ontroller";
> + reg =3D <0x9000 0x1000>;
> + interrupts =3D <16 2 1 9>;
> + };
> +
> + cpc: l3-cache-controller@10000 {
> + compatible =3D "fsl,b4860-l3-cache-controller", "cache";
> + };
> +
> + corenet-cf@18000 {
> + compatible =3D "fsl,b4860-corenet-cf";
> + };
> +
> + guts: global-utilities@e0000 {
> + compatible =3D "fsl,b4860-device-config", "fsl,qoriq-device-config-2.0=
";
> + };
> +
> + clockgen: global-utilities@e1000 {
> + compatible =3D "fsl,b4860-clockgen", "fsl,qoriq-clockgen-2";
> + };
> +
> + L2: l2-cache-controller@c20000 {
> + compatible =3D "fsl,b4860-l2-cache-controller";
> + };
> +};
> diff --git a/arch/powerpc/boot/dts/fsl/b4860si-pre.dtsi=20
> b/arch/powerpc/boot/dts/fsl/b4860si-pre.dtsi
> new file mode 100644
> index 0000000..f5737a0
> --- /dev/null
> +++ b/arch/powerpc/boot/dts/fsl/b4860si-pre.dtsi
> @@ -0,0 +1,59 @@
> +/*
> + * B4860 Silicon/SoC Device Tree Source (pre include)
> + *
> + * Copyright 2012 Freescale Semiconductor Inc.
> + *
> + * Redistribution and use in source and binary forms, with or without
> + * modification, are permitted provided that the following conditions ar=
e met:
> + * * Redistributions of source code must retain the above copyright
> + * notice, this list of conditions and the following disclaimer.
> + * * Redistributions in binary form must reproduce the above copyrig=
ht
> + * notice, this list of conditions and the following disclaimer in=
the
> + * documentation and/or other materials provided with the distribu=
tion.
> + * * Neither the name of Freescale Semiconductor nor the
> + * names of its contributors may be used to endorse or promote pro=
ducts
> + * derived from this software without specific prior written permi=
ssion.
> + *
> + *
> + * ALTERNATIVELY, this software may be distributed under the terms of=20
> +the
> + * GNU General Public License ("GPL") as published by the Free=20
> +Software
> + * Foundation, either version 2 of that License or (at your option)=20
> +any
> + * later version.
> + *
> + * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND=20
> +ANY
> + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE=20
> +IMPLIED
> + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE=20
> +ARE
> + * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE=20
> +FOR ANY
> + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL=20
> +DAMAGES
> + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR=20
> +SERVICES;
> + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER=20
> +CAUSED AND
> + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,=20
> +OR TORT
> + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE=20
> +USE OF THIS
> + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> + */
> +
> +/dts-v1/;
> +
> +/include/ "b4si-pre.dtsi"
> +
> +/ {
> + compatible =3D "fsl,B4860";
> +
> + cpus {
> + cpu1: PowerPC,e6500@1 {
> + device_type =3D "cpu";
> + reg =3D <2 3>;
> + next-level-cache =3D <&L2>;
> + };
> + cpu2: PowerPC,e6500@2 {
> + device_type =3D "cpu";
> + reg =3D <4 5>;
> + next-level-cache =3D <&L2>;
> + };
> + cpu3: PowerPC,e6500@3 {
> + device_type =3D "cpu";
> + reg =3D <6 7>;
> + next-level-cache =3D <&L2>;
> + };
> + };
> +};
> diff --git a/arch/powerpc/boot/dts/fsl/b4si-post.dtsi=20
> b/arch/powerpc/boot/dts/fsl/b4si-post.dtsi
> new file mode 100644
> index 0000000..06c97a2
> --- /dev/null
> +++ b/arch/powerpc/boot/dts/fsl/b4si-post.dtsi
> @@ -0,0 +1,262 @@
> +/*
> + * B4420 Silicon/SoC Device Tree Source (post include)
> + *
> + * Copyright 2012 Freescale Semiconductor, Inc.
> + *
> + * Redistribution and use in source and binary forms, with or without
> + * modification, are permitted provided that the following conditions ar=
e met:
> + * * Redistributions of source code must retain the above copyright
> + * notice, this list of conditions and the following disclaimer.
> + * * Redistributions in binary form must reproduce the above copyrig=
ht
> + * notice, this list of conditions and the following disclaimer in=
the
> + * documentation and/or other materials provided with the distribu=
tion.
> + * * Neither the name of Freescale Semiconductor nor the
> + * names of its contributors may be used to endorse or promote pro=
ducts
> + * derived from this software without specific prior written permi=
ssion.
> + *
> + *
> + * ALTERNATIVELY, this software may be distributed under the terms of=20
> +the
> + * GNU General Public License ("GPL") as published by the Free=20
> +Software
> + * Foundation, either version 2 of that License or (at your option)=20
> +any
> + * later version.
> + *
> + * This software is provided by Freescale Semiconductor "as is" and=20
> +any
> + * express or implied warranties, including, but not limited to, the=20
> +implied
> + * warranties of merchantability and fitness for a particular purpose=20
> +are
> + * disclaimed. In no event shall Freescale Semiconductor be liable=20
> +for any
> + * direct, indirect, incidental, special, exemplary, or consequential=20
> +damages
> + * (including, but not limited to, procurement of substitute goods or=20
> +services;
> + * loss of use, data, or profits; or business interruption) however=20
> +caused and
> + * on any theory of liability, whether in contract, strict liability,=20
> +or tort
> + * (including negligence or otherwise) arising in any way out of the=20
> +use of
> + * this software, even if advised of the possibility of such damage.
> + */
> +
> +&ifc {
> + #address-cells =3D <2>;
> + #size-cells =3D <1>;
> + compatible =3D "fsl,ifc", "simple-bus";
> + interrupts =3D <25 2 0 0>;
> +};
> +
> +/* controller at 0x200000 */
> +&pci0 {
> + compatible =3D "fsl,b4-pcie", "fsl,qoriq-pcie-v2.4";
> + device_type =3D "pci";
> + #size-cells =3D <2>;
> + #address-cells =3D <3>;
> + bus-range =3D <0x0 0xff>;
> + interrupts =3D <20 2 0 0>;
> + fsl,iommu-parent =3D <&pamu0>;
> + pcie@0 {
> + #interrupt-cells =3D <1>;
> + #size-cells =3D <2>;
> + #address-cells =3D <3>;
> + device_type =3D "pci";
> + interrupts =3D <20 2 0 0>;
> + interrupt-map-mask =3D <0xf800 0 0 7>;
> + interrupt-map =3D <
> + /* IDSEL 0x0 */
> + 0000 0 0 1 &mpic 40 1 0 0
> + 0000 0 0 2 &mpic 1 1 0 0
> + 0000 0 0 3 &mpic 2 1 0 0
> + 0000 0 0 4 &mpic 3 1 0 0
> + >;
> + };
> +};
> +
> +&dcsr {
> + #address-cells =3D <1>;
> + #size-cells =3D <1>;
> + compatible =3D "fsl,dcsr", "simple-bus";
> +
> + dcsr-epu@0 {
> + compatible =3D "fsl,b4-dcsr-epu", "fsl,dcsr-epu";
> + interrupts =3D <52 2 0 0
> + 84 2 0 0
> + 85 2 0 0
> + 94 2 0 0
> + 95 2 0 0>;
> + reg =3D <0x0 0x1000>;
> + };
> + dcsr-npc {
> + compatible =3D "fsl,b4-dcsr-cnpc", "fsl,dcsr-cnpc";
> + reg =3D <0x1000 0x1000 0x1002000 0x10000>;
> + };
> + dcsr-nxc@2000 {
> + compatible =3D "fsl,dcsr-nxc";
> + reg =3D <0x2000 0x1000>;
> + };
> + dcsr-corenet {
> + compatible =3D "fsl,dcsr-corenet";
> + reg =3D <0x8000 0x1000 0x1A000 0x1000>;
> + };
> + dcsr-dpaa@9000 {
> + compatible =3D "fsl,b4-dcsr-dpaa", "fsl,dcsr-dpaa";
> + reg =3D <0x9000 0x1000>;
> + };
> + dcsr-ocn@11000 {
> + compatible =3D "fsl,b4-dcsr-ocn", "fsl,dcsr-ocn";
> + reg =3D <0x11000 0x1000>;
> + };
> + dcsr-ddr@12000 {
> + compatible =3D "fsl,dcsr-ddr";
> + dev-handle =3D <&ddr1>;
> + reg =3D <0x12000 0x1000>;
> + };
> + dcsr-nal@18000 {
> + compatible =3D "fsl,b4-dcsr-nal", "fsl,dcsr-nal";
> + reg =3D <0x18000 0x1000>;
> + };
> + dcsr-rcpm@22000 {
> + compatible =3D "fsl,b4-dcsr-rcpm", "fsl,dcsr-rcpm";
> + reg =3D <0x22000 0x1000>;
> + };
> + dcsr-snpc@30000 {
> + compatible =3D "fsl,b4-dcsr-snpc", "fsl,dcsr-snpc";
> + reg =3D <0x30000 0x1000 0x1022000 0x10000>;
> + };
> + dcsr-snpc@31000 {
> + compatible =3D "fsl,b4-dcsr-snpc", "fsl,dcsr-snpc";
> + reg =3D <0x31000 0x1000 0x1042000 0x10000>;
> + };
> + dcsr-cpu-sb-proxy@100000 {
> + compatible =3D "fsl,dcsr-e6500-sb-proxy", "fsl,dcsr-cpu-sb-proxy";
> + cpu-handle =3D <&cpu0>;
> + reg =3D <0x100000 0x1000 0x101000 0x1000>;
> + };
> +};
> +
> +&soc {
> + #address-cells =3D <1>;
> + #size-cells =3D <1>;
> + device_type =3D "soc";
> + compatible =3D "simple-bus";
> +
> + soc-sram-error {
> + compatible =3D "fsl,soc-sram-error";
> + interrupts =3D <16 2 1 2>;
> + };
> +
> + corenet-law@0 {
> + compatible =3D "fsl,corenet-law";
> + reg =3D <0x0 0x1000>;
> + fsl,num-laws =3D <32>;
> + };
> +
> + ddr1: memory-controller@8000 {
> + compatible =3D "fsl,qoriq-memory-controller-v4.5", "fsl,qoriq-memory-c=
ontroller";
> + reg =3D <0x8000 0x1000>;
> + interrupts =3D <16 2 1 8>;
> + };
> +
> + cpc: l3-cache-controller@10000 {
> + compatible =3D "fsl,b4-l3-cache-controller", "cache";
> + reg =3D <0x10000 0x1000>;
> + interrupts =3D <16 2 1 4>;
> + };
> +
> + corenet-cf@18000 {
> + compatible =3D "fsl,b4-corenet-cf";
> + reg =3D <0x18000 0x1000>;
> + interrupts =3D <16 2 1 0>;
> + fsl,ccf-num-csdids =3D <32>;
> + fsl,ccf-num-snoopids =3D <32>;
> + };
> +
> + iommu@20000 {
> + compatible =3D "fsl,pamu-v1.0", "fsl,pamu";
> + reg =3D <0x20000 0x4000>;
> + #address-cells =3D <1>;
> + #size-cells =3D <1>;
> + interrupts =3D <
> + 24 2 0 0
> + 16 2 1 1>;
> +
> +
> + /* PCIe, DMA, SRIO */
> + pamu0: pamu@0 {
> + reg =3D <0 0x1000>;
> + fsl,primary-cache-geometry =3D <8 1>;
> + fsl,secondary-cache-geometry =3D <32 2>;
> + };
> +
> + /* AXI2, Maple */
> + pamu1: pamu@1000 {
> + reg =3D <0x1000 0x1000>;
> + fsl,primary-cache-geometry =3D <32 1>;
> + fsl,secondary-cache-geometry =3D <32 2>;
> + };
> +
> + /* Q/BMan */
> + pamu2: pamu@2000 {
> + reg =3D <0x2000 0x1000>;
> + fsl,primary-cache-geometry =3D <32 1>;
> + fsl,secondary-cache-geometry =3D <32 2>;
> + };
> +
> + /* AXI1, FMAN */
> + pamu3: pamu@3000 {
> + reg =3D <0x3000 0x1000>;
> + fsl,primary-cache-geometry =3D <32 1>;
> + fsl,secondary-cache-geometry =3D <32 2>;
> + };
> + };
> +
> +/include/ "qoriq-mpic.dtsi"
> +
> + guts: global-utilities@e0000 {
> + compatible =3D "fsl,b4-device-config";
> + reg =3D <0xe0000 0xe00>;
> + fsl,has-rstcr;
> + fsl,liodn-bits =3D <12>;
> + };
> +
> + rcpm: global-utilities@e2000 {
> + compatible =3D "fsl,b4-rcpm", "fsl,qoriq-rcpm-2";
> + reg =3D <0xe2000 0x1000>;
> + };
> +
> +/include/ "qoriq-dma-0.dtsi"
> + dma@100300 {
> + fsl,iommu-parent =3D <&pamu0>;
> + fsl,liodn-reg =3D <&guts 0x580>; /* DMA1LIODNR */
> + };
> +
> +/include/ "qoriq-dma-1.dtsi"
> + dma@101300 {
> + fsl,iommu-parent =3D <&pamu0>;
> + fsl,liodn-reg =3D <&guts 0x584>; /* DMA2LIODNR */
> + };
> +
> +/include/ "qonverge-usb2-dr-0.dtsi"
> + usb0: usb@210000 {
> + compatible =3D "fsl-usb2-dr-v2.4", "fsl-usb2-dr";
> + fsl,iommu-parent =3D <&pamu1>;
> + fsl,liodn-reg =3D <&guts 0x520>; /* USB1LIODNR */
> + };
> +
> +/include/ "qoriq-espi-0.dtsi"
> + spi@110000 {
> + fsl,espi-num-chipselects =3D <4>;
> + };
> +
> +/include/ "qoriq-esdhc-0.dtsi"
> + sdhc@114000 {
> + sdhci,auto-cmd12;
> + fsl,iommu-parent =3D <&pamu1>;
> + fsl,liodn-reg =3D <&guts 0x530>; /* eSDHCLIODNR */
> + };
> +
> +/include/ "qoriq-i2c-0.dtsi"
> +/include/ "qoriq-i2c-1.dtsi"
> +/include/ "qoriq-duart-0.dtsi"
> +/include/ "qoriq-duart-1.dtsi"
> +/include/ "qoriq-sec5.3-0.dtsi"
> +
> + L2: l2-cache-controller@c20000 {
> + compatible =3D "fsl,b4-l2-cache-controller";
> + reg =3D <0xc20000 0x1000>;
white space issue
> + next-level-cache =3D <&cpc>;
> + };
> +};
> diff --git a/arch/powerpc/boot/dts/fsl/b4si-pre.dtsi=20
> b/arch/powerpc/boot/dts/fsl/b4si-pre.dtsi
> new file mode 100644
> index 0000000..b6161c8
> --- /dev/null
> +++ b/arch/powerpc/boot/dts/fsl/b4si-pre.dtsi
> @@ -0,0 +1,65 @@
> +/*
> + * B4420 Silicon/SoC Device Tree Source (pre include)
> + *
> + * Copyright 2012 Freescale Semiconductor, Inc.
> + *
> + * Redistribution and use in source and binary forms, with or without
> + * modification, are permitted provided that the following conditions are met:
> + * * Redistributions of source code must retain the above copyright
> + * notice, this list of conditions and the following disclaimer.
> + * * Redistributions in binary form must reproduce the above copyright
> + * notice, this list of conditions and the following disclaimer in the
> + * documentation and/or other materials provided with the distribution.
> + * * Neither the name of Freescale Semiconductor nor the
> + * names of its contributors may be used to endorse or promote products
> + * derived from this software without specific prior written permission.
> + *
> + *
> + * ALTERNATIVELY, this software may be distributed under the terms of=20
> +the
> + * GNU General Public License ("GPL") as published by the Free=20
> +Software
> + * Foundation, either version 2 of that License or (at your option)=20
> +any
> + * later version.
> + *
> + * This software is provided by Freescale Semiconductor "as is" and=20
> +any
> + * express or implied warranties, including, but not limited to, the=20
> +implied
> + * warranties of merchantability and fitness for a particular purpose=20
> +are
> + * disclaimed. In no event shall Freescale Semiconductor be liable=20
> +for any
> + * direct, indirect, incidental, special, exemplary, or consequential=20
> +damages
> + * (including, but not limited to, procurement of substitute goods or=20
> +services;
> + * loss of use, data, or profits; or business interruption) however=20
> +caused and
> + * on any theory of liability, whether in contract, strict liability,=20
> +or tort
> + * (including negligence or otherwise) arising in any way out of the=20
> +use of
> + * this software, even if advised of the possibility of such damage.
> + */
> +
> +/ {
> + compatible =3D "fsl,B4";
> + #address-cells =3D <2>;
> + #size-cells =3D <2>;
> + interrupt-parent =3D <&mpic>;
> +
> + aliases {
> + ccsr =3D &soc;
> + dcsr =3D &dcsr;
> +
> + serial0 =3D &serial0;
> + serial1 =3D &serial1;
> + serial2 =3D &serial2;
> + serial3 =3D &serial3;
> + pci0 =3D &pci0;
> + dma0 =3D &dma0;
> + dma1 =3D &dma1;
> + sdhc =3D &sdhc;
> + };
> +
> + cpus {
> + #address-cells =3D <1>;
> + #size-cells =3D <0>;
> +
> + cpu0: PowerPC,e6500@0 {
> + device_type =3D "cpu";
> + reg =3D <0 1>;
> + next-level-cache =3D <&L2>;
> + };
> + };
> +};
> --
> 1.7.6.GIT
>=20
^ permalink raw reply
* RE: [PATCH 2/5] powerpc/fsl-booke: Add initial silicon device tree files for B4860 and B4420
From: Leekha Shaveta-B20052 @ 2013-04-04 7:03 UTC (permalink / raw)
To: Wood Scott-B07421
Cc: Zhao Chenhui-B35336, Mehresh Ramneek-B31383, Garg Vakul-B16394,
Lian Minghuan-B31939, Tang Yuantian-B29983, Fleming Andy-AFLEMING,
Sethi Varun-B16395, linuxppc-dev@lists.ozlabs.org
In-Reply-To: <1365007194.25627.1@snotra>
-----Original Message-----
From: Wood Scott-B07421=20
Sent: Wednesday, April 03, 2013 10:10 PM
To: Leekha Shaveta-B20052
Cc: Wood Scott-B07421; linuxppc-dev@lists.ozlabs.org; Zhao Chenhui-B35336; Lian Minghuan-B31939; Garg Vakul-B16394; Tang Yuantian-B29983; Fleming Andy-AFLEMING; Mehresh Ramneek-B31383; Sethi Varun-B16395
Subject: Re: [PATCH 2/5] powerpc/fsl-booke: Add initial silicon device tree files for B4860 and B4420
On 04/03/2013 01:42:14 AM, Leekha Shaveta-B20052 wrote:
>=20
>=20
> -----Original Message-----
> From: Wood Scott-B07421
> Sent: Wednesday, April 03, 2013 12:49 AM
> To: Leekha Shaveta-B20052
> Cc: linuxppc-dev@lists.ozlabs.org; Zhao Chenhui-B35336; Lian=20
> Minghuan-B31939; Leekha Shaveta-B20052; Garg Vakul-B16394; Tang=20
> Yuantian-B29983; Fleming Andy-AFLEMING; Mehresh Ramneek-B31383; Sethi
> Varun-B16395
> Subject: Re: [PATCH 2/5] powerpc/fsl-booke: Add initial silicon device=20
> tree files for B4860 and B4420
>=20
> On 04/02/2013 02:16:05 AM, Shaveta Leekha wrote:
> > +/ {
> > + compatible =3D "fsl,B4860";
> > +
> > + cpus {
> > + cpu1: PowerPC,e6500@1 {
> > + device_type =3D "cpu";
> > + reg =3D <2 3>;
> > + next-level-cache =3D <&L2>;
> > + };
> > + cpu2: PowerPC,e6500@2 {
> > + device_type =3D "cpu";
> > + reg =3D <4 5>;
> > + next-level-cache =3D <&L2>;
> > + };
> > + cpu3: PowerPC,e6500@3 {
> > + device_type =3D "cpu";
> > + reg =3D <6 7>;
> > + next-level-cache =3D <&L2>;
> > + };
>=20
> The unit addresses need to match "reg".
> [SL] You mean "@1" should match to "reg =3D <2 3>" ?
Yes, it should be "@2" for that node.
> As each e6500 core in B4860 is dual- threaded, reg property here=20
> represents the thread's identifier in that PA core.
>=20
> So convention used in T4 and B4 is: core 0 having threads 0 and 1,
> Core 1 having <2 3> and
> so on....
The convention used in device trees is that the unit address matches the reg.
-Scott
[SL] Ok, I can change that. I will make the unit address @2 for <2 3>, @4 for <4 5>, and so on....
Kumar, please respond here, as I have followed the convention used in the T4 device tree files for
dual-threaded cores.
Regards,
Shaveta
^ permalink raw reply
* Re: [PATCH 9/9] powerpc: cpufreq: move cpufreq driver to drivers/cpufreq
From: Amit Kucheria @ 2013-04-04 6:30 UTC (permalink / raw)
To: Viresh Kumar, deepthi
Cc: Robin Randhawa, linux-pm, Liviu Dudau, linux-kernel, cpufreq,
Rafael J. Wysocki, Steve Bannister, Paul Mackerras,
Olof Johansson, Arvind Chauhan, linuxppc-dev, Lists linaro-kernel,
Charles Garcia-Tobin
In-Reply-To: <CAKohpo=EPbGocbNaZcNtGuZy4Tst5jH4rPviQ6L=YuZceDnKMw@mail.gmail.com>
On Thu, Apr 4, 2013 at 11:55 AM, Viresh Kumar <viresh.kumar@linaro.org> wrote:
> On 3 April 2013 16:00, Benjamin Herrenschmidt <benh@kernel.crashing.org> wrote:
>> On Wed, 2013-04-03 at 15:00 +0530, Viresh Kumar wrote:
>>> On 31 March 2013 09:33, Viresh Kumar <viresh.kumar@linaro.org> wrote:
>
>>> > Benjamin/Paul/Olof,
>>> >
>>> > Any comments on this?
>>>
>>> Ping!!
>>
>> I'm on vacation until end of April. No objection to the patch but
>> somebody needs to test it.
>
> Hi,
>
> Can somebody else from powerpc world give it a try?
Deepthi, can you help?
> OR
>
> @Rafael: Can we get this pushed in linux-next as is and then people would
> be forced to test it and in case there are any complaints, i will fix them or
> you can revert it?
^ permalink raw reply
* Re: [PATCH 9/9] powerpc: cpufreq: move cpufreq driver to drivers/cpufreq
From: Viresh Kumar @ 2013-04-04 6:25 UTC (permalink / raw)
To: Olof Johansson, linuxppc-dev, Benjamin Herrenschmidt,
Paul Mackerras, Rafael J. Wysocki
Cc: robin.randhawa, linux-pm, Liviu.Dudau, linux-kernel, cpufreq,
Steve.Bannister, Arnd Bergmann, arvind.chauhan, linaro-kernel,
charles.garcia-tobin
In-Reply-To: <1364985034.16520.12.camel@pasglop>
On 3 April 2013 16:00, Benjamin Herrenschmidt <benh@kernel.crashing.org> wrote:
> On Wed, 2013-04-03 at 15:00 +0530, Viresh Kumar wrote:
>> On 31 March 2013 09:33, Viresh Kumar <viresh.kumar@linaro.org> wrote:
>> > Benjamin/Paul/Olof,
>> >
>> > Any comments on this?
>>
>> Ping!!
>
> I'm on vacation until end of April. No objection to the patch but
> somebody needs to test it.
Hi,
Can somebody else from powerpc world give it a try?
OR
@Rafael: Can we get this pushed in linux-next as is and then people would
be forced to test it and in case there are any complaints, i will fix them or
you can revert it?
^ permalink raw reply
* Re: [PATCH -V5 00/25] THP support for PPC64
From: Simon Jeons @ 2013-04-04 6:14 UTC (permalink / raw)
To: Aneesh Kumar K.V; +Cc: paulus, linuxppc-dev, linux-mm
In-Reply-To: <1365055083-31956-1-git-send-email-aneesh.kumar@linux.vnet.ibm.com>
Hi Aneesh,
On 04/04/2013 01:57 PM, Aneesh Kumar K.V wrote:
> Hi,
>
> This patchset adds transparent hugepage support for PPC64.
>
> TODO:
> * hash preload support in update_mmu_cache_pmd (we don't do that for hugetlb)
>
> Some numbers:
>
> The latency measurements code from Anton found at
> http://ozlabs.org/~anton/junkcode/latency2001.c
>
> THP disabled 64K page size
> ------------------------
> [root@llmp24l02 ~]# ./latency2001 8G
> 8589934592 731.73 cycles 205.77 ns
> [root@llmp24l02 ~]# ./latency2001 8G
> 8589934592 743.39 cycles 209.05 ns
Could you explain what's the meaning of result?
> [root@llmp24l02 ~]#
>
> THP disabled large page via hugetlbfs
> -------------------------------------
> [root@llmp24l02 ~]# ./latency2001 -l 8G
> 8589934592 416.09 cycles 117.01 ns
> [root@llmp24l02 ~]# ./latency2001 -l 8G
> 8589934592 415.74 cycles 116.91 ns
>
> THP enabled 64K page size.
> ----------------
> [root@llmp24l02 ~]# ./latency2001 8G
> 8589934592 405.07 cycles 113.91 ns
> [root@llmp24l02 ~]# ./latency2001 8G
> 8589934592 411.82 cycles 115.81 ns
> [root@llmp24l02 ~]#
>
> We are close to hugetlbfs in latency and we can achieve this with zero
> config/page reservation. Most of the allocations above are fault allocated.
>
> Another test that does 50000000 random access over 1GB area goes from
> 2.65 seconds to 1.07 seconds with this patchset.
>
> split_huge_page impact:
> ---------------------
> To look at the performance impact of large page invalidate, I tried the below
> experiment. The test involved, accessing a large contiguous region of memory
> location as below
>
> for (i = 0; i < size; i += PAGE_SIZE)
> data[i] = i;
>
> We wanted to access the data in sequential order so that we look at the
> worst case THP performance. Accessing the data in sequential order implies
> we have the Page table cached and overhead of TLB miss is as minimal as
> possible. We also don't touch the entire page, because that can result in
> cache evict.
>
> After we touched the full range as above, we now call mprotect on each
> of that page. A mprotect will result in a hugepage split. This should
> allow us to measure the impact of hugepage split.
>
> for (i = 0; i < size; i += PAGE_SIZE)
> mprotect(&data[i], PAGE_SIZE, PROT_READ);
>
> Split hugepage impact:
> ---------------------
> THP enabled: 2.851561705 seconds for test completion
> THP disable: 3.599146098 seconds for test completion
>
> We are 20.7% better than non THP case even when we have all the large pages split.
>
> Detailed output:
>
> THP enabled:
> ---------------------------------------
> [root@llmp24l02 ~]# cat /proc/vmstat | grep thp
> thp_fault_alloc 0
> thp_fault_fallback 0
> thp_collapse_alloc 0
> thp_collapse_alloc_failed 0
> thp_split 0
> thp_zero_page_alloc 0
> thp_zero_page_alloc_failed 0
> [root@llmp24l02 ~]# /root/thp/tools/perf/perf stat -e page-faults,dTLB-load-misses ./split-huge-page-mpro 20G
> time taken to touch all the data in ns: 2763096913
>
> Performance counter stats for './split-huge-page-mpro 20G':
>
> 1,581 page-faults
> 3,159 dTLB-load-misses
>
> 2.851561705 seconds time elapsed
>
> [root@llmp24l02 ~]#
> [root@llmp24l02 ~]# cat /proc/vmstat | grep thp
> thp_fault_alloc 1279
> thp_fault_fallback 0
> thp_collapse_alloc 0
> thp_collapse_alloc_failed 0
> thp_split 1279
> thp_zero_page_alloc 0
> thp_zero_page_alloc_failed 0
> [root@llmp24l02 ~]#
>
> 77.05% split-huge-page [kernel.kallsyms] [k] .clear_user_page
> 7.10% split-huge-page [kernel.kallsyms] [k] .perf_event_mmap_ctx
> 1.51% split-huge-page split-huge-page-mpro [.] 0x0000000000000a70
> 0.96% split-huge-page [unknown] [H] 0x000000000157e3bc
> 0.81% split-huge-page [kernel.kallsyms] [k] .up_write
> 0.76% split-huge-page [kernel.kallsyms] [k] .perf_event_mmap
> 0.76% split-huge-page [kernel.kallsyms] [k] .down_write
> 0.74% split-huge-page [kernel.kallsyms] [k] .lru_add_page_tail
> 0.61% split-huge-page [kernel.kallsyms] [k] .split_huge_page
> 0.59% split-huge-page [kernel.kallsyms] [k] .change_protection
> 0.51% split-huge-page [kernel.kallsyms] [k] .release_pages
>
>
> 0.96% split-huge-page [unknown] [H] 0x000000000157e3bc
> |
> |--79.44%-- reloc_start
> | |
> | |--86.54%-- .__pSeries_lpar_hugepage_invalidate
> | | .pSeries_lpar_hugepage_invalidate
> | | .hpte_need_hugepage_flush
> | | .split_huge_page
> | | .__split_huge_page_pmd
> | | .vma_adjust
> | | .vma_merge
> | | .mprotect_fixup
> | | .SyS_mprotect
>
>
> THP disabled:
> ---------------
> [root@llmp24l02 ~]# echo never > /sys/kernel/mm/transparent_hugepage/enabled
> [root@llmp24l02 ~]# /root/thp/tools/perf/perf stat -e page-faults,dTLB-load-misses ./split-huge-page-mpro 20G
> time taken to touch all the data in ns: 3513767220
>
> Performance counter stats for './split-huge-page-mpro 20G':
>
> 3,27,726 page-faults
> 3,29,654 dTLB-load-misses
>
> 3.599146098 seconds time elapsed
>
> [root@llmp24l02 ~]#
>
> Changes from V4:
> * Fix bad page error in page_table_alloc
> BUG: Bad page state in process stream pfn:f1a59
> page:f0000000034dc378 count:1 mapcount:0 mapping: (null) index:0x0
> [c000000f322c77d0] [c00000000015e198] .bad_page+0xe8/0x140
> [c000000f322c7860] [c00000000015e3c4] .free_pages_prepare+0x1d4/0x1e0
> [c000000f322c7910] [c000000000160450] .free_hot_cold_page+0x50/0x230
> [c000000f322c79c0] [c00000000003ad18] .page_table_alloc+0x168/0x1c0
>
> Changes from V3:
> * PowerNV boot fixes
>
> Change from V2:
> * Change patch "powerpc: Reduce PTE table memory wastage" to use much simpler approach
> for PTE page sharing.
> * Changes to handle huge pages in KVM code.
> * Address other review comments
>
> Changes from V1
> * Address review comments
> * More patch split
> * Add batch hpte invalidate for hugepages.
>
> Changes from RFC V2:
> * Address review comments
> * More code cleanup and patch split
>
> Changes from RFC V1:
> * HugeTLB fs now works
> * Compile issues fixed
> * rebased to v3.8
> * Patch series reordered so that ppc64 cleanups and MM THP changes are moved
> early in the series. This should help in picking those patches early.
>
> Thanks,
> -aneesh
>
> --
> To unsubscribe, send a message with 'unsubscribe linux-mm' in
> the body to majordomo@kvack.org. For more info on Linux MM,
> see: http://www.linux-mm.org/ .
> Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply
* Re: [PATCH -V5 00/25] THP support for PPC64
From: Aneesh Kumar K.V @ 2013-04-04 6:10 UTC (permalink / raw)
To: Simon Jeons; +Cc: paulus, linuxppc-dev, linux-mm
In-Reply-To: <515D16E4.8020207@gmail.com>
Simon Jeons <simon.jeons@gmail.com> writes:
> Hi Aneesh,
> On 04/04/2013 01:57 PM, Aneesh Kumar K.V wrote:
>> Hi,
>>
>> This patchset adds transparent hugepage support for PPC64.
>>
>> TODO:
>> * hash preload support in update_mmu_cache_pmd (we don't do that for hugetlb)
>>
>> Some numbers:
>>
>> The latency measurements code from Anton found at
>> http://ozlabs.org/~anton/junkcode/latency2001.c
>
> Is there test case against x86?
>
That test should work even with x86
-aneesh
^ permalink raw reply
* Re: [PATCH -V5 00/25] THP support for PPC64
From: Simon Jeons @ 2013-04-04 6:00 UTC (permalink / raw)
To: Aneesh Kumar K.V; +Cc: paulus, linuxppc-dev, linux-mm
In-Reply-To: <1365055083-31956-1-git-send-email-aneesh.kumar@linux.vnet.ibm.com>
Hi Aneesh,
On 04/04/2013 01:57 PM, Aneesh Kumar K.V wrote:
> Hi,
>
> This patchset adds transparent hugepage support for PPC64.
>
> TODO:
> * hash preload support in update_mmu_cache_pmd (we don't do that for hugetlb)
>
> Some numbers:
>
> The latency measurements code from Anton found at
> http://ozlabs.org/~anton/junkcode/latency2001.c
Is there test case against x86?
>
> THP disabled 64K page size
> ------------------------
> [root@llmp24l02 ~]# ./latency2001 8G
> 8589934592 731.73 cycles 205.77 ns
> [root@llmp24l02 ~]# ./latency2001 8G
> 8589934592 743.39 cycles 209.05 ns
> [root@llmp24l02 ~]#
>
> THP disabled large page via hugetlbfs
> -------------------------------------
> [root@llmp24l02 ~]# ./latency2001 -l 8G
> 8589934592 416.09 cycles 117.01 ns
> [root@llmp24l02 ~]# ./latency2001 -l 8G
> 8589934592 415.74 cycles 116.91 ns
>
> THP enabled 64K page size.
> ----------------
> [root@llmp24l02 ~]# ./latency2001 8G
> 8589934592 405.07 cycles 113.91 ns
> [root@llmp24l02 ~]# ./latency2001 8G
> 8589934592 411.82 cycles 115.81 ns
> [root@llmp24l02 ~]#
>
> We are close to hugetlbfs in latency and we can achieve this with zero
> config/page reservation. Most of the allocations above are fault allocated.
>
> Another test that does 50000000 random access over 1GB area goes from
> 2.65 seconds to 1.07 seconds with this patchset.
>
> split_huge_page impact:
> ---------------------
> To look at the performance impact of large page invalidate, I tried the below
> experiment. The test involved, accessing a large contiguous region of memory
> location as below
>
> for (i = 0; i < size; i += PAGE_SIZE)
> data[i] = i;
>
> We wanted to access the data in sequential order so that we look at the
> worst case THP performance. Accessing the data in sequential order implies
> we have the Page table cached and overhead of TLB miss is as minimal as
> possible. We also don't touch the entire page, because that can result in
> cache evict.
>
> After we touched the full range as above, we now call mprotect on each
> of that page. A mprotect will result in a hugepage split. This should
> allow us to measure the impact of hugepage split.
>
> for (i = 0; i < size; i += PAGE_SIZE)
> mprotect(&data[i], PAGE_SIZE, PROT_READ);
>
> Split hugepage impact:
> ---------------------
> THP enabled: 2.851561705 seconds for test completion
> THP disable: 3.599146098 seconds for test completion
>
> We are 20.7% better than non THP case even when we have all the large pages split.
>
> Detailed output:
>
> THP enabled:
> ---------------------------------------
> [root@llmp24l02 ~]# cat /proc/vmstat | grep thp
> thp_fault_alloc 0
> thp_fault_fallback 0
> thp_collapse_alloc 0
> thp_collapse_alloc_failed 0
> thp_split 0
> thp_zero_page_alloc 0
> thp_zero_page_alloc_failed 0
> [root@llmp24l02 ~]# /root/thp/tools/perf/perf stat -e page-faults,dTLB-load-misses ./split-huge-page-mpro 20G
> time taken to touch all the data in ns: 2763096913
>
> Performance counter stats for './split-huge-page-mpro 20G':
>
> 1,581 page-faults
> 3,159 dTLB-load-misses
>
> 2.851561705 seconds time elapsed
>
> [root@llmp24l02 ~]#
> [root@llmp24l02 ~]# cat /proc/vmstat | grep thp
> thp_fault_alloc 1279
> thp_fault_fallback 0
> thp_collapse_alloc 0
> thp_collapse_alloc_failed 0
> thp_split 1279
> thp_zero_page_alloc 0
> thp_zero_page_alloc_failed 0
> [root@llmp24l02 ~]#
>
> 77.05% split-huge-page [kernel.kallsyms] [k] .clear_user_page
> 7.10% split-huge-page [kernel.kallsyms] [k] .perf_event_mmap_ctx
> 1.51% split-huge-page split-huge-page-mpro [.] 0x0000000000000a70
> 0.96% split-huge-page [unknown] [H] 0x000000000157e3bc
> 0.81% split-huge-page [kernel.kallsyms] [k] .up_write
> 0.76% split-huge-page [kernel.kallsyms] [k] .perf_event_mmap
> 0.76% split-huge-page [kernel.kallsyms] [k] .down_write
> 0.74% split-huge-page [kernel.kallsyms] [k] .lru_add_page_tail
> 0.61% split-huge-page [kernel.kallsyms] [k] .split_huge_page
> 0.59% split-huge-page [kernel.kallsyms] [k] .change_protection
> 0.51% split-huge-page [kernel.kallsyms] [k] .release_pages
>
>
> 0.96% split-huge-page [unknown] [H] 0x000000000157e3bc
> |
> |--79.44%-- reloc_start
> | |
> | |--86.54%-- .__pSeries_lpar_hugepage_invalidate
> | | .pSeries_lpar_hugepage_invalidate
> | | .hpte_need_hugepage_flush
> | | .split_huge_page
> | | .__split_huge_page_pmd
> | | .vma_adjust
> | | .vma_merge
> | | .mprotect_fixup
> | | .SyS_mprotect
>
>
> THP disabled:
> ---------------
> [root@llmp24l02 ~]# echo never > /sys/kernel/mm/transparent_hugepage/enabled
> [root@llmp24l02 ~]# /root/thp/tools/perf/perf stat -e page-faults,dTLB-load-misses ./split-huge-page-mpro 20G
> time taken to touch all the data in ns: 3513767220
>
> Performance counter stats for './split-huge-page-mpro 20G':
>
> 3,27,726 page-faults
> 3,29,654 dTLB-load-misses
>
> 3.599146098 seconds time elapsed
>
> [root@llmp24l02 ~]#
>
> Changes from V4:
> * Fix bad page error in page_table_alloc
> BUG: Bad page state in process stream pfn:f1a59
> page:f0000000034dc378 count:1 mapcount:0 mapping: (null) index:0x0
> [c000000f322c77d0] [c00000000015e198] .bad_page+0xe8/0x140
> [c000000f322c7860] [c00000000015e3c4] .free_pages_prepare+0x1d4/0x1e0
> [c000000f322c7910] [c000000000160450] .free_hot_cold_page+0x50/0x230
> [c000000f322c79c0] [c00000000003ad18] .page_table_alloc+0x168/0x1c0
>
> Changes from V3:
> * PowerNV boot fixes
>
> Change from V2:
> * Change patch "powerpc: Reduce PTE table memory wastage" to use much simpler approach
> for PTE page sharing.
> * Changes to handle huge pages in KVM code.
> * Address other review comments
>
> Changes from V1
> * Address review comments
> * More patch split
> * Add batch hpte invalidate for hugepages.
>
> Changes from RFC V2:
> * Address review comments
> * More code cleanup and patch split
>
> Changes from RFC V1:
> * HugeTLB fs now works
> * Compile issues fixed
> * rebased to v3.8
> * Patch series reordered so that ppc64 cleanups and MM THP changes are moved
> early in the series. This should help in picking those patches early.
>
> Thanks,
> -aneesh
>
> --
> To unsubscribe, send a message with 'unsubscribe linux-mm' in
> the body to majordomo@kvack.org. For more info on Linux MM,
> see: http://www.linux-mm.org/ .
> Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply
* [PATCH -V5 22/25] powerpc/THP: get_user_pages_fast changes
From: Aneesh Kumar K.V @ 2013-04-04 5:58 UTC (permalink / raw)
To: benh, paulus; +Cc: linux-mm, linuxppc-dev, Aneesh Kumar K.V
In-Reply-To: <1365055083-31956-1-git-send-email-aneesh.kumar@linux.vnet.ibm.com>
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
handle large pages for get_user_pages_fast. Also take care of large page splitting.
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
arch/powerpc/mm/gup.c | 84 +++++++++++++++++++++++++++++++++++++++++++++++--
1 file changed, 82 insertions(+), 2 deletions(-)
diff --git a/arch/powerpc/mm/gup.c b/arch/powerpc/mm/gup.c
index d7efdbf..835c1ae 100644
--- a/arch/powerpc/mm/gup.c
+++ b/arch/powerpc/mm/gup.c
@@ -55,6 +55,72 @@ static noinline int gup_pte_range(pmd_t pmd, unsigned long addr,
return 1;
}
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+static inline int gup_huge_pmd(pmd_t *pmdp, unsigned long addr,
+ unsigned long end, int write,
+ struct page **pages, int *nr)
+{
+ int refs;
+ pmd_t pmd;
+ unsigned long mask;
+ struct page *head, *page, *tail;
+
+ pmd = *pmdp;
+ mask = PMD_HUGE_PRESENT | PMD_HUGE_USER;
+ if (write)
+ mask |= PMD_HUGE_RW;
+
+ if ((pmd_val(pmd) & mask) != mask)
+ return 0;
+
+ /* large pages are never "special" */
+ VM_BUG_ON(!pfn_valid(pmd_pfn(pmd)));
+
+ refs = 0;
+ head = pmd_page(pmd);
+ page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
+ tail = page;
+ do {
+ VM_BUG_ON(compound_head(page) != head);
+ pages[*nr] = page;
+ (*nr)++;
+ page++;
+ refs++;
+ } while (addr += PAGE_SIZE, addr != end);
+
+ if (!page_cache_add_speculative(head, refs)) {
+ *nr -= refs;
+ return 0;
+ }
+
+ if (unlikely(pmd_val(pmd) != pmd_val(*pmdp))) {
+ *nr -= refs;
+ while (refs--)
+ put_page(head);
+ return 0;
+ }
+ /*
+ * Any tail page need their mapcount reference taken before we
+ * return.
+ */
+ while (refs--) {
+ if (PageTail(tail))
+ get_huge_page_tail(tail);
+ tail++;
+ }
+
+ return 1;
+}
+#else
+
+static inline int gup_huge_pmd(pmd_t *pmdp, unsigned long addr,
+ unsigned long end, int write,
+ struct page **pages, int *nr)
+{
+ return 1;
+}
+#endif
+
static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
int write, struct page **pages, int *nr)
{
@@ -66,9 +132,23 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
pmd_t pmd = *pmdp;
next = pmd_addr_end(addr, end);
- if (pmd_none(pmd))
+ /*
+ * The pmd_trans_splitting() check below explains why
+ * pmdp_splitting_flush has to flush the tlb, to stop
+ * this gup-fast code from running while we set the
+ * splitting bit in the pmd. Returning zero will take
+ * the slow path that will call wait_split_huge_page()
+ * if the pmd is still in splitting state. gup-fast
+ * can't because it has irq disabled and
+ * wait_split_huge_page() would never return as the
+ * tlb flush IPI wouldn't run.
+ */
+ if (pmd_none(pmd) || pmd_trans_splitting(pmd))
return 0;
- if (is_hugepd(pmdp)) {
+ if (unlikely(pmd_large(pmd))) {
+ if (!gup_huge_pmd(pmdp, addr, next, write, pages, nr))
+ return 0;
+ } else if (is_hugepd(pmdp)) {
if (!gup_hugepd((hugepd_t *)pmdp, PMD_SHIFT,
addr, next, write, pages, nr))
return 0;
--
1.7.10
^ permalink raw reply related
* [PATCH -V5 24/25] powerpc: Optimize hugepage invalidate
From: Aneesh Kumar K.V @ 2013-04-04 5:58 UTC (permalink / raw)
To: benh, paulus; +Cc: linux-mm, linuxppc-dev, Aneesh Kumar K.V
In-Reply-To: <1365055083-31956-1-git-send-email-aneesh.kumar@linux.vnet.ibm.com>
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Hugepage invalidate involves invalidating multiple hpte entries.
Optimize the operation using H_BULK_REMOVE on lpar platforms.
On native, reduce the number of TLB flushes.
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
arch/powerpc/include/asm/machdep.h | 3 +
arch/powerpc/mm/hash_native_64.c | 78 ++++++++++++++++++++
arch/powerpc/mm/pgtable.c | 13 +++-
arch/powerpc/platforms/pseries/lpar.c | 126 +++++++++++++++++++++++++++++++--
4 files changed, 210 insertions(+), 10 deletions(-)
diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h
index 6cee6e0..3bc7816 100644
--- a/arch/powerpc/include/asm/machdep.h
+++ b/arch/powerpc/include/asm/machdep.h
@@ -56,6 +56,9 @@ struct machdep_calls {
void (*hpte_removebolted)(unsigned long ea,
int psize, int ssize);
void (*flush_hash_range)(unsigned long number, int local);
+ void (*hugepage_invalidate)(struct mm_struct *mm,
+ unsigned char *hpte_slot_array,
+ unsigned long addr, int psize);
/* special for kexec, to be called in real mode, linear mapping is
* destroyed as well */
diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c
index ac84fa6..59f29bf 100644
--- a/arch/powerpc/mm/hash_native_64.c
+++ b/arch/powerpc/mm/hash_native_64.c
@@ -450,6 +450,83 @@ static void native_hpte_invalidate(unsigned long slot, unsigned long vpn,
local_irq_restore(flags);
}
+static void native_hugepage_invalidate(struct mm_struct *mm,
+ unsigned char *hpte_slot_array,
+ unsigned long addr, int psize)
+{
+ int ssize = 0, i;
+ int lock_tlbie;
+ struct hash_pte *hptep;
+ int actual_psize = MMU_PAGE_16M;
+ unsigned int max_hpte_count, valid;
+ unsigned long flags, s_addr = addr;
+ unsigned long hpte_v, want_v, shift;
+ unsigned long hidx, vpn = 0, vsid, hash, slot;
+
+ shift = mmu_psize_defs[psize].shift;
+ max_hpte_count = HUGE_PAGE_SIZE/(1ul << shift);
+
+ local_irq_save(flags);
+ for (i = 0; i < max_hpte_count; i++) {
+ /*
+ * 8 bits per each hpte entries
+ * 000| [ secondary group (one bit) | hidx (3 bits) | valid bit]
+ */
+ valid = hpte_slot_array[i] & 0x1;
+ if (!valid)
+ continue;
+ hidx = hpte_slot_array[i] >> 1;
+
+ /* get the vpn */
+ addr = s_addr + (i * (1ul << shift));
+ if (!is_kernel_addr(addr)) {
+ ssize = user_segment_size(addr);
+ vsid = get_vsid(mm->context.id, addr, ssize);
+ WARN_ON(vsid == 0);
+ } else {
+ vsid = get_kernel_vsid(addr, mmu_kernel_ssize);
+ ssize = mmu_kernel_ssize;
+ }
+
+ vpn = hpt_vpn(addr, vsid, ssize);
+ hash = hpt_hash(vpn, shift, ssize);
+ if (hidx & _PTEIDX_SECONDARY)
+ hash = ~hash;
+
+ slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
+ slot += hidx & _PTEIDX_GROUP_IX;
+
+ hptep = htab_address + slot;
+ want_v = hpte_encode_avpn(vpn, psize, ssize);
+ native_lock_hpte(hptep);
+ hpte_v = hptep->v;
+
+ /* Even if we miss, we need to invalidate the TLB */
+ if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID))
+ native_unlock_hpte(hptep);
+ else
+ /* Invalidate the hpte. NOTE: this also unlocks it */
+ hptep->v = 0;
+ }
+ /*
+ * Since this is a hugepage, we just need a single tlbie.
+ * use the last vpn.
+ */
+ lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);
+ if (lock_tlbie)
+ raw_spin_lock(&native_tlbie_lock);
+
+ asm volatile("ptesync":::"memory");
+ __tlbie(vpn, psize, actual_psize, ssize);
+ asm volatile("eieio; tlbsync; ptesync":::"memory");
+
+ if (lock_tlbie)
+ raw_spin_unlock(&native_tlbie_lock);
+
+ local_irq_restore(flags);
+}
+
+
static void hpte_decode(struct hash_pte *hpte, unsigned long slot,
int *psize, int *apsize, int *ssize, unsigned long *vpn)
{
@@ -678,4 +755,5 @@ void __init hpte_init_native(void)
ppc_md.hpte_remove = native_hpte_remove;
ppc_md.hpte_clear_all = native_hpte_clear;
ppc_md.flush_hash_range = native_flush_hash_range;
+ ppc_md.hugepage_invalidate = native_hugepage_invalidate;
}
diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
index fbff062..386cab8 100644
--- a/arch/powerpc/mm/pgtable.c
+++ b/arch/powerpc/mm/pgtable.c
@@ -433,6 +433,7 @@ void hpte_need_hugepage_flush(struct mm_struct *mm, unsigned long addr,
{
int ssize, i;
unsigned long s_addr;
+ int max_hpte_count;
unsigned int psize, valid;
unsigned char *hpte_slot_array;
unsigned long hidx, vpn, vsid, hash, shift, slot;
@@ -446,12 +447,18 @@ void hpte_need_hugepage_flush(struct mm_struct *mm, unsigned long addr,
* second half of the PMD
*/
hpte_slot_array = *(char **)(pmdp + PTRS_PER_PMD);
-
/* get the base page size */
psize = get_slice_psize(mm, s_addr);
- shift = mmu_psize_defs[psize].shift;
- for (i = 0; i < HUGE_PAGE_SIZE/(1ul << shift); i++) {
+ if (ppc_md.hugepage_invalidate)
+ return ppc_md.hugepage_invalidate(mm, hpte_slot_array,
+ s_addr, psize);
+ /*
+ * No bulk hpte removal support, invalidate each entry
+ */
+ shift = mmu_psize_defs[psize].shift;
+ max_hpte_count = HUGE_PAGE_SIZE/(1ul << shift);
+ for (i = 0; i < max_hpte_count; i++) {
/*
* 8 bits per each hpte entries
* 000| [ secondary group (one bit) | hidx (3 bits) | valid bit]
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index 3daced3..5fcc621 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -45,6 +45,13 @@
#include "plpar_wrappers.h"
#include "pseries.h"
+/* Flag bits for H_BULK_REMOVE */
+#define HBR_REQUEST 0x4000000000000000UL
+#define HBR_RESPONSE 0x8000000000000000UL
+#define HBR_END 0xc000000000000000UL
+#define HBR_AVPN 0x0200000000000000UL
+#define HBR_ANDCOND 0x0100000000000000UL
+
/* in hvCall.S */
EXPORT_SYMBOL(plpar_hcall);
@@ -339,6 +346,117 @@ static void pSeries_lpar_hpte_invalidate(unsigned long slot, unsigned long vpn,
BUG_ON(lpar_rc != H_SUCCESS);
}
+/*
+ * Limit iterations holding pSeries_lpar_tlbie_lock to 3. We also need
+ * to make sure that we avoid bouncing the hypervisor tlbie lock.
+ */
+#define PPC64_HUGE_HPTE_BATCH 12
+
+static void __pSeries_lpar_hugepage_invalidate(unsigned long *slot,
+ unsigned long *vpn, int count,
+ int psize, int ssize)
+{
+ unsigned long param[9];
+ int i = 0, pix = 0, rc;
+ unsigned long flags = 0;
+ int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);
+
+ if (lock_tlbie)
+ spin_lock_irqsave(&pSeries_lpar_tlbie_lock, flags);
+
+ for (i = 0; i < count; i++) {
+
+ if (!firmware_has_feature(FW_FEATURE_BULK_REMOVE)) {
+ pSeries_lpar_hpte_invalidate(slot[i], vpn[i], psize,
+ ssize, 0);
+ } else {
+ param[pix] = HBR_REQUEST | HBR_AVPN | slot[i];
+ param[pix+1] = hpte_encode_avpn(vpn[i], psize, ssize);
+ pix += 2;
+ if (pix == 8) {
+ rc = plpar_hcall9(H_BULK_REMOVE, param,
+ param[0], param[1], param[2],
+ param[3], param[4], param[5],
+ param[6], param[7]);
+ BUG_ON(rc != H_SUCCESS);
+ pix = 0;
+ }
+ }
+ }
+ if (pix) {
+ param[pix] = HBR_END;
+ rc = plpar_hcall9(H_BULK_REMOVE, param, param[0], param[1],
+ param[2], param[3], param[4], param[5],
+ param[6], param[7]);
+ BUG_ON(rc != H_SUCCESS);
+ }
+
+ if (lock_tlbie)
+ spin_unlock_irqrestore(&pSeries_lpar_tlbie_lock, flags);
+}
+
+static void pSeries_lpar_hugepage_invalidate(struct mm_struct *mm,
+ unsigned char *hpte_slot_array,
+ unsigned long addr, int psize)
+{
+ int ssize = 0, i, index = 0;
+ unsigned long s_addr = addr;
+ unsigned int max_hpte_count, valid;
+ unsigned long vpn_array[PPC64_HUGE_HPTE_BATCH];
+ unsigned long slot_array[PPC64_HUGE_HPTE_BATCH];
+ unsigned long shift, hidx, vpn = 0, vsid, hash, slot;
+
+ shift = mmu_psize_defs[psize].shift;
+ max_hpte_count = HUGE_PAGE_SIZE/(1ul << shift);
+
+ for (i = 0; i < max_hpte_count; i++) {
+ /*
+ * 8 bits per each hpte entries
+ * 000| [ secondary group (one bit) | hidx (3 bits) | valid bit]
+ */
+ valid = hpte_slot_array[i] & 0x1;
+ if (!valid)
+ continue;
+ hidx = hpte_slot_array[i] >> 1;
+
+ /* get the vpn */
+ addr = s_addr + (i * (1ul << shift));
+ if (!is_kernel_addr(addr)) {
+ ssize = user_segment_size(addr);
+ vsid = get_vsid(mm->context.id, addr, ssize);
+ WARN_ON(vsid == 0);
+ } else {
+ vsid = get_kernel_vsid(addr, mmu_kernel_ssize);
+ ssize = mmu_kernel_ssize;
+ }
+
+ vpn = hpt_vpn(addr, vsid, ssize);
+ hash = hpt_hash(vpn, shift, ssize);
+ if (hidx & _PTEIDX_SECONDARY)
+ hash = ~hash;
+
+ slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
+ slot += hidx & _PTEIDX_GROUP_IX;
+
+ slot_array[index] = slot;
+ vpn_array[index] = vpn;
+ if (index == PPC64_HUGE_HPTE_BATCH - 1) {
+ /*
+ * Now do a bulk invalidate
+ */
+ __pSeries_lpar_hugepage_invalidate(slot_array,
+ vpn_array,
+ PPC64_HUGE_HPTE_BATCH,
+ psize, ssize);
+ index = 0;
+ } else
+ index++;
+ }
+ if (index)
+ __pSeries_lpar_hugepage_invalidate(slot_array, vpn_array,
+ index, psize, ssize);
+}
+
static void pSeries_lpar_hpte_removebolted(unsigned long ea,
int psize, int ssize)
{
@@ -354,13 +472,6 @@ static void pSeries_lpar_hpte_removebolted(unsigned long ea,
pSeries_lpar_hpte_invalidate(slot, vpn, psize, ssize, 0);
}
-/* Flag bits for H_BULK_REMOVE */
-#define HBR_REQUEST 0x4000000000000000UL
-#define HBR_RESPONSE 0x8000000000000000UL
-#define HBR_END 0xc000000000000000UL
-#define HBR_AVPN 0x0200000000000000UL
-#define HBR_ANDCOND 0x0100000000000000UL
-
/*
* Take a spinlock around flushes to avoid bouncing the hypervisor tlbie
* lock.
@@ -446,6 +557,7 @@ void __init hpte_init_lpar(void)
ppc_md.hpte_removebolted = pSeries_lpar_hpte_removebolted;
ppc_md.flush_hash_range = pSeries_lpar_flush_hash_range;
ppc_md.hpte_clear_all = pSeries_lpar_hptab_clear;
+ ppc_md.hugepage_invalidate = pSeries_lpar_hugepage_invalidate;
}
#ifdef CONFIG_PPC_SMLPAR
--
1.7.10
^ permalink raw reply related
* [PATCH -V5 25/25] powerpc: Handle hugepages in kvm
From: Aneesh Kumar K.V @ 2013-04-04 5:58 UTC (permalink / raw)
To: benh, paulus; +Cc: linux-mm, linuxppc-dev, Aneesh Kumar K.V
In-Reply-To: <1365055083-31956-1-git-send-email-aneesh.kumar@linux.vnet.ibm.com>
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
We could possibly avoid some of these changes because most of the HUGE PMD bits
map to PTE bits.
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
arch/powerpc/include/asm/kvm_book3s_64.h | 31 ++++++++++++
arch/powerpc/kvm/book3s_64_mmu_hv.c | 12 ++++-
arch/powerpc/kvm/book3s_hv_rm_mmu.c | 75 ++++++++++++++++++++++--------
3 files changed, 97 insertions(+), 21 deletions(-)
diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
index 38bec1d..1c5c799 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -110,6 +110,7 @@ static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r,
return rb;
}
+/* FIXME !! should we use hpte_actual_psize or hpte decode ? */
static inline unsigned long hpte_page_size(unsigned long h, unsigned long l)
{
/* only handle 4k, 64k and 16M pages for now */
@@ -189,6 +190,36 @@ static inline pte_t kvmppc_read_update_linux_pte(pte_t *p, int writing)
return pte;
}
+/*
+ * Lock and read a linux hugepage PMD. If it's present and writable, atomically
+ * set dirty and referenced bits and return the PMD, otherwise return 0.
+ */
+static inline pmd_t kvmppc_read_update_linux_hugepmd(pmd_t *p, int writing)
+{
+ pmd_t pmd, tmp;
+
+ /* wait until _PAGE_BUSY is clear then set it atomically */
+ __asm__ __volatile__ (
+ "1: ldarx %0,0,%3\n"
+ " andi. %1,%0,%4\n"
+ " bne- 1b\n"
+ " ori %1,%0,%4\n"
+ " stdcx. %1,0,%3\n"
+ " bne- 1b"
+ : "=&r" (pmd), "=&r" (tmp), "=m" (*p)
+ : "r" (p), "i" (PMD_HUGE_BUSY)
+ : "cc");
+
+ if (pmd_large(pmd)) {
+ pmd = pmd_mkyoung(pmd);
+ if (writing && pmd_write(pmd))
+ pmd = pte_mkdirty(pmd);
+ }
+
+ *p = pmd; /* clears PMD_HUGE_BUSY */
+ return pmd;
+}
+
/* Return HPTE cache control bits corresponding to Linux pte bits */
static inline unsigned long hpte_cache_bits(unsigned long pte_val)
{
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 4f2a7dc..da006da 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -675,6 +675,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
}
/* if the guest wants write access, see if that is OK */
if (!writing && hpte_is_writable(r)) {
+ int hugepage;
pte_t *ptep, pte;
/*
@@ -683,11 +684,18 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
*/
rcu_read_lock_sched();
ptep = find_linux_pte_or_hugepte(current->mm->pgd,
- hva, NULL, NULL);
- if (ptep && pte_present(*ptep)) {
+ hva, NULL, &hugepage);
+ if (!hugepage && ptep && pte_present(*ptep)) {
pte = kvmppc_read_update_linux_pte(ptep, 1);
if (pte_write(pte))
write_ok = 1;
+ } else if (hugepage && ptep) {
+ pmd_t pmd = *(pmd_t *)ptep;
+ if (pmd_large(pmd)) {
+ pmd = kvmppc_read_update_linux_hugepmd((pmd_t *)ptep, 1);
+ if (pmd_write(pmd))
+ write_ok = 1;
+ }
}
rcu_read_unlock_sched();
}
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index 7c8e1ed..e9d4e3a 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -146,24 +146,37 @@ static void remove_revmap_chain(struct kvm *kvm, long pte_index,
}
static pte_t lookup_linux_pte(pgd_t *pgdir, unsigned long hva,
- int writing, unsigned long *pte_sizep)
+ int writing, unsigned long *pte_sizep,
+ int *hugepage)
{
pte_t *ptep;
unsigned long ps = *pte_sizep;
unsigned int shift;
- ptep = find_linux_pte_or_hugepte(pgdir, hva, &shift, NULL);
+ ptep = find_linux_pte_or_hugepte(pgdir, hva, &shift, hugepage);
if (!ptep)
return __pte(0);
- if (shift)
- *pte_sizep = 1ul << shift;
- else
- *pte_sizep = PAGE_SIZE;
+ if (*hugepage) {
+ *pte_sizep = 1ul << 24;
+ } else {
+ if (shift)
+ *pte_sizep = 1ul << shift;
+ else
+ *pte_sizep = PAGE_SIZE;
+ }
if (ps > *pte_sizep)
return __pte(0);
- if (!pte_present(*ptep))
- return __pte(0);
- return kvmppc_read_update_linux_pte(ptep, writing);
+
+ if (*hugepage) {
+ pmd_t *pmdp = (pmd_t *)ptep;
+ if (!pmd_large(*pmdp))
+ return __pmd(0);
+ return kvmppc_read_update_linux_hugepmd(pmdp, writing);
+ } else {
+ if (!pte_present(*ptep))
+ return __pte(0);
+ return kvmppc_read_update_linux_pte(ptep, writing);
+ }
}
static inline void unlock_hpte(unsigned long *hpte, unsigned long hpte_v)
@@ -239,18 +252,34 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
pte_size = PAGE_SIZE << (pa & KVMPPC_PAGE_ORDER_MASK);
pa &= PAGE_MASK;
} else {
+ int hugepage;
+
/* Translate to host virtual address */
hva = __gfn_to_hva_memslot(memslot, gfn);
/* Look up the Linux PTE for the backing page */
pte_size = psize;
- pte = lookup_linux_pte(pgdir, hva, writing, &pte_size);
- if (pte_present(pte)) {
- if (writing && !pte_write(pte))
- /* make the actual HPTE be read-only */
- ptel = hpte_make_readonly(ptel);
- is_io = hpte_cache_bits(pte_val(pte));
- pa = pte_pfn(pte) << PAGE_SHIFT;
+ pte = lookup_linux_pte(pgdir, hva, writing, &pte_size, &hugepage);
+ if (hugepage) {
+ pmd_t pmd = (pmd_t)pte;
+ if (!pmd_large(pmd)) {
+ if (writing && !pmd_write(pmd))
+ /* make the actual HPTE be read-only */
+ ptel = hpte_make_readonly(ptel);
+ /*
+ * we support hugepage only for RAM
+ */
+ is_io = 0;
+ pa = pmd_pfn(pmd) << PAGE_SHIFT;
+ }
+ } else {
+ if (pte_present(pte)) {
+ if (writing && !pte_write(pte))
+ /* make the actual HPTE be read-only */
+ ptel = hpte_make_readonly(ptel);
+ is_io = hpte_cache_bits(pte_val(pte));
+ pa = pte_pfn(pte) << PAGE_SHIFT;
+ }
}
}
@@ -645,10 +674,18 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
gfn = ((r & HPTE_R_RPN) & ~(psize - 1)) >> PAGE_SHIFT;
memslot = __gfn_to_memslot(kvm_memslots(kvm), gfn);
if (memslot) {
+ int hugepage;
hva = __gfn_to_hva_memslot(memslot, gfn);
- pte = lookup_linux_pte(pgdir, hva, 1, &psize);
- if (pte_present(pte) && !pte_write(pte))
- r = hpte_make_readonly(r);
+ pte = lookup_linux_pte(pgdir, hva, 1,
+ &psize, &hugepage);
+ if (hugepage) {
+ pmd_t pmd = (pmd_t)pte;
+ if (pmd_large(pmd) && !pmd_write(pmd))
+ r = hpte_make_readonly(r);
+ } else {
+ if (pte_present(pte) && !pte_write(pte))
+ r = hpte_make_readonly(r);
+ }
}
}
}
--
1.7.10
^ permalink raw reply related
* [PATCH -V5 21/25] powerpc: Handle hugepage in perf callchain
From: Aneesh Kumar K.V @ 2013-04-04 5:57 UTC (permalink / raw)
To: benh, paulus; +Cc: linux-mm, linuxppc-dev, Aneesh Kumar K.V
In-Reply-To: <1365055083-31956-1-git-send-email-aneesh.kumar@linux.vnet.ibm.com>
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
arch/powerpc/perf/callchain.c | 32 +++++++++++++++++++++-----------
1 file changed, 21 insertions(+), 11 deletions(-)
diff --git a/arch/powerpc/perf/callchain.c b/arch/powerpc/perf/callchain.c
index 578cac7..99262ce 100644
--- a/arch/powerpc/perf/callchain.c
+++ b/arch/powerpc/perf/callchain.c
@@ -115,7 +115,7 @@ static int read_user_stack_slow(void __user *ptr, void *ret, int nb)
{
pgd_t *pgdir;
pte_t *ptep, pte;
- unsigned shift;
+ unsigned shift, hugepage;
unsigned long addr = (unsigned long) ptr;
unsigned long offset;
unsigned long pfn;
@@ -125,20 +125,30 @@ static int read_user_stack_slow(void __user *ptr, void *ret, int nb)
if (!pgdir)
return -EFAULT;
- ptep = find_linux_pte_or_hugepte(pgdir, addr, &shift, NULL);
+ ptep = find_linux_pte_or_hugepte(pgdir, addr, &shift, &hugepage);
if (!shift)
shift = PAGE_SHIFT;
- /* align address to page boundary */
- offset = addr & ((1UL << shift) - 1);
- addr -= offset;
-
- if (ptep == NULL)
- return -EFAULT;
- pte = *ptep;
- if (!pte_present(pte) || !(pte_val(pte) & _PAGE_USER))
+ if (!ptep)
return -EFAULT;
- pfn = pte_pfn(pte);
+
+ if (hugepage) {
+ pmd_t pmd = *(pmd_t *)ptep;
+ shift = mmu_psize_defs[MMU_PAGE_16M].shift;
+ offset = addr & ((1UL << shift) - 1);
+
+ if (!pmd_large(pmd) || !(pmd_val(pmd) & PMD_HUGE_USER))
+ return -EFAULT;
+ pfn = pmd_pfn(pmd);
+ } else {
+ offset = addr & ((1UL << shift) - 1);
+
+ pte = *ptep;
+ if (!pte_present(pte) || !(pte_val(pte) & _PAGE_USER))
+ return -EFAULT;
+ pfn = pte_pfn(pte);
+ }
+
if (!page_is_ram(pfn))
return -EFAULT;
--
1.7.10
^ permalink raw reply related
* [PATCH -V5 23/25] powerpc/THP: Enable THP on PPC64
From: Aneesh Kumar K.V @ 2013-04-04 5:58 UTC (permalink / raw)
To: benh, paulus; +Cc: linux-mm, linuxppc-dev, Aneesh Kumar K.V
In-Reply-To: <1365055083-31956-1-git-send-email-aneesh.kumar@linux.vnet.ibm.com>
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
We enable THP only if we support the 16MB page size.
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
arch/powerpc/include/asm/pgtable.h | 31 +++++++++++++++++++++++++++++--
1 file changed, 29 insertions(+), 2 deletions(-)
diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h
index 9681de4..5617dee 100644
--- a/arch/powerpc/include/asm/pgtable.h
+++ b/arch/powerpc/include/asm/pgtable.h
@@ -81,8 +81,35 @@ static inline int pmd_trans_huge(pmd_t pmd)
return ((pmd_val(pmd) & PMD_ISHUGE) == PMD_ISHUGE);
}
-/* We will enable it in the last patch */
-#define has_transparent_hugepage() 0
+static inline int has_transparent_hugepage(void)
+{
+ if (!mmu_has_feature(MMU_FTR_16M_PAGE))
+ return 0;
+ /*
+ * We support THP only if HPAGE_SHIFT corresponds to the 16MB page size.
+ */
+ if (!HPAGE_SHIFT || (HPAGE_SHIFT != mmu_psize_defs[MMU_PAGE_16M].shift))
+ return 0;
+ /*
+ * We need to make sure that we support 16MB hugepage in a segment
+ * with base page size 64K or 4K. We only enable THP with a PAGE_SIZE
+ * of 64K.
+ */
+ /*
+ * If we have 64K HPTE, we will be using that by default
+ */
+ if (mmu_psize_defs[MMU_PAGE_64K].shift &&
+ (mmu_psize_defs[MMU_PAGE_64K].penc[MMU_PAGE_16M] == -1))
+ return 0;
+ /*
+ * Ok we only have 4K HPTE
+ */
+ if (mmu_psize_defs[MMU_PAGE_4K].penc[MMU_PAGE_16M] == -1)
+ return 0;
+
+ return 1;
+}
+
#else
#define pmd_large(pmd) 0
#define has_transparent_hugepage() 0
--
1.7.10
^ permalink raw reply related
* [PATCH -V5 19/25] powerpc/THP: Differentiate THP PMD entries from HUGETLB PMD entries
From: Aneesh Kumar K.V @ 2013-04-04 5:57 UTC (permalink / raw)
To: benh, paulus; +Cc: linux-mm, linuxppc-dev, Aneesh Kumar K.V
In-Reply-To: <1365055083-31956-1-git-send-email-aneesh.kumar@linux.vnet.ibm.com>
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
HUGETLB clears the top bit of PMD entries and uses that to indicate
a HUGETLB page directory. Since we store pfns in PMDs for THP,
we would have the top bit cleared by default. Add the top bit mask
for THP PMD entries and clear that when we are looking for pmd_pfn.
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
arch/powerpc/include/asm/pgtable.h | 16 +++++++++++++---
arch/powerpc/mm/pgtable.c | 5 ++++-
arch/powerpc/mm/pgtable_64.c | 2 +-
3 files changed, 18 insertions(+), 5 deletions(-)
diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h
index 9fbe2a7..9681de4 100644
--- a/arch/powerpc/include/asm/pgtable.h
+++ b/arch/powerpc/include/asm/pgtable.h
@@ -31,7 +31,7 @@ struct mm_struct;
#define PMD_HUGE_SPLITTING 0x008
#define PMD_HUGE_SAO 0x010 /* strong Access order */
#define PMD_HUGE_HASHPTE 0x020
-#define PMD_ISHUGE 0x040
+#define _PMD_ISHUGE 0x040
#define PMD_HUGE_DIRTY 0x080 /* C: page changed */
#define PMD_HUGE_ACCESSED 0x100 /* R: page referenced */
#define PMD_HUGE_RW 0x200 /* software: user write access allowed */
@@ -44,6 +44,14 @@ struct mm_struct;
#define PMD_HUGE_RPN_SHIFT PTE_RPN_SHIFT
#define HUGE_PAGE_SIZE (ASM_CONST(1) << 24)
#define HUGE_PAGE_MASK (~(HUGE_PAGE_SIZE - 1))
+/*
+ * HugeTLB looks at the top bit of the Linux page table entries to
+ * decide whether it is a huge page directory or not. Mark HUGE
+ * PMD to differentiate
+ */
+#define PMD_HUGE_NOT_HUGETLB (ASM_CONST(1) << 63)
+#define PMD_ISHUGE (_PMD_ISHUGE | PMD_HUGE_NOT_HUGETLB)
+#define PMD_HUGE_PROTBITS (0xfff | PMD_HUGE_NOT_HUGETLB)
#ifndef __ASSEMBLY__
extern void hpte_need_hugepage_flush(struct mm_struct *mm, unsigned long addr,
@@ -70,8 +78,9 @@ static inline int pmd_trans_splitting(pmd_t pmd)
static inline int pmd_trans_huge(pmd_t pmd)
{
- return pmd_val(pmd) & PMD_ISHUGE;
+ return ((pmd_val(pmd) & PMD_ISHUGE) == PMD_ISHUGE);
}
+
/* We will enable it in the last patch */
#define has_transparent_hugepage() 0
#else
@@ -84,7 +93,8 @@ static inline unsigned long pmd_pfn(pmd_t pmd)
/*
* Only called for hugepage pmd
*/
- return pmd_val(pmd) >> PMD_HUGE_RPN_SHIFT;
+ unsigned long val = pmd_val(pmd) & ~PMD_HUGE_PROTBITS;
+ return val >> PMD_HUGE_RPN_SHIFT;
}
static inline int pmd_young(pmd_t pmd)
diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
index 9f33780..cf3ca8e 100644
--- a/arch/powerpc/mm/pgtable.c
+++ b/arch/powerpc/mm/pgtable.c
@@ -517,7 +517,10 @@ static pmd_t pmd_set_protbits(pmd_t pmd, pgprot_t pgprot)
pmd_t pfn_pmd(unsigned long pfn, pgprot_t pgprot)
{
pmd_t pmd;
-
+ /*
+ * We cannot support that many PFNs
+ */
+ VM_BUG_ON(pfn & PMD_HUGE_NOT_HUGETLB);
pmd_val(pmd) = pfn << PMD_HUGE_RPN_SHIFT;
pmd_val(pmd) |= PMD_ISHUGE;
pmd = pmd_set_protbits(pmd, pgprot);
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index 6fc3488..cd53020 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -345,7 +345,7 @@ EXPORT_SYMBOL(__iounmap_at);
struct page *pmd_page(pmd_t pmd)
{
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
- if (pmd_val(pmd) & PMD_ISHUGE)
+ if ((pmd_val(pmd) & PMD_ISHUGE) == PMD_ISHUGE)
return pfn_to_page(pmd_pfn(pmd));
#endif
return virt_to_page(pmd_page_vaddr(pmd));
--
1.7.10
^ permalink raw reply related
* [PATCH -V5 18/25] powerpc/THP: Double the PMD table size for THP
From: Aneesh Kumar K.V @ 2013-04-04 5:57 UTC (permalink / raw)
To: benh, paulus; +Cc: linux-mm, linuxppc-dev, Aneesh Kumar K.V
In-Reply-To: <1365055083-31956-1-git-send-email-aneesh.kumar@linux.vnet.ibm.com>
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
THP code allocates a PTE page along with each large page request and deposits it
for later use. This is to ensure that we won't have any failures when we split
hugepages into regular pages.
On powerpc we want to use the deposited PTE page for storing hash pte slot and
secondary bit information for the HPTEs. We use the second half
of the pmd table to save the deposited PTE page.
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
arch/powerpc/include/asm/pgalloc-64.h | 6 +++---
arch/powerpc/include/asm/pgtable-ppc64.h | 6 +++++-
arch/powerpc/mm/init_64.c | 9 ++++++---
3 files changed, 14 insertions(+), 7 deletions(-)
diff --git a/arch/powerpc/include/asm/pgalloc-64.h b/arch/powerpc/include/asm/pgalloc-64.h
index 3418989..46c6ffa 100644
--- a/arch/powerpc/include/asm/pgalloc-64.h
+++ b/arch/powerpc/include/asm/pgalloc-64.h
@@ -208,17 +208,17 @@ static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table,
static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
{
- return kmem_cache_alloc(PGT_CACHE(PMD_INDEX_SIZE),
+ return kmem_cache_alloc(PGT_CACHE(PMD_CACHE_INDEX),
GFP_KERNEL|__GFP_REPEAT);
}
static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
{
- kmem_cache_free(PGT_CACHE(PMD_INDEX_SIZE), pmd);
+ kmem_cache_free(PGT_CACHE(PMD_CACHE_INDEX), pmd);
}
#define __pmd_free_tlb(tlb, pmd, addr) \
- pgtable_free_tlb(tlb, pmd, PMD_INDEX_SIZE)
+ pgtable_free_tlb(tlb, pmd, PMD_CACHE_INDEX)
#ifndef CONFIG_PPC_64K_PAGES
#define __pud_free_tlb(tlb, pud, addr) \
pgtable_free_tlb(tlb, pud, PUD_INDEX_SIZE)
diff --git a/arch/powerpc/include/asm/pgtable-ppc64.h b/arch/powerpc/include/asm/pgtable-ppc64.h
index c0747c7..d4e845c 100644
--- a/arch/powerpc/include/asm/pgtable-ppc64.h
+++ b/arch/powerpc/include/asm/pgtable-ppc64.h
@@ -20,7 +20,11 @@
PUD_INDEX_SIZE + PGD_INDEX_SIZE + PAGE_SHIFT)
#define PGTABLE_RANGE (ASM_CONST(1) << PGTABLE_EADDR_SIZE)
-
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#define PMD_CACHE_INDEX (PMD_INDEX_SIZE + 1)
+#else
+#define PMD_CACHE_INDEX PMD_INDEX_SIZE
+#endif
/*
* Define the address range of the kernel non-linear virtual area
*/
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index 95a4529..7608b0d 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -88,7 +88,11 @@ static void pgd_ctor(void *addr)
static void pmd_ctor(void *addr)
{
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ memset(addr, 0, PMD_TABLE_SIZE * 2);
+#else
memset(addr, 0, PMD_TABLE_SIZE);
+#endif
}
struct kmem_cache *pgtable_cache[MAX_PGTABLE_INDEX_SIZE];
@@ -138,10 +142,9 @@ void pgtable_cache_add(unsigned shift, void (*ctor)(void *))
void pgtable_cache_init(void)
{
pgtable_cache_add(PGD_INDEX_SIZE, pgd_ctor);
- pgtable_cache_add(PMD_INDEX_SIZE, pmd_ctor);
- if (!PGT_CACHE(PGD_INDEX_SIZE) || !PGT_CACHE(PMD_INDEX_SIZE))
+ pgtable_cache_add(PMD_CACHE_INDEX, pmd_ctor);
+ if (!PGT_CACHE(PGD_INDEX_SIZE) || !PGT_CACHE(PMD_CACHE_INDEX))
panic("Couldn't allocate pgtable caches");
-
/* In all current configs, when the PUD index exists it's the
* same size as either the pgd or pmd index. Verify that the
* initialization above has also created a PUD cache. This
--
1.7.10
^ permalink raw reply related
* [PATCH -V5 20/25] powerpc/THP: Add code to handle HPTE faults for large pages
From: Aneesh Kumar K.V @ 2013-04-04 5:57 UTC (permalink / raw)
To: benh, paulus; +Cc: linux-mm, linuxppc-dev, Aneesh Kumar K.V
In-Reply-To: <1365055083-31956-1-git-send-email-aneesh.kumar@linux.vnet.ibm.com>
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
We now have pmd entries covering a 16MB range. To implement THP on powerpc,
we double the size of the PMD. The second half is used to deposit the pgtable (PTE page).
We also use the deposited PTE page for tracking the HPTE information. The information
includes [ secondary group | 3 bit hidx | valid ]. We use one byte per HPTE entry.
With 16MB hugepage and 64K HPTE we need 256 entries and with 4K HPTE we need
4096 entries. Both will fit in a 4K PTE page.
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
arch/powerpc/include/asm/mmu-hash64.h | 5 +
arch/powerpc/include/asm/pgtable-ppc64.h | 31 +----
arch/powerpc/kernel/io-workarounds.c | 3 +-
arch/powerpc/kvm/book3s_64_mmu_hv.c | 2 +-
arch/powerpc/kvm/book3s_hv_rm_mmu.c | 4 +-
arch/powerpc/mm/Makefile | 1 +
arch/powerpc/mm/hash_utils_64.c | 16 ++-
arch/powerpc/mm/hugepage-hash64.c | 185 ++++++++++++++++++++++++++++++
arch/powerpc/mm/hugetlbpage.c | 31 ++++-
arch/powerpc/mm/pgtable.c | 38 ++++++
arch/powerpc/mm/tlb_hash64.c | 5 +-
arch/powerpc/perf/callchain.c | 2 +-
arch/powerpc/platforms/pseries/eeh.c | 5 +-
13 files changed, 286 insertions(+), 42 deletions(-)
create mode 100644 arch/powerpc/mm/hugepage-hash64.c
diff --git a/arch/powerpc/include/asm/mmu-hash64.h b/arch/powerpc/include/asm/mmu-hash64.h
index e187254..a74a3de 100644
--- a/arch/powerpc/include/asm/mmu-hash64.h
+++ b/arch/powerpc/include/asm/mmu-hash64.h
@@ -322,6 +322,11 @@ extern int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
pte_t *ptep, unsigned long trap, int local, int ssize,
unsigned int shift, unsigned int mmu_psize);
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+extern int __hash_page_thp(unsigned long ea, unsigned long access,
+ unsigned long vsid, pmd_t *pmdp, unsigned long trap,
+ int local, int ssize, unsigned int psize);
+#endif
extern void hash_failure_debug(unsigned long ea, unsigned long access,
unsigned long vsid, unsigned long trap,
int ssize, int psize, int lpsize,
diff --git a/arch/powerpc/include/asm/pgtable-ppc64.h b/arch/powerpc/include/asm/pgtable-ppc64.h
index d4e845c..9b81283 100644
--- a/arch/powerpc/include/asm/pgtable-ppc64.h
+++ b/arch/powerpc/include/asm/pgtable-ppc64.h
@@ -345,39 +345,18 @@ static inline void __ptep_set_access_flags(pte_t *ptep, pte_t entry)
void pgtable_cache_add(unsigned shift, void (*ctor)(void *));
void pgtable_cache_init(void);
-/*
- * find_linux_pte returns the address of a linux pte for a given
- * effective address and directory. If not found, it returns zero.
- */
-static inline pte_t *find_linux_pte(pgd_t *pgdir, unsigned long ea)
-{
- pgd_t *pg;
- pud_t *pu;
- pmd_t *pm;
- pte_t *pt = NULL;
-
- pg = pgdir + pgd_index(ea);
- if (!pgd_none(*pg)) {
- pu = pud_offset(pg, ea);
- if (!pud_none(*pu)) {
- pm = pmd_offset(pu, ea);
- if (pmd_present(*pm))
- pt = pte_offset_kernel(pm, ea);
- }
- }
- return pt;
-}
-
+pte_t *find_linux_pte(pgd_t *pgdir, unsigned long ea, unsigned int *thp);
#ifdef CONFIG_HUGETLB_PAGE
pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea,
- unsigned *shift);
+ unsigned *shift, unsigned int *hugepage);
#else
static inline pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea,
- unsigned *shift)
+ unsigned *shift,
+ unsigned int *hugepage)
{
if (shift)
*shift = 0;
- return find_linux_pte(pgdir, ea);
+ return find_linux_pte(pgdir, ea, hugepage);
}
#endif /* !CONFIG_HUGETLB_PAGE */
diff --git a/arch/powerpc/kernel/io-workarounds.c b/arch/powerpc/kernel/io-workarounds.c
index 50e90b7..a9c904f 100644
--- a/arch/powerpc/kernel/io-workarounds.c
+++ b/arch/powerpc/kernel/io-workarounds.c
@@ -70,7 +70,8 @@ struct iowa_bus *iowa_mem_find_bus(const PCI_IO_ADDR addr)
if (vaddr < PHB_IO_BASE || vaddr >= PHB_IO_END)
return NULL;
- ptep = find_linux_pte(init_mm.pgd, vaddr);
+ /* we won't find hugepages here */
+ ptep = find_linux_pte(init_mm.pgd, vaddr, NULL);
if (ptep == NULL)
paddr = 0;
else
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 8cc18ab..4f2a7dc 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -683,7 +683,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
*/
rcu_read_lock_sched();
ptep = find_linux_pte_or_hugepte(current->mm->pgd,
- hva, NULL);
+ hva, NULL, NULL);
if (ptep && pte_present(*ptep)) {
pte = kvmppc_read_update_linux_pte(ptep, 1);
if (pte_write(pte))
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index 19c93ba..7c8e1ed 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -27,7 +27,7 @@ static void *real_vmalloc_addr(void *x)
unsigned long addr = (unsigned long) x;
pte_t *p;
- p = find_linux_pte(swapper_pg_dir, addr);
+ p = find_linux_pte(swapper_pg_dir, addr, NULL);
if (!p || !pte_present(*p))
return NULL;
/* assume we don't have huge pages in vmalloc space... */
@@ -152,7 +152,7 @@ static pte_t lookup_linux_pte(pgd_t *pgdir, unsigned long hva,
unsigned long ps = *pte_sizep;
unsigned int shift;
- ptep = find_linux_pte_or_hugepte(pgdir, hva, &shift);
+ ptep = find_linux_pte_or_hugepte(pgdir, hva, &shift, NULL);
if (!ptep)
return __pte(0);
if (shift)
diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile
index 3787b61..997deb4 100644
--- a/arch/powerpc/mm/Makefile
+++ b/arch/powerpc/mm/Makefile
@@ -33,6 +33,7 @@ obj-y += hugetlbpage.o
obj-$(CONFIG_PPC_STD_MMU_64) += hugetlbpage-hash64.o
obj-$(CONFIG_PPC_BOOK3E_MMU) += hugetlbpage-book3e.o
endif
+obj-$(CONFIG_TRANSPARENT_HUGEPAGE) += hugepage-hash64.o
obj-$(CONFIG_PPC_SUBPAGE_PROT) += subpage-prot.o
obj-$(CONFIG_NOT_COHERENT_CACHE) += dma-noncoherent.o
obj-$(CONFIG_HIGHMEM) += highmem.o
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 1f2ebbd..cd3ecd8 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -955,7 +955,7 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
unsigned long vsid;
struct mm_struct *mm;
pte_t *ptep;
- unsigned hugeshift;
+ unsigned hugeshift, hugepage;
const struct cpumask *tmp;
int rc, user_region = 0, local = 0;
int psize, ssize;
@@ -1021,7 +1021,7 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
#endif /* CONFIG_PPC_64K_PAGES */
/* Get PTE and page size from page tables */
- ptep = find_linux_pte_or_hugepte(pgdir, ea, &hugeshift);
+ ptep = find_linux_pte_or_hugepte(pgdir, ea, &hugeshift, &hugepage);
if (ptep == NULL || !pte_present(*ptep)) {
DBG_LOW(" no PTE !\n");
return 1;
@@ -1044,6 +1044,12 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
ssize, hugeshift, psize);
#endif /* CONFIG_HUGETLB_PAGE */
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ if (hugepage)
+ return __hash_page_thp(ea, access, vsid, (pmd_t *)ptep,
+ trap, local, ssize, psize);
+#endif
+
#ifndef CONFIG_PPC_64K_PAGES
DBG_LOW(" i-pte: %016lx\n", pte_val(*ptep));
#else
@@ -1149,7 +1155,11 @@ void hash_preload(struct mm_struct *mm, unsigned long ea,
pgdir = mm->pgd;
if (pgdir == NULL)
return;
- ptep = find_linux_pte(pgdir, ea);
+ /*
+ * We haven't implemented update_mmu_cache_pmd yet. We get called
+ * only for non hugepages. Hence can ignore THP here
+ */
+ ptep = find_linux_pte(pgdir, ea, NULL);
if (!ptep)
return;
diff --git a/arch/powerpc/mm/hugepage-hash64.c b/arch/powerpc/mm/hugepage-hash64.c
new file mode 100644
index 0000000..3f6140d
--- /dev/null
+++ b/arch/powerpc/mm/hugepage-hash64.c
@@ -0,0 +1,185 @@
+/*
+ * Copyright IBM Corporation, 2013
+ * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2.1 of the GNU Lesser General Public License
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ */
+
+/*
+ * PPC64 THP Support for hash based MMUs
+ */
+#include <linux/mm.h>
+#include <asm/machdep.h>
+
+/*
+ * Hash fault handler for a transparent hugepage: create or update the HPTE
+ * that maps the base-page-sized slice of the 16MB hugepage containing ea.
+ * pmdp points at the hugepage PMD; psize is the base page size the HPTE is
+ * hashed and inserted with (alongside lpsize = MMU_PAGE_16M).
+ *
+ * The linux hugepage PMD now includes the pmd entries followed by the address
+ * of the stashed pgtable_t. The stashed pgtable_t contains the hpte bits.
+ * [ secondary group | 3 bit hidx | valid ]. We use one byte per HPTE entry.
+ * With 16MB hugepage and 64K HPTE we need 256 entries and with 4K HPTE we need
+ * 4096 entries. Both will fit in a 4K pgtable_t.
+ *
+ * Returns 0 when the HPTE was updated/inserted or the PMD was busy (the access
+ * is simply retried), 1 when the PMD does not permit the access and a page
+ * fault must be taken, and -1 when the hash insert failed in the hypervisor.
+ */
+int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid,
+ pmd_t *pmdp, unsigned long trap, int local, int ssize,
+ unsigned int psize)
+{
+ unsigned int index, valid;
+ unsigned char *hpte_slot_array;
+ unsigned long rflags, pa, hidx;
+ unsigned long old_pmd, new_pmd;
+ int ret, lpsize = MMU_PAGE_16M;
+ unsigned long vpn, hash, shift, slot;
+
+ /* atomically mark the linux large page PMD busy and dirty */
+ do {
+ old_pmd = pmd_val(*pmdp);
+ /* If PMD busy, retry the access */
+ if (unlikely(old_pmd & PMD_HUGE_BUSY))
+ return 0;
+ /* If PMD permissions don't match, take page fault */
+ if (unlikely(access & ~old_pmd))
+ return 1;
+ /*
+ * Try to lock the PTE, add ACCESSED and DIRTY if it was
+ * a write access
+ */
+ new_pmd = old_pmd | PMD_HUGE_BUSY | PMD_HUGE_ACCESSED;
+ if (access & _PAGE_RW)
+ new_pmd |= PMD_HUGE_DIRTY;
+ } while (old_pmd != __cmpxchg_u64((unsigned long *)pmdp,
+ old_pmd, new_pmd));
+ /*
+ * PP bits. PMD_HUGE_USER is already PP bit 0x2, so we only
+ * need to add in 0x1 if it's a read-only user page
+ */
+ rflags = new_pmd & PMD_HUGE_USER;
+ if ((new_pmd & PMD_HUGE_USER) && !((new_pmd & PMD_HUGE_RW) &&
+ (new_pmd & PMD_HUGE_DIRTY)))
+ rflags |= 0x1;
+ /* PMD_HUGE_EXEC -> HW_NO_EXEC since it's inverted */
+ rflags |= ((new_pmd & PMD_HUGE_EXEC) ? 0 : HPTE_R_N);
+
+#if 0 /* FIXME!! */
+ if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) {
+
+ /*
+ * No CPU has hugepages but lacks no execute, so we
+ * don't need to worry about that case
+ */
+ rflags = hash_page_do_lazy_icache(rflags, __pte(old_pte), trap);
+ }
+#endif
+ /* Find the slot index details for this ea, using base page size. */
+ shift = mmu_psize_defs[psize].shift;
+ index = (ea & (HUGE_PAGE_SIZE - 1)) >> shift;
+ /* the slot array has at most 4096 entries, so 4095 is the last index */
+ BUG_ON(index >= 4096);
+
+ vpn = hpt_vpn(ea, vsid, ssize);
+ hash = hpt_hash(vpn, shift, ssize);
+ /*
+ * The hpte hindex are stored in the pgtable whose address is in the
+ * second half of the PMD
+ */
+ hpte_slot_array = *(unsigned char **)(pmdp + PTRS_PER_PMD);
+
+ valid = hpte_slot_array[index] & 0x1;
+ if (unlikely(valid)) {
+ /* update the hpte bits */
+ hidx = hpte_slot_array[index] >> 1;
+ if (hidx & _PTEIDX_SECONDARY)
+ hash = ~hash;
+ slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
+ slot += hidx & _PTEIDX_GROUP_IX;
+
+ ret = ppc_md.hpte_updatepp(slot, rflags, vpn,
+ psize, ssize, local);
+ /* We failed to update, try to insert a new entry. */
+ if (ret == -1) {
+ /*
+ * large pte is marked busy, so we can be sure
+ * nobody is looking at hpte_slot_array. hence we can
+ * safely update this here.
+ */
+ hpte_slot_array[index] = 0;
+ valid = 0;
+ }
+ }
+
+ if (likely(!valid)) {
+ unsigned long hpte_group;
+
+ /* insert new entry */
+ pa = pmd_pfn(__pmd(old_pmd)) << PAGE_SHIFT;
+ /* the failed update above may have left hash inverted; recompute */
+ hash = hpt_hash(vpn, shift, ssize);
+repeat:
+ hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL;
+
+ /* clear the busy bits and set the hash pte bits */
+ new_pmd = (new_pmd & ~PMD_HUGE_HPTEFLAGS) | PMD_HUGE_HASHPTE;
+
+ /*
+ * WIMG bits.
+ * We always have _PAGE_COHERENT enabled for system RAM
+ */
+ rflags |= _PAGE_COHERENT;
+
+ if (new_pmd & PMD_HUGE_SAO)
+ rflags |= _PAGE_SAO;
+
+ /* Insert into the hash table, primary slot */
+ slot = ppc_md.hpte_insert(hpte_group, vpn, pa, rflags, 0,
+ psize, lpsize, ssize);
+ /* Primary is full, try the secondary */
+ if (unlikely(slot == -1)) {
+ hpte_group = ((~hash & htab_hash_mask) *
+ HPTES_PER_GROUP) & ~0x7UL;
+ slot = ppc_md.hpte_insert(hpte_group, vpn, pa,
+ rflags, HPTE_V_SECONDARY,
+ psize, lpsize, ssize);
+ if (slot == -1) {
+ if (mftb() & 0x1)
+ hpte_group = ((hash & htab_hash_mask) *
+ HPTES_PER_GROUP) & ~0x7UL;
+
+ ppc_md.hpte_remove(hpte_group);
+ goto repeat;
+ }
+ }
+ /*
+ * Hypervisor failure. Restore old pmd and return -1
+ * similar to __hash_page_*
+ */
+ if (unlikely(slot == -2)) {
+ *pmdp = __pmd(old_pmd);
+ hash_failure_debug(ea, access, vsid, trap, ssize,
+ psize, lpsize, old_pmd);
+ return -1;
+ }
+ /*
+ * large pte is marked busy, so we can be sure
+ * nobody is looking at hpte_slot_array. hence we can
+ * safely update this here.
+ */
+ hpte_slot_array[index] = slot << 1 | 0x1;
+ }
+ /* No need to use ldarx/stdcx here */
+ *pmdp = __pmd(new_pmd & ~PMD_HUGE_BUSY);
+ return 0;
+}
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 1a6de0a..7f11fa0 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -67,7 +67,8 @@ static inline unsigned int mmu_psize_to_shift(unsigned int mmu_psize)
#define hugepd_none(hpd) ((hpd).pd == 0)
-pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift)
+pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea,
+ unsigned *shift, unsigned int *hugepage)
{
pgd_t *pg;
pud_t *pu;
@@ -77,6 +78,8 @@ pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift
if (shift)
*shift = 0;
+ if (hugepage)
+ *hugepage = 0;
pg = pgdir + pgd_index(ea);
if (is_hugepd(pg)) {
@@ -91,12 +94,24 @@ pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift
pm = pmd_offset(pu, ea);
if (is_hugepd(pm))
hpdp = (hugepd_t *)pm;
- else if (!pmd_none(*pm)) {
+ else if (pmd_large(*pm)) {
+ /* THP page */
+ if (hugepage) {
+ *hugepage = 1;
+ /*
+ * This should be ok, except for few
+ * flags. Most of the pte and hugepage
+ * pmd bits overlap. We don't use the
+ * returned value as pte_t in the caller.
+ */
+ return (pte_t *)pm;
+ } else
+ return NULL;
+ } else if (!pmd_none(*pm)) {
return pte_offset_kernel(pm, ea);
}
}
}
-
if (!hpdp)
return NULL;
@@ -108,7 +123,8 @@ EXPORT_SYMBOL_GPL(find_linux_pte_or_hugepte);
pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
{
- return find_linux_pte_or_hugepte(mm->pgd, addr, NULL);
+ /* Only called for HugeTLB pages, hence can ignore THP */
+ return find_linux_pte_or_hugepte(mm->pgd, addr, NULL, NULL);
}
static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
@@ -613,8 +629,11 @@ follow_huge_addr(struct mm_struct *mm, unsigned long address, int write)
struct page *page;
unsigned shift;
unsigned long mask;
-
- ptep = find_linux_pte_or_hugepte(mm->pgd, address, &shift);
+ /*
+ * Transparent hugepages are handled by generic code. We can skip them
+ * here.
+ */
+ ptep = find_linux_pte_or_hugepte(mm->pgd, address, &shift, NULL);
/* Verify it is a huge page else bail. */
if (!ptep || !shift)
diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
index cf3ca8e..fbff062 100644
--- a/arch/powerpc/mm/pgtable.c
+++ b/arch/powerpc/mm/pgtable.c
@@ -557,3 +557,41 @@ void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr,
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+
+/*
+ * find_linux_pte returns the address of the linux pte (or, with *hugepage set
+ * to 1, the THP pmd) for a given effective address and directory, else NULL.
+ */
+pte_t *find_linux_pte(pgd_t *pgdir, unsigned long ea, unsigned int *hugepage)
+{
+ pgd_t *pg;
+ pud_t *pu;
+ pmd_t *pm;
+ pte_t *pt = NULL;
+
+ if (hugepage)
+ *hugepage = 0;
+ pg = pgdir + pgd_index(ea);
+ if (!pgd_none(*pg)) {
+ pu = pud_offset(pg, ea);
+ if (!pud_none(*pu)) {
+ pm = pmd_offset(pu, ea);
+ if (pmd_large(*pm)) {
+ /* THP page */
+ if (hugepage) {
+ *hugepage = 1;
+ /*
+ * This should be ok, except for few
+ * flags. Most of the pte and hugepage
+ * pmd bits overlap. We don't use the
+ * returned value as pte_t in the caller.
+ */
+ return (pte_t *)pm;
+ } else
+ return NULL;
+ } else if (pmd_present(*pm))
+ pt = pte_offset_kernel(pm, ea);
+ }
+ }
+ return pt;
+}
diff --git a/arch/powerpc/mm/tlb_hash64.c b/arch/powerpc/mm/tlb_hash64.c
index 023ec8a..be0066f 100644
--- a/arch/powerpc/mm/tlb_hash64.c
+++ b/arch/powerpc/mm/tlb_hash64.c
@@ -206,7 +206,10 @@ void __flush_hash_table_range(struct mm_struct *mm, unsigned long start,
local_irq_save(flags);
arch_enter_lazy_mmu_mode();
for (; start < end; start += PAGE_SIZE) {
- pte_t *ptep = find_linux_pte(mm->pgd, start);
+ /*
+ * We won't find hugepages here.
+ */
+ pte_t *ptep = find_linux_pte(mm->pgd, start, NULL);
unsigned long pte;
if (ptep == NULL)
diff --git a/arch/powerpc/perf/callchain.c b/arch/powerpc/perf/callchain.c
index 74d1e78..578cac7 100644
--- a/arch/powerpc/perf/callchain.c
+++ b/arch/powerpc/perf/callchain.c
@@ -125,7 +125,7 @@ static int read_user_stack_slow(void __user *ptr, void *ret, int nb)
if (!pgdir)
return -EFAULT;
- ptep = find_linux_pte_or_hugepte(pgdir, addr, &shift);
+ ptep = find_linux_pte_or_hugepte(pgdir, addr, &shift, NULL);
if (!shift)
shift = PAGE_SHIFT;
diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c
index 9a04322..44c931a 100644
--- a/arch/powerpc/platforms/pseries/eeh.c
+++ b/arch/powerpc/platforms/pseries/eeh.c
@@ -261,7 +261,10 @@ static inline unsigned long eeh_token_to_phys(unsigned long token)
pte_t *ptep;
unsigned long pa;
- ptep = find_linux_pte(init_mm.pgd, token);
+ /*
+ * We won't find hugepages here
+ */
+ ptep = find_linux_pte(init_mm.pgd, token, NULL);
if (!ptep)
return token;
pa = pte_pfn(*ptep) << PAGE_SHIFT;
--
1.7.10
^ permalink raw reply related
* [PATCH -V5 17/25] powerpc/THP: Implement transparent hugepages for ppc64
From: Aneesh Kumar K.V @ 2013-04-04 5:57 UTC (permalink / raw)
To: benh, paulus; +Cc: linux-mm, linuxppc-dev, Aneesh Kumar K.V
In-Reply-To: <1365055083-31956-1-git-send-email-aneesh.kumar@linux.vnet.ibm.com>
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
We now have pmd entries covering a 16MB range. To implement THP on powerpc,
we double the size of PMD. The second half is used to deposit the pgtable (PTE page).
We also use the deposited PTE page for tracking the HPTE information. The information
includes [ secondary group | 3 bit hidx | valid ]. We use one byte per HPTE entry.
With 16MB hugepage and 64K HPTE we need 256 entries and with 4K HPTE we need
4096 entries. Both will fit in a 4K PTE page.
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
arch/powerpc/include/asm/page.h | 2 +-
arch/powerpc/include/asm/pgtable-ppc64-64k.h | 3 +-
arch/powerpc/include/asm/pgtable-ppc64.h | 2 +-
arch/powerpc/include/asm/pgtable.h | 240 ++++++++++++++++++++
arch/powerpc/mm/pgtable.c | 314 ++++++++++++++++++++++++++
arch/powerpc/mm/pgtable_64.c | 13 ++
arch/powerpc/platforms/Kconfig.cputype | 1 +
7 files changed, 572 insertions(+), 3 deletions(-)
diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h
index 38e7ff6..b927447 100644
--- a/arch/powerpc/include/asm/page.h
+++ b/arch/powerpc/include/asm/page.h
@@ -40,7 +40,7 @@
#ifdef CONFIG_HUGETLB_PAGE
extern unsigned int HPAGE_SHIFT;
#else
-#define HPAGE_SHIFT PAGE_SHIFT
+#define HPAGE_SHIFT PMD_SHIFT
#endif
#define HPAGE_SIZE ((1UL) << HPAGE_SHIFT)
#define HPAGE_MASK (~(HPAGE_SIZE - 1))
diff --git a/arch/powerpc/include/asm/pgtable-ppc64-64k.h b/arch/powerpc/include/asm/pgtable-ppc64-64k.h
index 3c529b4..5c5541a 100644
--- a/arch/powerpc/include/asm/pgtable-ppc64-64k.h
+++ b/arch/powerpc/include/asm/pgtable-ppc64-64k.h
@@ -33,7 +33,8 @@
#define PGDIR_MASK (~(PGDIR_SIZE-1))
/* Bits to mask out from a PMD to get to the PTE page */
-#define PMD_MASKED_BITS 0x1ff
+/* PMDs point to PTE table fragments which are 4K aligned. */
+#define PMD_MASKED_BITS 0xfff
/* Bits to mask out from a PGD/PUD to get to the PMD page */
#define PUD_MASKED_BITS 0x1ff
diff --git a/arch/powerpc/include/asm/pgtable-ppc64.h b/arch/powerpc/include/asm/pgtable-ppc64.h
index 0182c20..c0747c7 100644
--- a/arch/powerpc/include/asm/pgtable-ppc64.h
+++ b/arch/powerpc/include/asm/pgtable-ppc64.h
@@ -150,7 +150,7 @@
#define pmd_present(pmd) (pmd_val(pmd) != 0)
#define pmd_clear(pmdp) (pmd_val(*(pmdp)) = 0)
#define pmd_page_vaddr(pmd) (pmd_val(pmd) & ~PMD_MASKED_BITS)
-#define pmd_page(pmd) virt_to_page(pmd_page_vaddr(pmd))
+extern struct page *pmd_page(pmd_t pmd);
#define pud_set(pudp, pudval) (pud_val(*(pudp)) = (pudval))
#define pud_none(pud) (!pud_val(pud))
diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h
index 4b52726..9fbe2a7 100644
--- a/arch/powerpc/include/asm/pgtable.h
+++ b/arch/powerpc/include/asm/pgtable.h
@@ -23,7 +23,247 @@ struct mm_struct;
*/
#define PTE_PAGE_HIDX_OFFSET (PTRS_PER_PTE * 8)
+/* A large part matches with pte bits */
+#define PMD_HUGE_PRESENT 0x001 /* software: pte contains a translation */
+#define PMD_HUGE_USER 0x002 /* matches one of the PP bits */
+#define PMD_HUGE_FILE 0x002 /* (!present only) software: pte holds file offset */
+#define PMD_HUGE_EXEC 0x004 /* No execute on POWER4 and newer (we invert) */
+#define PMD_HUGE_SPLITTING 0x008
+#define PMD_HUGE_SAO 0x010 /* strong Access order */
+#define PMD_HUGE_HASHPTE 0x020
+#define PMD_ISHUGE 0x040
+#define PMD_HUGE_DIRTY 0x080 /* C: page changed */
+#define PMD_HUGE_ACCESSED 0x100 /* R: page referenced */
+#define PMD_HUGE_RW 0x200 /* software: user write access allowed */
+#define PMD_HUGE_BUSY 0x800 /* software: PTE & hash are busy */
+#define PMD_HUGE_HPTEFLAGS (PMD_HUGE_BUSY | PMD_HUGE_HASHPTE)
+/*
+ * We keep the pmd and pte rpn shift the same, even though we use only
+ * lower 12 bits for hugepage flags at pmd level
+ */
+#define PMD_HUGE_RPN_SHIFT PTE_RPN_SHIFT
+#define HUGE_PAGE_SIZE (ASM_CONST(1) << 24)
+#define HUGE_PAGE_MASK (~(HUGE_PAGE_SIZE - 1))
+
#ifndef __ASSEMBLY__
+extern void hpte_need_hugepage_flush(struct mm_struct *mm, unsigned long addr,
+ pmd_t *pmdp);
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+extern pmd_t pfn_pmd(unsigned long pfn, pgprot_t pgprot);
+extern pmd_t mk_pmd(struct page *page, pgprot_t pgprot);
+extern pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot);
+extern void set_pmd_at(struct mm_struct *mm, unsigned long addr,
+ pmd_t *pmdp, pmd_t pmd);
+extern void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr,
+ pmd_t *pmd);
+static inline int pmd_large(pmd_t pmd)
+{
+ return (pmd_val(pmd) & (PMD_ISHUGE | PMD_HUGE_PRESENT)) ==
+ (PMD_ISHUGE | PMD_HUGE_PRESENT);
+}
+
+static inline int pmd_trans_splitting(pmd_t pmd)
+{
+ return (pmd_val(pmd) & (PMD_ISHUGE|PMD_HUGE_SPLITTING)) ==
+ (PMD_ISHUGE|PMD_HUGE_SPLITTING);
+}
+
+static inline int pmd_trans_huge(pmd_t pmd)
+{
+ return pmd_val(pmd) & PMD_ISHUGE;
+}
+/* We will enable it in the last patch */
+#define has_transparent_hugepage() 0
+#else
+#define pmd_large(pmd) 0
+#define has_transparent_hugepage() 0
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+
+static inline unsigned long pmd_pfn(pmd_t pmd)
+{
+ /*
+ * Only called for hugepage pmd
+ */
+ return pmd_val(pmd) >> PMD_HUGE_RPN_SHIFT;
+}
+
+static inline int pmd_young(pmd_t pmd)
+{
+ return pmd_val(pmd) & PMD_HUGE_ACCESSED;
+}
+
+static inline pmd_t pmd_mkhuge(pmd_t pmd)
+{
+ /* Do nothing, mk_pmd() does this part. */
+ return pmd;
+}
+
+#define __HAVE_ARCH_PMD_WRITE
+static inline int pmd_write(pmd_t pmd)
+{
+ return pmd_val(pmd) & PMD_HUGE_RW;
+}
+
+static inline pmd_t pmd_mkold(pmd_t pmd)
+{
+ pmd_val(pmd) &= ~PMD_HUGE_ACCESSED;
+ return pmd;
+}
+
+static inline pmd_t pmd_wrprotect(pmd_t pmd)
+{
+ pmd_val(pmd) &= ~PMD_HUGE_RW;
+ return pmd;
+}
+
+static inline pmd_t pmd_mkdirty(pmd_t pmd)
+{
+ pmd_val(pmd) |= PMD_HUGE_DIRTY;
+ return pmd;
+}
+
+static inline pmd_t pmd_mkyoung(pmd_t pmd)
+{
+ pmd_val(pmd) |= PMD_HUGE_ACCESSED;
+ return pmd;
+}
+
+static inline pmd_t pmd_mkwrite(pmd_t pmd)
+{
+ pmd_val(pmd) |= PMD_HUGE_RW;
+ return pmd;
+}
+
+static inline pmd_t pmd_mknotpresent(pmd_t pmd)
+{
+ pmd_val(pmd) &= ~PMD_HUGE_PRESENT;
+ return pmd;
+}
+
+static inline pmd_t pmd_mksplitting(pmd_t pmd)
+{
+ pmd_val(pmd) |= PMD_HUGE_SPLITTING;
+ return pmd;
+}
+
+/*
+ * Set the dirty and/or accessed bits atomically in a linux hugepage PMD, this
+ * function doesn't need to flush the hash entry
+ */
+static inline void __pmdp_set_access_flags(pmd_t *pmdp, pmd_t entry)
+{
+ unsigned long bits = pmd_val(entry) & (PMD_HUGE_DIRTY |
+ PMD_HUGE_ACCESSED |
+ PMD_HUGE_RW | PMD_HUGE_EXEC);
+#ifdef PTE_ATOMIC_UPDATES
+ unsigned long old, tmp;
+
+ __asm__ __volatile__(
+ "1: ldarx %0,0,%4\n\
+ andi. %1,%0,%6\n\
+ bne- 1b \n\
+ or %0,%3,%0\n\
+ stdcx. %0,0,%4\n\
+ bne- 1b"
+ :"=&r" (old), "=&r" (tmp), "=m" (*pmdp)
+ :"r" (bits), "r" (pmdp), "m" (*pmdp), "i" (PMD_HUGE_BUSY)
+ :"cc");
+#else
+ unsigned long old = pmd_val(*pmdp);
+ *pmdp = __pmd(old | bits);
+#endif
+}
+
+#define __HAVE_ARCH_PMD_SAME
+static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b)
+{
+ return (((pmd_val(pmd_a) ^ pmd_val(pmd_b)) & ~PMD_HUGE_HPTEFLAGS) == 0);
+}
+
+#define __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS
+extern int pmdp_set_access_flags(struct vm_area_struct *vma,
+ unsigned long address, pmd_t *pmdp,
+ pmd_t entry, int dirty);
+
+static inline unsigned long pmd_hugepage_update(struct mm_struct *mm,
+ unsigned long addr,
+ pmd_t *pmdp, unsigned long clr)
+{
+#ifdef PTE_ATOMIC_UPDATES
+ unsigned long old, tmp;
+
+ __asm__ __volatile__(
+ "1: ldarx %0,0,%3\n\
+ andi. %1,%0,%6\n\
+ bne- 1b \n\
+ andc %1,%0,%4 \n\
+ stdcx. %1,0,%3 \n\
+ bne- 1b"
+ : "=&r" (old), "=&r" (tmp), "=m" (*pmdp)
+ : "r" (pmdp), "r" (clr), "m" (*pmdp), "i" (PMD_HUGE_BUSY)
+ : "cc" );
+#else
+ unsigned long old = pmd_val(*pmdp);
+ *pmdp = __pmd(old & ~clr);
+#endif
+
+#ifdef CONFIG_PPC_STD_MMU_64
+ if (old & PMD_HUGE_HASHPTE)
+ hpte_need_hugepage_flush(mm, addr, pmdp);
+#endif
+ return old;
+}
+
+static inline int __pmdp_test_and_clear_young(struct mm_struct *mm,
+ unsigned long addr, pmd_t *pmdp)
+{
+ unsigned long old;
+
+ if ((pmd_val(*pmdp) & (PMD_HUGE_ACCESSED | PMD_HUGE_HASHPTE)) == 0)
+ return 0;
+ old = pmd_hugepage_update(mm, addr, pmdp, PMD_HUGE_ACCESSED);
+ return ((old & PMD_HUGE_ACCESSED) != 0);
+}
+
+#define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
+extern int pmdp_test_and_clear_young(struct vm_area_struct *vma,
+ unsigned long address, pmd_t *pmdp);
+#define __HAVE_ARCH_PMDP_CLEAR_YOUNG_FLUSH
+extern int pmdp_clear_flush_young(struct vm_area_struct *vma,
+ unsigned long address, pmd_t *pmdp);
+
+#define __HAVE_ARCH_PMDP_GET_AND_CLEAR
+static inline pmd_t pmdp_get_and_clear(struct mm_struct *mm,
+ unsigned long addr, pmd_t *pmdp)
+{
+ unsigned long old = pmd_hugepage_update(mm, addr, pmdp, ~0UL);
+ return __pmd(old);
+}
+
+#define __HAVE_ARCH_PMDP_SET_WRPROTECT
+static inline void pmdp_set_wrprotect(struct mm_struct *mm, unsigned long addr,
+ pmd_t *pmdp)
+{
+
+ if ((pmd_val(*pmdp) & PMD_HUGE_RW) == 0)
+ return;
+
+ pmd_hugepage_update(mm, addr, pmdp, PMD_HUGE_RW);
+}
+
+#define __HAVE_ARCH_PMDP_SPLITTING_FLUSH
+extern void pmdp_splitting_flush(struct vm_area_struct *vma,
+ unsigned long address, pmd_t *pmdp);
+
+#define __HAVE_ARCH_PGTABLE_DEPOSIT
+extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
+ pgtable_t pgtable);
+#define __HAVE_ARCH_PGTABLE_WITHDRAW
+extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp);
+
+#define __HAVE_ARCH_PMDP_INVALIDATE
+extern void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
+ pmd_t *pmdp);
#include <asm/tlbflush.h>
diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
index 214130a..9f33780 100644
--- a/arch/powerpc/mm/pgtable.c
+++ b/arch/powerpc/mm/pgtable.c
@@ -31,6 +31,7 @@
#include <asm/pgalloc.h>
#include <asm/tlbflush.h>
#include <asm/tlb.h>
+#include <asm/machdep.h>
#include "mmu_decl.h"
@@ -240,3 +241,316 @@ void assert_pte_locked(struct mm_struct *mm, unsigned long addr)
}
#endif /* CONFIG_DEBUG_VM */
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+static pmd_t set_hugepage_access_flags_filter(pmd_t pmd,
+ struct vm_area_struct *vma,
+ int dirty)
+{
+ return pmd;
+}
+
+/*
+ * This is called when relaxing access to a hugepage. It's also called in the page
+ * fault path when we don't hit any of the major fault cases, ie, a minor
+ * update of _PAGE_ACCESSED, _PAGE_DIRTY, etc... The generic code will have
+ * handled those two for us, we additionally deal with missing execute
+ * permission here on some processors
+ */
+int pmdp_set_access_flags(struct vm_area_struct *vma, unsigned long address,
+ pmd_t *pmdp, pmd_t entry, int dirty)
+{
+ int changed;
+ entry = set_hugepage_access_flags_filter(entry, vma, dirty);
+ changed = !pmd_same(*(pmdp), entry);
+ if (changed) {
+ __pmdp_set_access_flags(pmdp, entry);
+ /*
+ * Since we are not supporting SW TLB systems, we don't
+ * have anything similar to flush_tlb_page_nohash()
+ */
+ }
+ return changed;
+}
+
+int pmdp_test_and_clear_young(struct vm_area_struct *vma,
+ unsigned long address, pmd_t *pmdp)
+{
+ return __pmdp_test_and_clear_young(vma->vm_mm, address, pmdp);
+}
+
+/*
+ * We currently remove entries from the hashtable regardless of whether
+ * the entry was young or dirty. The generic routines only flush if the
+ * entry was young or dirty which is not good enough.
+ *
+ * We should be more intelligent about this but for the moment we override
+ * these functions and force a tlb flush unconditionally
+ */
+int pmdp_clear_flush_young(struct vm_area_struct *vma,
+ unsigned long address, pmd_t *pmdp)
+{
+ return __pmdp_test_and_clear_young(vma->vm_mm, address, pmdp);
+}
+
+/*
+ * We mark the pmd splitting and invalidate all the hpte
+ * entries for this hugepage.
+ */
+void pmdp_splitting_flush(struct vm_area_struct *vma,
+ unsigned long address, pmd_t *pmdp)
+{
+ unsigned long old, tmp;
+
+ VM_BUG_ON(address & ~HPAGE_PMD_MASK);
+#ifdef PTE_ATOMIC_UPDATES
+
+ __asm__ __volatile__(
+ "1: ldarx %0,0,%3\n\
+ andi. %1,%0,%6\n\
+ bne- 1b \n\
+ ori %1,%0,%4 \n\
+ stdcx. %1,0,%3 \n\
+ bne- 1b"
+ : "=&r" (old), "=&r" (tmp), "=m" (*pmdp)
+ : "r" (pmdp), "i" (PMD_HUGE_SPLITTING), "m" (*pmdp), "i" (PMD_HUGE_BUSY)
+ : "cc" );
+#else
+ old = pmd_val(*pmdp);
+ *pmdp = __pmd(old | PMD_HUGE_SPLITTING);
+#endif
+ /*
+ * If the splitting flag was not already set, go and flush the
+ * HPTE entries and serialize against gup fast.
+ */
+ if (!(old & PMD_HUGE_SPLITTING)) {
+#ifdef CONFIG_PPC_STD_MMU_64
+ /* We need to flush the hpte */
+ if (old & PMD_HUGE_HASHPTE)
+ hpte_need_hugepage_flush(vma->vm_mm, address, pmdp);
+#endif
+ /* need tlb flush only to serialize against gup-fast */
+ flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
+ }
+}
+
+/*
+ * We want to put the pgtable in pmd and use pgtable for tracking
+ * the base page size hptes
+ */
+void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
+ pgtable_t pgtable)
+{
+ unsigned long *pgtable_slot;
+ assert_spin_locked(&mm->page_table_lock);
+ /*
+ * we store the pgtable in the second half of PMD
+ */
+ pgtable_slot = pmdp + PTRS_PER_PMD;
+ *pgtable_slot = (unsigned long)pgtable;
+}
+
+#define PTE_FRAG_SIZE (2 * PTRS_PER_PTE * sizeof(pte_t))
+pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
+{
+ pgtable_t pgtable;
+ unsigned long *pgtable_slot;
+
+ assert_spin_locked(&mm->page_table_lock);
+ pgtable_slot = pmdp + PTRS_PER_PMD;
+ pgtable = (pgtable_t) *pgtable_slot;
+ /*
+ * We store HPTE information in the deposited PTE fragment.
+ * zero out the content on withdraw.
+ */
+ memset(pgtable, 0, PTE_FRAG_SIZE);
+ return pgtable;
+}
+
+/*
+ * Since we are looking at latest ppc64, we don't need to worry about
+ * i/d cache coherency on exec fault
+ */
+static pmd_t set_pmd_filter(pmd_t pmd, unsigned long addr)
+{
+ pmd = __pmd(pmd_val(pmd) & ~PMD_HUGE_HPTEFLAGS);
+ return pmd;
+}
+
+/*
+ * We can make it less convoluted than __set_pte_at, because
+ * we can ignore a lot of hardware here, since this is only for
+ * MPSS
+ */
+static inline void __set_pmd_at(struct mm_struct *mm, unsigned long addr,
+ pmd_t *pmdp, pmd_t pmd, int percpu)
+{
+ /*
+ * There is nothing in the hash page table now, so nothing to
+ * invalidate; set_pmd_at is used for adding a new entry.
+ * For updating we should use pmd_hugepage_update()
+ */
+ *pmdp = pmd;
+}
+
+/*
+ * set a new huge pmd. We should not be called for updating
+ * an existing pmd entry. That should go via pmd_hugepage_update.
+ */
+void set_pmd_at(struct mm_struct *mm, unsigned long addr,
+ pmd_t *pmdp, pmd_t pmd)
+{
+ /*
+ * Note: mm->context.id might not yet have been assigned as
+ * this context might not have been activated yet when this
+ * is called.
+ */
+ pmd = set_pmd_filter(pmd, addr);
+
+ __set_pmd_at(mm, addr, pmdp, pmd, 0);
+
+}
+
+void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
+ pmd_t *pmdp)
+{
+ pmd_hugepage_update(vma->vm_mm, address, pmdp, PMD_HUGE_PRESENT);
+ flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
+}
+
+/*
+ * A linux hugepage PMD was changed and the corresponding hash table entry
+ * needs to be flushed.
+ *
+ * The linux hugepage PMD now includes the pmd entries followed by the address
+ * to the stashed pgtable_t. The stashed pgtable_t contains the hpte bits.
+ * [ secondary group | 3 bit hidx | valid ]. We use one byte per each HPTE entry.
+ * With 16MB hugepage and 64K HPTE we need 256 entries and with 4K HPTE we need
+ * 4096 entries. Both will fit in a 4K pgtable_t.
+ */
+void hpte_need_hugepage_flush(struct mm_struct *mm, unsigned long addr,
+ pmd_t *pmdp)
+{
+ int ssize, i;
+ unsigned long s_addr;
+ unsigned int psize, valid;
+ unsigned char *hpte_slot_array;
+ unsigned long hidx, vpn, vsid, hash, shift, slot;
+
+ /*
+ * Flush all the hptes mapping this hugepage
+ */
+ s_addr = addr & HUGE_PAGE_MASK;
+ /*
+ * The hpte hindex are stored in the pgtable whose address is in the
+ * second half of the PMD
+ */
+ hpte_slot_array = *(char **)(pmdp + PTRS_PER_PMD);
+
+ /* get the base page size */
+ psize = get_slice_psize(mm, s_addr);
+ shift = mmu_psize_defs[psize].shift;
+
+ for (i = 0; i < HUGE_PAGE_SIZE/(1ul << shift); i++) {
+ /*
+ * 8 bits per hpte entry
+ * 000| [ secondary group (one bit) | hidx (3 bits) | valid bit]
+ */
+ valid = hpte_slot_array[i] & 0x1;
+ if (!valid)
+ continue;
+ hidx = hpte_slot_array[i] >> 1;
+
+ /* get the vpn */
+ addr = s_addr + (i * (1ul << shift));
+ if (!is_kernel_addr(addr)) {
+ ssize = user_segment_size(addr);
+ vsid = get_vsid(mm->context.id, addr, ssize);
+ WARN_ON(vsid == 0);
+ } else {
+ vsid = get_kernel_vsid(addr, mmu_kernel_ssize);
+ ssize = mmu_kernel_ssize;
+ }
+
+ vpn = hpt_vpn(addr, vsid, ssize);
+ hash = hpt_hash(vpn, shift, ssize);
+ if (hidx & _PTEIDX_SECONDARY)
+ hash = ~hash;
+
+ slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
+ slot += hidx & _PTEIDX_GROUP_IX;
+ ppc_md.hpte_invalidate(slot, vpn, psize, ssize, 0);
+ }
+}
+
+static pmd_t pmd_set_protbits(pmd_t pmd, pgprot_t pgprot)
+{
+ unsigned long pmd_prot = 0;
+ unsigned long prot = pgprot_val(pgprot);
+
+ if (prot & _PAGE_PRESENT)
+ pmd_prot |= PMD_HUGE_PRESENT;
+ if (prot & _PAGE_USER)
+ pmd_prot |= PMD_HUGE_USER;
+ if (prot & _PAGE_FILE)
+ pmd_prot |= PMD_HUGE_FILE;
+ if (prot & _PAGE_EXEC)
+ pmd_prot |= PMD_HUGE_EXEC;
+ /*
+ * _PAGE_COHERENT should always be set
+ */
+ VM_BUG_ON(!(prot & _PAGE_COHERENT));
+
+ if (prot & _PAGE_SAO)
+ pmd_prot |= PMD_HUGE_SAO;
+ if (prot & _PAGE_DIRTY)
+ pmd_prot |= PMD_HUGE_DIRTY;
+ if (prot & _PAGE_ACCESSED)
+ pmd_prot |= PMD_HUGE_ACCESSED;
+ if (prot & _PAGE_RW)
+ pmd_prot |= PMD_HUGE_RW;
+
+ pmd_val(pmd) |= pmd_prot;
+ return pmd;
+}
+
+pmd_t pfn_pmd(unsigned long pfn, pgprot_t pgprot)
+{
+ pmd_t pmd;
+
+ pmd_val(pmd) = pfn << PMD_HUGE_RPN_SHIFT;
+ pmd_val(pmd) |= PMD_ISHUGE;
+ pmd = pmd_set_protbits(pmd, pgprot);
+ return pmd;
+}
+
+pmd_t mk_pmd(struct page *page, pgprot_t pgprot)
+{
+ return pfn_pmd(page_to_pfn(page), pgprot);
+}
+
+pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
+{
+ /* FIXME!! why are these bits cleared ? */
+ pmd_val(pmd) &= ~(PMD_HUGE_PRESENT |
+ PMD_HUGE_RW |
+ PMD_HUGE_EXEC);
+ pmd = pmd_set_protbits(pmd, newprot);
+ return pmd;
+}
+
+/*
+ * This is called at the end of handling a user page fault, when the
+ * fault has been handled by updating a HUGE PMD entry in the linux page tables.
+ * We use it to preload an HPTE into the hash table corresponding to
+ * the updated linux HUGE PMD entry.
+ */
+void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr,
+ pmd_t *pmd)
+{
+ /* FIXME!!
+ * Will be done in a later patch
+ */
+}
+
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index e79840b..6fc3488 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -338,6 +338,19 @@ EXPORT_SYMBOL(iounmap);
EXPORT_SYMBOL(__iounmap);
EXPORT_SYMBOL(__iounmap_at);
+/*
+ * For hugepage we have pfn in the pmd, we use PMD_HUGE_RPN_SHIFT bits for flags
+ * For PTE page, we have a PTE_FRAG_SIZE (4K) aligned virtual address.
+ */
+struct page *pmd_page(pmd_t pmd)
+{
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ if (pmd_val(pmd) & PMD_ISHUGE)
+ return pfn_to_page(pmd_pfn(pmd));
+#endif
+ return virt_to_page(pmd_page_vaddr(pmd));
+}
+
#ifdef CONFIG_PPC_64K_PAGES
/*
* we support 16 fragments per PTE page. This is limited by how many
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index 72afd28..90ee19b 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -71,6 +71,7 @@ config PPC_BOOK3S_64
select PPC_FPU
select PPC_HAVE_PMU_SUPPORT
select SYS_SUPPORTS_HUGETLBFS
+ select HAVE_ARCH_TRANSPARENT_HUGEPAGE if PPC_64K_PAGES
config PPC_BOOK3E_64
bool "Embedded processors"
--
1.7.10
^ permalink raw reply related
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox