LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed
* Re: [PATCH 5/6] powerpc/fsl-booke: Add B4_QDS board support
From: Kumar Gala @ 2013-03-15 15:58 UTC (permalink / raw)
  To: Shaveta Leekha; +Cc: linuxppc-dev
In-Reply-To: <1363334109-21922-5-git-send-email-shaveta@freescale.com>


On Mar 15, 2013, at 2:55 AM, Shaveta Leekha wrote:

> - Add support for B4 board's personalities in board file
>  b4_qds.c, It is common for B4 personalities B4860 and B4420QDS
> - Add B4QDS support in Kconfig and Makefile

Code also references a B4220, what about it?

>=20
> B4860QDS is a high-performance computing evaluation, development and
> test platform supporting the B4860 QorIQ Power Architecture processor,
> with following major features:
>=20
>    - Four dual-threaded e6500 Power Architecture processors
>      organized in one cluster-each core runs up to 1.8 GHz
>    - Two DDR3/3L controllers for high-speed memory interface each
>      runs at up to 1866.67 MHz
>    - CoreNet fabric that fully supports coherency using MESI protocol
>      between the e6500 cores, SC3900 FVP cores, memories and
>      external interfaces.
>    - Data Path Acceleration Architecture having FMAN, QMan, BMan, SEC 5.3 and RMAN
>    - Large internal cache memory with snooping and stashing capabilities
>    - Sixteen 10-GHz SerDes lanes that serve:
>        - Two SRIO interfaces. Each supports up to 4 lanes and
>          a total of up to 8 lanes
>        - Up to 8-lanes Common Public Radio Interface (CPRI) controller
>          for glue-less antenna connection
>        - Two 10-Gbit Ethernet controllers (10GEC)
>        - Six 1G/2.5-Gbit Ethernet controllers for network communications
>        - PCI Express controller
>        - Debug (Aurora)
>    - Various system peripherals
>=20
> B4420 is a reduced personality of B4860 with fewer core/clusters(both SC3900 and e6500),
> fewer DDR controllers, fewer serdes lanes, fewer SGMII interfaces and reduced target frequencies.
>=20
> Key differences between B4860 and B4420:
> B4420 has:
>    - Fewer e6500 cores:
>        1 cluster with 2 e6500 cores
>    - Fewer SC3900 cores/clusters:
>        1 cluster with 2 SC3900 cores per cluster
>    - Single DDRC
>    - 2X 4 lane serdes
>    - 3 SGMII interfaces
>    - no sRIO
>    - no 10G
>=20
> Signed-off-by: Shaveta Leekha <shaveta@freescale.com>
> ---
> arch/powerpc/platforms/85xx/Kconfig  |   16 +++++
> arch/powerpc/platforms/85xx/Makefile |    1 +
> arch/powerpc/platforms/85xx/b4_qds.c |  102 =
++++++++++++++++++++++++++++++++++
> 3 files changed, 119 insertions(+), 0 deletions(-)
> create mode 100644 arch/powerpc/platforms/85xx/b4_qds.c
>=20
> diff --git a/arch/powerpc/platforms/85xx/Kconfig =
b/arch/powerpc/platforms/85xx/Kconfig
> index 31dc066..7bbd522 100644
> --- a/arch/powerpc/platforms/85xx/Kconfig
> +++ b/arch/powerpc/platforms/85xx/Kconfig
> @@ -262,6 +262,22 @@ config SGY_CTS1000
>=20
> endif # PPC32
>=20
> +config B4_QDS
> +	bool "Freescale B4 QDS"
> +	select DEFAULT_UIMAGE
> +	select E500
> +	select PPC_E500MC
> +	select PHYS_64BIT
> +	select SWIOTLB
> +	select MPC8xxx_GPIO

should be:
        select GENERIC_GPIO
        select ARCH_REQUIRE_GPIOLIB

> +	select HAS_RAPIDIO
> +	select PPC_EPAPR_HV_PIC
> +	help
> +	  This option enables support for the B4 QDS board
> +	  The B4 application development system B4 QDS is a complete
> +	  debugging environment intended for engineers developing
> +	  applications for the B4.
> +

Should be in the if PPC64 section with T4240 QDS support

> config P5020_DS
> 	bool "Freescale P5020 DS"
> 	select DEFAULT_UIMAGE
> diff --git a/arch/powerpc/platforms/85xx/Makefile =
b/arch/powerpc/platforms/85xx/Makefile
> index 712e233..a12ae2d 100644
> --- a/arch/powerpc/platforms/85xx/Makefile
> +++ b/arch/powerpc/platforms/85xx/Makefile
> @@ -6,6 +6,7 @@ obj-$(CONFIG_SMP) +=3D smp.o
> obj-y +=3D common.o
>=20
> obj-$(CONFIG_BSC9131_RDB) +=3D bsc913x_rdb.o
> +obj-$(CONFIG_B4_QDS)   +=3D b4_qds.o corenet_ds.o
> obj-$(CONFIG_MPC8540_ADS) +=3D mpc85xx_ads.o
> obj-$(CONFIG_MPC8560_ADS) +=3D mpc85xx_ads.o
> obj-$(CONFIG_MPC85xx_CDS) +=3D mpc85xx_cds.o
> diff --git a/arch/powerpc/platforms/85xx/b4_qds.c =
b/arch/powerpc/platforms/85xx/b4_qds.c
> new file mode 100644
> index 0000000..0c6702f
> --- /dev/null
> +++ b/arch/powerpc/platforms/85xx/b4_qds.c
> @@ -0,0 +1,102 @@
> +/*
> + * B4 QDS Setup
> + * Should apply for QDS platform of B4860 and it's personalities.
> + * viz B4860/B4420/B4220QDS
> + *
> + * Copyright 2012 Freescale Semiconductor Inc.
> + *
> + * This program is free software; you can redistribute  it and/or =
modify it
> + * under  the terms of  the GNU General  Public License as published =
by the
> + * Free Software Foundation;  either version 2 of the  License, or =
(at your
> + * option) any later version.
> + */
> +
> +#include <linux/kernel.h>
> +#include <linux/pci.h>
> +#include <linux/kdev_t.h>
> +#include <linux/delay.h>
> +#include <linux/interrupt.h>
> +#include <linux/phy.h>
> +
> +#include <asm/time.h>
> +#include <asm/machdep.h>
> +#include <asm/pci-bridge.h>
> +#include <mm/mmu_decl.h>
> +#include <asm/prom.h>
> +#include <asm/udbg.h>
> +#include <asm/mpic.h>
> +
> +#include <linux/of_platform.h>
> +#include <sysdev/fsl_soc.h>
> +#include <sysdev/fsl_pci.h>
> +#include <asm/ehv_pic.h>
> +
> +#include "corenet_ds.h"
> +
> +/*
> + * Called very early, device-tree isn't unflattened
> + */
> +static int __init b4_qds_probe(void)
> +{
> +	unsigned long root =3D of_get_flat_dt_root();
> +#ifdef CONFIG_SMP
> +	extern struct smp_ops_t smp_85xx_ops;
> +#endif
> +
> +	if ((of_flat_dt_is_compatible(root, "fsl,B4860QDS")) ||
> +		(of_flat_dt_is_compatible(root, "fsl,B4420QDS")) ||
> +			(of_flat_dt_is_compatible(root, =
"fsl,B4220QDS")))
> +		return 1;
> +
> +	/* Check if we're running under the Freescale hypervisor */
> +	if ((of_flat_dt_is_compatible(root, "fsl,B4860QDS-hv")) ||
> +		(of_flat_dt_is_compatible(root, "fsl,B4420QDS-hv")) ||
> +			(of_flat_dt_is_compatible(root, =
"fsl,B4220QDS-hv"))) {
> +		ppc_md.init_IRQ =3D ehv_pic_init;
> +		ppc_md.get_irq =3D ehv_pic_get_irq;
> +		ppc_md.restart =3D fsl_hv_restart;
> +		ppc_md.power_off =3D fsl_hv_halt;
> +		ppc_md.halt =3D fsl_hv_halt;
> +#ifdef CONFIG_SMP
> +		/*
> +		 * Disable the timebase sync operations because we can't =
write
> +		 * to the timebase registers under the hypervisor.
> +		  */
> +		smp_85xx_ops.give_timebase =3D NULL;
> +		smp_85xx_ops.take_timebase =3D NULL;
> +#endif
> +		return 1;
> +	}
> +
> +	return 0;
> +}
> +
> +define_machine(b4_qds) {
> +	.name			=3D "B4 QDS",
> +	.probe			=3D b4_qds_probe,
> +	.setup_arch		=3D corenet_ds_setup_arch,
> +	.init_IRQ		=3D corenet_ds_pic_init,
> +#ifdef CONFIG_PCI
> +	.pcibios_fixup_bus	=3D fsl_pcibios_fixup_bus,
> +#endif
> +/* coreint doesn't play nice with lazy EE, use legacy mpic for now */
> +#ifdef CONFIG_PPC64
> +	.get_irq		=3D mpic_get_irq,
> +#else
> +	.get_irq		=3D mpic_get_coreint_irq,
> +#endif
> +	.restart		=3D fsl_rstcr_restart,
> +	.calibrate_decr		=3D generic_calibrate_decr,
> +	.progress		=3D udbg_progress,
> +#ifdef CONFIG_PPC64
> +	.power_save		=3D book3e_idle,
> +#else
> +	.power_save		=3D e500_idle,
> +#endif
> +};
> +
> +machine_arch_initcall(b4_qds, corenet_ds_publish_devices);
> +
> +#ifdef CONFIG_SWIOTLB
> +machine_arch_initcall(b4_qds, swiotlb_setup_bus_notifier);
> +#endif
> --=20
> 1.7.6.GIT
>=20
>=20
> _______________________________________________
> Linuxppc-dev mailing list
> Linuxppc-dev@lists.ozlabs.org
> https://lists.ozlabs.org/listinfo/linuxppc-dev

^ permalink raw reply

* Re: [PATCH 1/6] powerpc/fsl-booke: Add initial silicon device tree files for B4860QDS
From: Kumar Gala @ 2013-03-15 15:54 UTC (permalink / raw)
  To: Shaveta Leekha
  Cc: Zhao Chenhui, Minghuan Lian, Tang Yuantian, Andy Fleming,
	Ramneek Mehresh, Varun Sethi, linuxppc-dev
In-Reply-To: <1363334109-21922-1-git-send-email-shaveta@freescale.com>


On Mar 15, 2013, at 2:55 AM, Shaveta Leekha wrote:

> Signed-off-by: Shaveta Leekha <shaveta@freescale.com>
> Signed-off-by: Zhao Chenhui <chenhui.zhao@freescale.com>
> Signed-off-by: Li Yang <leoli@freescale.com>
> Signed-off-by: Tang Yuantian <Yuantian.Tang@freescale.com>
> Signed-off-by: Varun Sethi <Varun.Sethi@freescale.com>
> Signed-off-by: Minghuan Lian <Minghuan.Lian@freescale.com>
> Signed-off-by: Ramneek Mehresh <ramneek.mehresh@freescale.com>
> Signed-off-by: Kumar Gala <galak@kernel.crashing.org>
> Signed-off-by: Andy Fleming <afleming@freescale.com>
> ---
> arch/powerpc/boot/dts/fsl/b4860si-post.dtsi |  184 =
+++++++++++++++++++++++++++
> arch/powerpc/boot/dts/fsl/b4860si-pre.dtsi  |   80 ++++++++++++
> 2 files changed, 264 insertions(+), 0 deletions(-)
> create mode 100644 arch/powerpc/boot/dts/fsl/b4860si-post.dtsi
> create mode 100644 arch/powerpc/boot/dts/fsl/b4860si-pre.dtsi

Commit description should convey what hw isn't yet covered as well.

DPAA, DSPs, etc.

- k

>=20
> diff --git a/arch/powerpc/boot/dts/fsl/b4860si-post.dtsi =
b/arch/powerpc/boot/dts/fsl/b4860si-post.dtsi
> new file mode 100644
> index 0000000..2db68b2
> --- /dev/null
> +++ b/arch/powerpc/boot/dts/fsl/b4860si-post.dtsi
> @@ -0,0 +1,184 @@
> +/*
> + * B4860 Silicon/SoC Device Tree Source (post include)
> + *
> + * Copyright 2012 Freescale Semiconductor Inc.
> + *
> + * Redistribution and use in source and binary forms, with or without
> + * modification, are permitted provided that the following conditions =
are met:
> + *     * Redistributions of source code must retain the above =
copyright
> + *       notice, this list of conditions and the following =
disclaimer.
> + *     * Redistributions in binary form must reproduce the above =
copyright
> + *       notice, this list of conditions and the following disclaimer =
in the
> + *       documentation and/or other materials provided with the =
distribution.
> + *     * Neither the name of Freescale Semiconductor nor the
> + *       names of its contributors may be used to endorse or promote =
products
> + *       derived from this software without specific prior written =
permission.
> + *
> + *
> + * ALTERNATIVELY, this software may be distributed under the terms of =
the
> + * GNU General Public License ("GPL") as published by the Free =
Software
> + * Foundation, either version 2 of that License or (at your option) =
any
> + * later version.
> + *
> + * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND =
ANY
> + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE =
IMPLIED
> + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE =
ARE
> + * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE =
FOR ANY
> + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL =
DAMAGES
> + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR =
SERVICES;
> + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER =
CAUSED AND
> + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, =
OR TORT
> + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE =
USE OF THIS
> + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> + */
> +
> +&ifc {
> +	#address-cells =3D <2>;
> +	#size-cells =3D <1>;
> +	compatible =3D "fsl,ifc", "simple-bus";
> +	interrupts =3D <25 2 0 0>;
> +};
> +
> +/* controller at 0x200000 */
> +&pci0 {
> +	compatible =3D "fsl,b4860-pcie", "fsl,qoriq-pcie-v2.4";
> +	device_type =3D "pci";
> +	#size-cells =3D <2>;
> +	#address-cells =3D <3>;
> +	bus-range =3D <0x0 0xff>;
> +	interrupts =3D <20 2 0 0>;
> +	pcie@0 {
> +		#interrupt-cells =3D <1>;
> +		#size-cells =3D <2>;
> +		#address-cells =3D <3>;
> +		device_type =3D "pci";
> +		interrupts =3D <20 2 0 0>;
> +		interrupt-map-mask =3D <0xf800 0 0 7>;
> +		interrupt-map =3D <
> +			/* IDSEL 0x0 */
> +			0000 0 0 1 &mpic 40 1 0 0
> +			0000 0 0 2 &mpic 1 1 0 0
> +			0000 0 0 3 &mpic 2 1 0 0
> +			0000 0 0 4 &mpic 3 1 0 0
> +			>;
> +	};
> +};
> +
> +&rio {
> +	compatible =3D "fsl,srio";
> +	interrupts =3D <16 2 1 11>;
> +	#address-cells =3D <2>;
> +	#size-cells =3D <2>;
> +	ranges;
> +
> +	port1 {
> +		#address-cells =3D <2>;
> +		#size-cells =3D <2>;
> +		cell-index =3D <1>;
> +	};
> +
> +	port2 {
> +		#address-cells =3D <2>;
> +		#size-cells =3D <2>;
> +		cell-index =3D <2>;
> +	};
> +};
> +
> +&soc {
> +	#address-cells =3D <1>;
> +	#size-cells =3D <1>;
> +	device_type =3D "soc";
> +	compatible =3D "simple-bus";
> +
> +	soc-sram-error {
> +		compatible =3D "fsl,soc-sram-error";
> +		interrupts =3D <16 2 1 2>;
> +	};
> +
> +	corenet-law@0 {
> +		compatible =3D "fsl,corenet-law";
> +		reg =3D <0x0 0x1000>;
> +		fsl,num-laws =3D <32>;
> +	};
> +
> +	ddr1: memory-controller@8000 {
> +		compatible =3D "fsl,qoriq-memory-controller-v4.5", =
"fsl,qoriq-memory-controller";
> +		reg =3D <0x8000 0x1000>;
> +		interrupts =3D <16 2 1 8>;
> +	};
> +
> +	ddr2: memory-controller@9000 {
> +		compatible =3D =
"fsl,qoriq-memory-controller-v4.5","fsl,qoriq-memory-controller";
> +		reg =3D <0x9000 0x1000>;
> +		interrupts =3D <16 2 1 9>;
> +	};
> +
> +	cpc: l3-cache-controller@10000 {
> +		compatible =3D "fsl,p5020-l3-cache-controller", =
"fsl,p4080-l3-cache-controller", "cache";
> +		reg =3D <0x10000 0x1000
> +		       0x11000 0x1000>;
> +		interrupts =3D <16 2 1 4
> +			      16 2 1 5>;
> +	};
> +
> +	corenet-cf@18000 {
> +		compatible =3D "fsl,corenet-cf";
> +		reg =3D <0x18000 0x1000>;
> +		interrupts =3D <16 2 1 0>;
> +		fsl,ccf-num-csdids =3D <32>;
> +		fsl,ccf-num-snoopids =3D <32>;
> +	};
> +
> +	iommu@20000 {
> +		compatible =3D "fsl,pamu-v1.0", "fsl,pamu";
> +		reg =3D <0x20000 0x4000>;
> +		interrupts =3D <
> +			24 2 0 0
> +			16 2 1 1>;
> +	};
> +
> +/include/ "qoriq-mpic.dtsi"
> +
> +	guts: global-utilities@e0000 {
> +		compatible =3D "fsl,b4860-device-config";
> +		reg =3D <0xe0000 0xe00>;
> +		fsl,has-rstcr;
> +		fsl,liodn-bits =3D <12>;
> +	};
> +
> +	clockgen: global-utilities@e1000 {
> +		compatible =3D "fsl,b4860-clockgen", =
"fsl,qoriq-clockgen-2";
> +		reg =3D <0xe1000 0x1000>;
> +	};
> +
> +	rcpm: global-utilities@e2000 {
> +		compatible =3D "fsl,b4860-rcpm", "fsl,qoriq-rcpm-2";
> +		reg =3D <0xe2000 0x1000>;
> +	};
> +
> +/include/ "qoriq-dma-0.dtsi"
> +/include/ "qoriq-dma-1.dtsi"
> +
> +/include/ "qonverge-usb2-dr-0.dtsi"
> +	usb0: usb@210000 {
> +		compatible =3D "fsl-usb2-dr-v2.4", "fsl-usb2-dr";
> +	};
> +
> +/include/ "qoriq-espi-0.dtsi"
> +	spi@110000 {
> +		fsl,espi-num-chipselects =3D <4>;
> +	};
> +
> +/include/ "qoriq-esdhc-0.dtsi"
> +	sdhc@114000 {
> +		sdhci,auto-cmd12;
> +	};
> +/include/ "qoriq-i2c-0.dtsi"
> +/include/ "qoriq-i2c-1.dtsi"
> +/include/ "qoriq-duart-0.dtsi"
> +/include/ "qoriq-duart-1.dtsi"
> +
> +	L2: l2-cache-controller@c20000 {
> +		next-level-cache =3D <&cpc>;
> +	};
> +};
> diff --git a/arch/powerpc/boot/dts/fsl/b4860si-pre.dtsi =
b/arch/powerpc/boot/dts/fsl/b4860si-pre.dtsi
> new file mode 100644
> index 0000000..33bc600
> --- /dev/null
> +++ b/arch/powerpc/boot/dts/fsl/b4860si-pre.dtsi
> @@ -0,0 +1,80 @@
> +/*
> + * B4860 Silicon/SoC Device Tree Source (pre include)
> + *
> + * Copyright 2012 Freescale Semiconductor Inc.
> + *
> + * Redistribution and use in source and binary forms, with or without
> + * modification, are permitted provided that the following conditions =
are met:
> + *     * Redistributions of source code must retain the above =
copyright
> + *       notice, this list of conditions and the following =
disclaimer.
> + *     * Redistributions in binary form must reproduce the above =
copyright
> + *       notice, this list of conditions and the following disclaimer =
in the
> + *       documentation and/or other materials provided with the =
distribution.
> + *     * Neither the name of Freescale Semiconductor nor the
> + *       names of its contributors may be used to endorse or promote =
products
> + *       derived from this software without specific prior written =
permission.
> + *
> + *
> + * ALTERNATIVELY, this software may be distributed under the terms of =
the
> + * GNU General Public License ("GPL") as published by the Free =
Software
> + * Foundation, either version 2 of that License or (at your option) =
any
> + * later version.
> + *
> + * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND =
ANY
> + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE =
IMPLIED
> + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE =
ARE
> + * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE =
FOR ANY
> + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL =
DAMAGES
> + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR =
SERVICES;
> + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER =
CAUSED AND
> + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, =
OR TORT
> + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE =
USE OF THIS
> + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> + */
> +
> +/dts-v1/;
> +/ {
> +	compatible =3D "fsl,B4860";
> +	#address-cells =3D <2>;
> +	#size-cells =3D <2>;
> +	interrupt-parent =3D <&mpic>;
> +
> +	aliases {
> +		ccsr =3D &soc;
> +
> +		serial0 =3D &serial0;
> +		serial1 =3D &serial1;
> +		serial2 =3D &serial2;
> +		serial3 =3D &serial3;
> +		pci0 =3D &pci0;
> +		dma0 =3D &dma0;
> +		dma1 =3D &dma1;
> +		sdhc =3D &sdhc;
> +	};
> +
> +	cpus {
> +		#address-cells =3D <1>;
> +		#size-cells =3D <0>;
> +
> +		PowerPC,e6500@0 {
> +			device_type =3D "cpu";
> +			reg =3D <0 1>;
> +			next-level-cache =3D <&L2>;
> +		};
> +		PowerPC,e6500@1 {
> +			device_type =3D "cpu";
> +			reg =3D <2 3>;
> +			next-level-cache =3D <&L2>;
> +		};
> +		PowerPC,e6500@2 {
> +			device_type =3D "cpu";
> +			reg =3D <4 5>;
> +			next-level-cache =3D <&L2>;
> +		};
> +		PowerPC,e6500@3 {
> +			device_type =3D "cpu";
> +			reg =3D <6 7>;
> +			next-level-cache =3D <&L2>;
> +		};
> +	};
> +};
> --=20
> 1.7.6.GIT
>=20

^ permalink raw reply

* Re: [PATCH V2] powerpc/85xx: workaround for chips with MSI hardware errata
From: Kumar Gala @ 2013-03-15 15:52 UTC (permalink / raw)
  To: Jia Hongtao-B38951
  Cc: Wood Scott-B07421, linuxppc-dev@lists.ozlabs.org, Li Yang-R58472
In-Reply-To: <412C8208B4A0464FA894C5F0C278CD5D01C18FB3@039-SN1MPN1-003.039d.mgd.msft.net>


On Mar 14, 2013, at 9:00 PM, Jia Hongtao-B38951 wrote:

>=20
>=20
>> -----Original Message-----
>> From: Kumar Gala [mailto:galak@kernel.crashing.org]
>> Sent: Friday, March 15, 2013 4:05 AM
>> To: Jia Hongtao-B38951
>> Cc: linuxppc-dev@lists.ozlabs.org; Wood Scott-B07421;
>> michael@ellerman.id.au; Li Yang-R58472; Jia Hongtao-B38951
>> Subject: Re: [PATCH V2] powerpc/85xx: workaround for chips with MSI
>> hardware errata
>>=20
>>=20
>> On Mar 14, 2013, at 5:35 AM, Jia Hongtao wrote:
>>=20
>>> The MPIC version 2.0 has a MSI errata (errata PIC1 of mpc8544), It
>>> causes that neither MSI nor MSI-X can work fine. This is a =
workaround
>>> to allow MSI-X to function properly.
>>>=20
>>> Signed-off-by: Liu Shuo <soniccat.liu@gmail.com>
>>> Signed-off-by: Li Yang <leoli@freescale.com>
>>> Signed-off-by: Jia Hongtao <hongtao.jia@freescale.com>
>>> ---
>>> Changes for V2:
>>> - Address almost all the comments from Michael Ellerman for V1.
>>> Here is the link:
>>> http://patchwork.ozlabs.org/patch/226833/
>>>=20
>>> arch/powerpc/sysdev/fsl_msi.c | 65
>>> +++++++++++++++++++++++++++++++++++++++++--
>>> arch/powerpc/sysdev/fsl_msi.h |  2 ++
>>> 2 files changed, 64 insertions(+), 3 deletions(-)
>>>=20
>>> diff --git a/arch/powerpc/sysdev/fsl_msi.c
>>> b/arch/powerpc/sysdev/fsl_msi.c index 178c994..54cb83e 100644
>>> --- a/arch/powerpc/sysdev/fsl_msi.c
>>> +++ b/arch/powerpc/sysdev/fsl_msi.c
>>> @@ -98,8 +98,18 @@ static int fsl_msi_init_allocator(struct fsl_msi
>>> *msi_data)
>>>=20
>>> static int fsl_msi_check_device(struct pci_dev *pdev, int nvec, int
>>> type) {
>>> -	if (type =3D=3D PCI_CAP_ID_MSIX)
>>> -		pr_debug("fslmsi: MSI-X untested, trying anyway.\n");
>>> +	struct fsl_msi *msi;
>>> +
>>> +	if (type =3D=3D PCI_CAP_ID_MSI) {
>>> +		/*
>>> +		 * MPIC version 2.0 has erratum PIC1. For now MSI
>>> +		 * could not work. So check to prevent MSI from
>>> +		 * being used on the board with this erratum.
>>> +		 */
>>> +		list_for_each_entry(msi, &msi_head, list)
>>> +			if (msi->feature & MSI_HW_ERRATA_ENDIAN)
>>> +				return -EINVAL;
>>> +	}
>>>=20
>>> 	return 0;
>>> }
>>> @@ -142,7 +152,17 @@ static void fsl_compose_msi_msg(struct pci_dev
>> *pdev, int hwirq,
>>> 	msg->address_lo =3D lower_32_bits(address);
>>> 	msg->address_hi =3D upper_32_bits(address);
>>>=20
>>> -	msg->data =3D hwirq;
>>> +	/*
>>> +	 * MPIC version 2.0 has erratum PIC1. It causes
>>> +	 * that neither MSI nor MSI-X can work fine.
>>> +	 * This is a workaround to allow MSI-X to function
>>> +	 * properly. It only works for MSI-X, we prevent
>>> +	 * MSI on buggy chips in fsl_msi_check_device().
>>> +	 */
>>> +	if (msi_data->feature & MSI_HW_ERRATA_ENDIAN)
>>> +		msg->data =3D __swab32(hwirq);
>>> +	else
>>> +		msg->data =3D hwirq;
>>>=20
>>> 	pr_debug("%s: allocated srs: %d, ibs: %d\n",
>>> 		__func__, hwirq / IRQS_PER_MSI_REG, hwirq % =
IRQS_PER_MSI_REG);
>> @@
>>> -361,6 +381,35 @@ static int fsl_msi_setup_hwirq(struct fsl_msi =
*msi,
>> struct platform_device *dev,
>>> 	return 0;
>>> }
>>>=20
>>> +/* MPIC version 2.0 has erratum PIC1 */ static int
>>> +mpic_has_errata(struct platform_device *dev) {
>>> +	struct device_node *mpic_node;
>>> +
>>> +	mpic_node =3D of_irq_find_parent(dev->dev.of_node);
>>> +	if (mpic_node) {
>>> +		u32 *reg_base, brr1 =3D 0;
>>> +		/* Get the PIC reg base */
>>> +		reg_base =3D of_iomap(mpic_node, 0);
>>> +		of_node_put(mpic_node);
>>> +		if (!reg_base) {
>>> +			dev_err(&dev->dev, "ioremap problem failed.\n");
>>> +			return -EIO;
>>> +		}
>>> +
>>> +		/* Get the mpic version from block revision register 1 =
*/
>>> +		brr1 =3D in_be32(reg_base + MPIC_FSL_BRR1);
>>> +		iounmap(reg_base);
>>> +		if ((brr1 & MPIC_FSL_BRR1_VER) =3D=3D 0x0200)
>>> +			return 1;
>>> +	} else {
>>> +		dev_err(&dev->dev, "MSI can't find his parent mpic =
node.\n");
>>> +		return -ENODEV;
>>> +	}
>>> +
>>> +	return 0;
>>> +}
>>> +
>>> static const struct of_device_id fsl_of_msi_ids[]; static int
>>> fsl_of_msi_probe(struct platform_device *dev) { @@ -423,6 +472,16 @@
>>> static int fsl_of_msi_probe(struct platform_device *dev)
>>>=20
>>> 	msi->feature =3D features->fsl_pic_ip;
>>>=20
>>> +	if ((features->fsl_pic_ip & FSL_PIC_IP_MASK) =3D=3D =
FSL_PIC_IP_MPIC) {
>>> +		rc =3D mpic_has_errata(dev);
>>> +		if (rc > 0) {
>>> +			msi->feature |=3D MSI_HW_ERRATA_ENDIAN;
>>> +		} else if (rc < 0) {
>>> +			err =3D rc;
>>> +			goto error_out;
>>> +		}
>>> +	}
>>> +
>>> 	/*
>>> 	 * Remember the phandle, so that we can match with any PCI nodes
>>> 	 * that have an "fsl,msi" property.
>>> diff --git a/arch/powerpc/sysdev/fsl_msi.h
>>> b/arch/powerpc/sysdev/fsl_msi.h index 8225f86..7389e8e 100644
>>> --- a/arch/powerpc/sysdev/fsl_msi.h
>>> +++ b/arch/powerpc/sysdev/fsl_msi.h
>>> @@ -25,6 +25,8 @@
>>> #define FSL_PIC_IP_IPIC   0x00000002
>>> #define FSL_PIC_IP_VMPIC  0x00000003
>>>=20
>>> +#define MSI_HW_ERRATA_ENDIAN 0x00000010
>>> +
>>=20
>> Is there any reason to put this in fsl_msi.h rather than just in
>> fsl_msi.c itself?
>>=20
>> - k
>=20
> Actually no. This macro is only used by fsl_msi.c.
> Will move it to fsl_msi.c.
>=20
> Thanks.
> -Hongtao.

Also, wondering if we can do the mpic version detection in mpic.c and
not here.  I'm not sure what means we'd have to get back to the mpic
struct so we could possibly use mpic->flags.

I'll look to see if I can suggest anything along those lines, thus
reducing the amount of ioremap() and code to find the mpic registers,
etc.

- k

^ permalink raw reply

* Re: [PATCH 1/6] powerpc/fsl-booke: Add initial silicon device tree files for B4860QDS
From: Timur Tabi @ 2013-03-15 13:07 UTC (permalink / raw)
  To: Shaveta Leekha
  Cc: Zhao Chenhui, Tang Yuantian, Minghuan Lian, Andy Fleming,
	Ramneek Mehresh, Varun Sethi, linuxppc-dev
In-Reply-To: <1363334109-21922-1-git-send-email-shaveta@freescale.com>

On Fri, Mar 15, 2013 at 2:55 AM, Shaveta Leekha <shaveta@freescale.com> wrote:


> +       iommu@20000 {
> +               compatible = "fsl,pamu-v1.0", "fsl,pamu";
> +               reg = <0x20000 0x4000>;
> +               interrupts = <
> +                       24 2 0 0
> +                       16 2 1 1>;
> +       };

You need to add the PAMU topology.

-- 
Timur Tabi
Linux kernel developer at Freescale

^ permalink raw reply

* Re: [PATCH 0/4] mv643xx_eth: use mvmdio MDIO bus driver
From: David Miller @ 2013-03-15 13:05 UTC (permalink / raw)
  To: florian
  Cc: thomas.petazzoni, andrew, linux, jason, linux-doc,
	devicetree-discuss, linux-kernel, rob.herring, netdev, paulus,
	linux-arm-kernel, rob, gregkh, linuxppc-dev, buytenh
In-Reply-To: <514319B6.7030307@openwrt.org>

From: Florian Fainelli <florian@openwrt.org>
Date: Fri, 15 Mar 2013 13:53:10 +0100

> Le 03/15/13 13:55, David Miller a écrit :
>> From: David Miller <davem@davemloft.net>
>> Date: Fri, 15 Mar 2013 08:53:21 -0400 (EDT)
>>
>>> From: Florian Fainelli <florian@openwrt.org>
>>> Date: Thu, 14 Mar 2013 19:08:31 +0100
>>>
>>>> This patch converts the mv643xx_eth driver to use the mvmdio MDIO bus
>>>> driver
>>>> instead of rolling its own implementation. As a result, all users of
>>>> this
>>>> mv643xx_eth driver are converted to register an "orion-mdio"
>>>> platform_device.
>>>> The mvmdio driver is also updated to support an interrupt line which
>>>> reports
>>>> SMI error/completion, and to allow traditionnal platform device
>>>> registration
>>>> instead of just device tree.
>>>>
>>>> David, I think it makes sense for you to merge all of this, since we
>>>> do
>>>> not want the architecture files to be desynchronized from the
>>>> mv643xx_eth to
>>>> avoid runtime breakage. The potential for merge conflicts should be
>>>> very small.
>>>
>>> All applied to net-next, thanks.
>>
>> Actually, reverted.  Please send me code which actually compiles:
>>
>> drivers/net/ethernet/marvell/mvmdio.c: In function
>> ‘orion_mdio_wait_ready’:
>> drivers/net/ethernet/marvell/mvmdio.c:70:28: error: ‘NO_IRQ’
>> undeclared (first use in this function)
>> drivers/net/ethernet/marvell/mvmdio.c:70:28: note: each undeclared
>> identifier is reported only once for each function it appears in
>> drivers/net/ethernet/marvell/mvmdio.c: In function ‘orion_mdio_probe’:
>> drivers/net/ethernet/marvell/mvmdio.c:242:24: error: ‘NO_IRQ’
>> undeclared (first use in this function)
>> make[4]: *** [drivers/net/ethernet/marvell/mvmdio.o] Error 1
>>
>> And don't use Kconfig dependencies to work around this, fix it
>> properly.
> 
> Is there any platform out there for which we do not have a NO_IRQ
> definition by now? If so, what is it?

Obviously if x86_64 doesn't even build your changes, that is one such
platform.  Also, is grep not working on your computer?

Platforms are absolutely no required to have this define, zero is the
only valid "no IRQ" which is portable in any way.

This is an old and tired topic, portable code does not use NO_IRQ, and
that's simply the end of it.

^ permalink raw reply

* Re: [PATCH 0/4] mv643xx_eth: use mvmdio MDIO bus driver
From: Florian Fainelli @ 2013-03-15 13:03 UTC (permalink / raw)
  To: David Miller
  Cc: thomas.petazzoni, andrew, linux, jason, linux-doc,
	devicetree-discuss, linux-kernel, rob.herring, netdev, paulus,
	linux-arm-kernel, rob, gregkh, linuxppc-dev, buytenh
In-Reply-To: <20130315.090517.2027403317215151155.davem@davemloft.net>

Le 03/15/13 14:05, David Miller a écrit :
> From: Florian Fainelli <florian@openwrt.org>
> Date: Fri, 15 Mar 2013 13:53:10 +0100
>
>> Le 03/15/13 13:55, David Miller a écrit :
>>> From: David Miller <davem@davemloft.net>
>>> Date: Fri, 15 Mar 2013 08:53:21 -0400 (EDT)
>>>
>>>> From: Florian Fainelli <florian@openwrt.org>
>>>> Date: Thu, 14 Mar 2013 19:08:31 +0100
>>>>
>>>>> This patch converts the mv643xx_eth driver to use the mvmdio MDIO bus
>>>>> driver
>>>>> instead of rolling its own implementation. As a result, all users of
>>>>> this
>>>>> mv643xx_eth driver are converted to register an "orion-mdio"
>>>>> platform_device.
>>>>> The mvmdio driver is also updated to support an interrupt line which
>>>>> reports
>>>>> SMI error/completion, and to allow traditionnal platform device
>>>>> registration
>>>>> instead of just device tree.
>>>>>
>>>>> David, I think it makes sense for you to merge all of this, since we
>>>>> do
>>>>> not want the architecture files to be desynchronized from the
>>>>> mv643xx_eth to
>>>>> avoid runtime breakage. The potential for merge conflicts should be
>>>>> very small.
>>>>
>>>> All applied to net-next, thanks.
>>>
>>> Actually, reverted.  Please send me code which actually compiles:
>>>
>>> drivers/net/ethernet/marvell/mvmdio.c: In function
>>> ‘orion_mdio_wait_ready’:
>>> drivers/net/ethernet/marvell/mvmdio.c:70:28: error: ‘NO_IRQ’
>>> undeclared (first use in this function)
>>> drivers/net/ethernet/marvell/mvmdio.c:70:28: note: each undeclared
>>> identifier is reported only once for each function it appears in
>>> drivers/net/ethernet/marvell/mvmdio.c: In function ‘orion_mdio_probe’:
>>> drivers/net/ethernet/marvell/mvmdio.c:242:24: error: ‘NO_IRQ’
>>> undeclared (first use in this function)
>>> make[4]: *** [drivers/net/ethernet/marvell/mvmdio.o] Error 1
>>>
>>> And don't use Kconfig dependencies to work around this, fix it
>>> properly.
>>
>> Is there any platform out there for which we do not have a NO_IRQ
>> definition by now? If so, what is it?
>
> Obviously if x86_64 doesn't even build your changes, that is one such
> platform.  Also, is grep not working on your computer?

I build-tested on PowerPC and ARM, which are the platforms actually
*using* these drivers, and forgot that you build for x86_64.

>
> Platforms are absolutely no required to have this define, zero is the
> only valid "no IRQ" which is portable in any way.
>
> This is an old and tired topic, portable code does not use NO_IRQ, and
> that's simply the end of it.

I changed it to not rely on NO_IRQ anymore. Thanks!
--
Florian

^ permalink raw reply

* Re: [PATCH 0/4] mv643xx_eth: use mvmdio MDIO bus driver
From: David Miller @ 2013-03-15 12:55 UTC (permalink / raw)
  To: florian
  Cc: thomas.petazzoni, andrew, linux, jason, linux-doc,
	devicetree-discuss, linux-kernel, rob.herring, netdev, paulus,
	linux-arm-kernel, rob, gregkh, linuxppc-dev, buytenh
In-Reply-To: <20130315.085321.1047694772636447477.davem@davemloft.net>

From: David Miller <davem@davemloft.net>
Date: Fri, 15 Mar 2013 08:53:21 -0400 (EDT)

> From: Florian Fainelli <florian@openwrt.org>
> Date: Thu, 14 Mar 2013 19:08:31 +0100
> 
>> This patch converts the mv643xx_eth driver to use the mvmdio MDIO bus driver
>> instead of rolling its own implementation. As a result, all users of this
>> mv643xx_eth driver are converted to register an "orion-mdio" platform_device.
>> The mvmdio driver is also updated to support an interrupt line which reports
>> SMI error/completion, and to allow traditionnal platform device registration
>> instead of just device tree.
>> 
>> David, I think it makes sense for you to merge all of this, since we do
>> not want the architecture files to be desynchronized from the mv643xx_eth to
>> avoid runtime breakage. The potential for merge conflicts should be very small.
> 
> All applied to net-next, thanks.

Actually, reverted.  Please send me code which actually compiles:

drivers/net/ethernet/marvell/mvmdio.c: In function ‘orion_mdio_wait_ready’:
drivers/net/ethernet/marvell/mvmdio.c:70:28: error: ‘NO_IRQ’ undeclared (first use in this function)
drivers/net/ethernet/marvell/mvmdio.c:70:28: note: each undeclared identifier is reported only once for each function it appears in
drivers/net/ethernet/marvell/mvmdio.c: In function ‘orion_mdio_probe’:
drivers/net/ethernet/marvell/mvmdio.c:242:24: error: ‘NO_IRQ’ undeclared (first use in this function)
make[4]: *** [drivers/net/ethernet/marvell/mvmdio.o] Error 1

And don't use Kconfig dependencies to work around this, fix it properly.

^ permalink raw reply

* Re: [PATCH 0/4] mv643xx_eth: use mvmdio MDIO bus driver
From: David Miller @ 2013-03-15 12:53 UTC (permalink / raw)
  To: florian
  Cc: thomas.petazzoni, andrew, linux, jason, linux-doc,
	devicetree-discuss, linux-kernel, rob.herring, netdev, paulus,
	linux-arm-kernel, rob, gregkh, linuxppc-dev, buytenh
In-Reply-To: <1363284515-9865-1-git-send-email-florian@openwrt.org>

From: Florian Fainelli <florian@openwrt.org>
Date: Thu, 14 Mar 2013 19:08:31 +0100

> This patch converts the mv643xx_eth driver to use the mvmdio MDIO bus driver
> instead of rolling its own implementation. As a result, all users of this
> mv643xx_eth driver are converted to register an "orion-mdio" platform_device.
> The mvmdio driver is also updated to support an interrupt line which reports
> SMI error/completion, and to allow traditionnal platform device registration
> instead of just device tree.
> 
> David, I think it makes sense for you to merge all of this, since we do
> not want the architecture files to be desynchronized from the mv643xx_eth to
> avoid runtime breakage. The potential for merge conflicts should be very small.

All applied to net-next, thanks.

^ permalink raw reply

* Re: [PATCH 0/4] mv643xx_eth: use mvmdio MDIO bus driver
From: Florian Fainelli @ 2013-03-15 12:53 UTC (permalink / raw)
  To: David Miller
  Cc: thomas.petazzoni, andrew, linux, jason, linux-doc,
	devicetree-discuss, linux-kernel, rob.herring, netdev, paulus,
	linux-arm-kernel, rob, gregkh, linuxppc-dev, buytenh
In-Reply-To: <20130315.085510.974056254781971889.davem@davemloft.net>

Le 03/15/13 13:55, David Miller a écrit :
> From: David Miller <davem@davemloft.net>
> Date: Fri, 15 Mar 2013 08:53:21 -0400 (EDT)
>
>> From: Florian Fainelli <florian@openwrt.org>
>> Date: Thu, 14 Mar 2013 19:08:31 +0100
>>
>>> This patch converts the mv643xx_eth driver to use the mvmdio MDIO bus driver
>>> instead of rolling its own implementation. As a result, all users of this
>>> mv643xx_eth driver are converted to register an "orion-mdio" platform_device.
>>> The mvmdio driver is also updated to support an interrupt line which reports
>>> SMI error/completion, and to allow traditionnal platform device registration
>>> instead of just device tree.
>>>
>>> David, I think it makes sense for you to merge all of this, since we do
>>> not want the architecture files to be desynchronized from the mv643xx_eth to
>>> avoid runtime breakage. The potential for merge conflicts should be very small.
>>
>> All applied to net-next, thanks.
>
> Actually, reverted.  Please send me code which actually compiles:
>
> drivers/net/ethernet/marvell/mvmdio.c: In function ‘orion_mdio_wait_ready’:
> drivers/net/ethernet/marvell/mvmdio.c:70:28: error: ‘NO_IRQ’ undeclared (first use in this function)
> drivers/net/ethernet/marvell/mvmdio.c:70:28: note: each undeclared identifier is reported only once for each function it appears in
> drivers/net/ethernet/marvell/mvmdio.c: In function ‘orion_mdio_probe’:
> drivers/net/ethernet/marvell/mvmdio.c:242:24: error: ‘NO_IRQ’ undeclared (first use in this function)
> make[4]: *** [drivers/net/ethernet/marvell/mvmdio.o] Error 1
>
> And don't use Kconfig dependencies to work around this, fix it properly.

Is there any platform out there for which we do not have a NO_IRQ 
definition by now? If so, what is it?
--
Florian

^ permalink raw reply

* Re: [PATCH 0/4] mv643xx_eth: use mvmdio MDIO bus driver
From: Florian Fainelli @ 2013-03-15 12:51 UTC (permalink / raw)
  To: David Miller
  Cc: thomas.petazzoni, andrew, linux, jason, linux-doc,
	devicetree-discuss, linux-kernel, rob.herring, netdev, paulus,
	linux-arm-kernel, rob, gregkh, linuxppc-dev, buytenh
In-Reply-To: <20130315.085321.1047694772636447477.davem@davemloft.net>

Le 03/15/13 13:53, David Miller a écrit :
> From: Florian Fainelli <florian@openwrt.org>
> Date: Thu, 14 Mar 2013 19:08:31 +0100
>
>> This patch converts the mv643xx_eth driver to use the mvmdio MDIO bus driver
>> instead of rolling its own implementation. As a result, all users of this
>> mv643xx_eth driver are converted to register an "orion-mdio" platform_device.
>> The mvmdio driver is also updated to support an interrupt line which reports
>> SMI error/completion, and to allow traditionnal platform device registration
>> instead of just device tree.
>>
>> David, I think it makes sense for you to merge all of this, since we do
>> not want the architecture files to be desynchronized from the mv643xx_eth to
>> avoid runtime breakage. The potential for merge conflicts should be very small.
>
> All applied to net-next, thanks.
>

Oh whoa, that was fast, maybe too fast. I will submit a follow-up patch
for patch 4 to address the issues I mentioned earlier.
--
Florian

^ permalink raw reply

* Re: [PATCH 4/4 v2] mv643xx_eth: convert to use the Marvell Orion MDIO driver
From: Thomas Petazzoni @ 2013-03-15 11:42 UTC (permalink / raw)
  To: Florian Fainelli
  Cc: Andrew Lunn, Russell King, Jason Cooper, linux-doc,
	devicetree-discuss, linux-kernel, Rob Herring, netdev,
	Paul Mackerras, linux-arm-kernel, Rob Landley, Greg Kroah-Hartman,
	linuxppc-dev, davem, Lennert Buytenhek
In-Reply-To: <514300E0.2030108@openwrt.org>

Dear Florian Fainelli,

On Fri, 15 Mar 2013 12:07:12 +0100, Florian Fainelli wrote:

> Thanks to the help of Andrew Lunn, there is at least two known issues 
> with this patch version:
> 
> - we need to move up the mvmdio line in 
> drivers/net/ethernet/marvell/Makefile to make sure that configs having 
> both mvmdio and mv643xx_eth built-in get the probing order right

I don't think it's the right way of fixing the problem. If there is no
dependency on the two devices through the device model (i.e they don't
have a parent->child relationship), then the mv643xx_eth driver should
probably return -EPROBE_DEFER when it can't find its PHY so that its
->probe() operation gets called once again by the kernel when other
drivers (including mvmdio) have been probed.

Best regards,

Thomas
-- 
Thomas Petazzoni, Free Electrons
Kernel, drivers, real-time and embedded Linux
development, consulting, training and support.
http://free-electrons.com

^ permalink raw reply

* Re: [PATCH 4/4 v2] mv643xx_eth: convert to use the Marvell Orion MDIO driver
From: Florian Fainelli @ 2013-03-15 11:07 UTC (permalink / raw)
  To: Florian Fainelli
  Cc: Thomas Petazzoni, Andrew Lunn, Russell King, Jason Cooper,
	linux-doc, devicetree-discuss, linux-kernel, Rob Herring, netdev,
	Paul Mackerras, linux-arm-kernel, Rob Landley, Greg Kroah-Hartman,
	linuxppc-dev, davem, Lennert Buytenhek
In-Reply-To: <1363284515-9865-5-git-send-email-florian@openwrt.org>

Le 03/14/13 19:08, Florian Fainelli a écrit :
> This patch converts the Marvell MV643XX ethernet driver to use the
> Marvell Orion MDIO driver. As a result, PowerPC and ARM platforms
> registering the Marvell MV643XX ethernet driver are also updated to
> register a Marvell Orion MDIO driver. This driver voluntarily overlaps
> with the Marvell Ethernet shared registers because it will use a subset
> of this shared register (shared_base + 0x4 - shared_base + 0x84). The
> Ethernet driver is also updated to look up for a PHY device using the
> Orion MDIO bus driver.

Thanks to the help of Andrew Lunn, there are at least two known issues
with this patch version:

- we need to move up the mvmdio line in
drivers/net/ethernet/marvell/Makefile to make sure that configs having
both mvmdio and mv643xx_eth built-in get the probing order right
- the bus name used by mv643xx_eth is not the right one (orion-mdio.0 vs
expected orion-mdio) so the PHY device will not be found during
phy_connect()

I will fix these two issues in the next version of the patchset.
--
Florian

^ permalink raw reply

* [PATCH -V3 04/25] powerpc: Reduce the PTE_INDEX_SIZE
From: Aneesh Kumar K.V @ 2013-03-15  9:39 UTC (permalink / raw)
  To: benh, paulus; +Cc: linuxppc-dev, Aneesh Kumar K.V
In-Reply-To: <1363340407-22619-1-git-send-email-aneesh.kumar@linux.vnet.ibm.com>

From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>

This makes one PMD cover a 16MB range, which makes THP easier to implement
on Power. The THP core code uses one pmd entry to track the hugepage, and
the range mapped by a single pmd entry should be equal to the hugepage size
supported by the hardware.

Acked-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/pgtable-ppc64-64k.h |    4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/include/asm/pgtable-ppc64-64k.h b/arch/powerpc/include/asm/pgtable-ppc64-64k.h
index be4e287..3c529b4 100644
--- a/arch/powerpc/include/asm/pgtable-ppc64-64k.h
+++ b/arch/powerpc/include/asm/pgtable-ppc64-64k.h
@@ -4,10 +4,10 @@
 #include <asm-generic/pgtable-nopud.h>
 
 
-#define PTE_INDEX_SIZE  12
+#define PTE_INDEX_SIZE  8
 #define PMD_INDEX_SIZE  12
 #define PUD_INDEX_SIZE	0
-#define PGD_INDEX_SIZE  6
+#define PGD_INDEX_SIZE  10
 
 #ifndef __ASSEMBLY__
 #define PTE_TABLE_SIZE	(sizeof(real_pte_t) << PTE_INDEX_SIZE)
-- 
1.7.10

^ permalink raw reply related

* [PATCH -V3 03/25] powerpc: Don't hard code the size of pte page
From: Aneesh Kumar K.V @ 2013-03-15  9:39 UTC (permalink / raw)
  To: benh, paulus; +Cc: linuxppc-dev, Aneesh Kumar K.V
In-Reply-To: <1363340407-22619-1-git-send-email-aneesh.kumar@linux.vnet.ibm.com>

From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>

Use PTRS_PER_PTE to indicate the size of the pte page. To support THP,
later patches will change the PTRS_PER_PTE value.

Acked-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/pgtable.h |    6 ++++++
 arch/powerpc/mm/hash_low_64.S      |    4 ++--
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h
index a9cbd3b..4b52726 100644
--- a/arch/powerpc/include/asm/pgtable.h
+++ b/arch/powerpc/include/asm/pgtable.h
@@ -17,6 +17,12 @@ struct mm_struct;
 #  include <asm/pgtable-ppc32.h>
 #endif
 
+/*
+ * We save the slot number & secondary bit in the second half of the
+ * PTE page. We use the 8 bytes per each pte entry.
+ */
+#define PTE_PAGE_HIDX_OFFSET (PTRS_PER_PTE * 8)
+
 #ifndef __ASSEMBLY__
 
 #include <asm/tlbflush.h>
diff --git a/arch/powerpc/mm/hash_low_64.S b/arch/powerpc/mm/hash_low_64.S
index 7443481..abdd5e2 100644
--- a/arch/powerpc/mm/hash_low_64.S
+++ b/arch/powerpc/mm/hash_low_64.S
@@ -490,7 +490,7 @@ END_FTR_SECTION(CPU_FTR_NOEXECUTE|CPU_FTR_COHERENT_ICACHE, CPU_FTR_NOEXECUTE)
 	beq	htab_inval_old_hpte
 
 	ld	r6,STK_PARAM(R6)(r1)
-	ori	r26,r6,0x8000		/* Load the hidx mask */
+	ori	r26,r6,PTE_PAGE_HIDX_OFFSET /* Load the hidx mask. */
 	ld	r26,0(r26)
 	addi	r5,r25,36		/* Check actual HPTE_SUB bit, this */
 	rldcr.	r0,r31,r5,0		/* must match pgtable.h definition */
@@ -607,7 +607,7 @@ htab_pte_insert_ok:
 	sld	r4,r4,r5
 	andc	r26,r26,r4
 	or	r26,r26,r3
-	ori	r5,r6,0x8000
+	ori	r5,r6,PTE_PAGE_HIDX_OFFSET
 	std	r26,0(r5)
 	lwsync
 	std	r30,0(r6)
-- 
1.7.10

^ permalink raw reply related

* [PATCH -V3 24/25] powerpc: Optimize hugepage invalidate
From: Aneesh Kumar K.V @ 2013-03-15  9:40 UTC (permalink / raw)
  To: benh, paulus; +Cc: linuxppc-dev, Aneesh Kumar K.V
In-Reply-To: <1363340407-22619-1-git-send-email-aneesh.kumar@linux.vnet.ibm.com>

From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>

Invalidating a hugepage involves invalidating multiple hpte entries.
Optimize the operation using H_BULK_REMOVE on lpar platforms.
On native, reduce the number of tlb flushes.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/machdep.h    |    3 +
 arch/powerpc/mm/hash_native_64.c      |   78 ++++++++++++++++++++
 arch/powerpc/mm/pgtable.c             |   13 +++-
 arch/powerpc/platforms/pseries/lpar.c |  126 +++++++++++++++++++++++++++++++--
 4 files changed, 210 insertions(+), 10 deletions(-)

diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h
index 6cee6e0..3bc7816 100644
--- a/arch/powerpc/include/asm/machdep.h
+++ b/arch/powerpc/include/asm/machdep.h
@@ -56,6 +56,9 @@ struct machdep_calls {
 	void            (*hpte_removebolted)(unsigned long ea,
 					     int psize, int ssize);
 	void		(*flush_hash_range)(unsigned long number, int local);
+	void		(*hugepage_invalidate)(struct mm_struct *mm,
+					       unsigned char *hpte_slot_array,
+					       unsigned long addr, int psize);
 
 	/* special for kexec, to be called in real mode, linear mapping is
 	 * destroyed as well */
diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c
index 409c916..f8f6387 100644
--- a/arch/powerpc/mm/hash_native_64.c
+++ b/arch/powerpc/mm/hash_native_64.c
@@ -439,6 +439,83 @@ static void native_hpte_invalidate(unsigned long slot, unsigned long vpn,
 	local_irq_restore(flags);
 }
 
+static void native_hugepage_invalidate(struct mm_struct *mm,
+				       unsigned char *hpte_slot_array,
+				       unsigned long addr, int psize)
+{
+	int ssize = 0, i;
+	int lock_tlbie;
+	struct hash_pte *hptep;
+	int actual_psize = MMU_PAGE_16M;
+	unsigned int max_hpte_count, valid;
+	unsigned long flags, s_addr = addr;
+	unsigned long hpte_v, want_v, shift;
+	unsigned long hidx, vpn = 0, vsid, hash, slot;
+
+	shift = mmu_psize_defs[psize].shift;
+	max_hpte_count = HUGE_PAGE_SIZE/(1ul << shift);
+
+	local_irq_save(flags);
+	for (i = 0; i < max_hpte_count; i++) {
+		/*
+		 * 8 bits per each hpte entries
+		 * 000| [ secondary group (one bit) | hidx (3 bits) | valid bit]
+		 */
+		valid = hpte_slot_array[i] & 0x1;
+		if (!valid)
+			continue;
+		hidx =  hpte_slot_array[i]  >> 1;
+
+		/* get the vpn */
+		addr = s_addr + (i * (1ul << shift));
+		if (!is_kernel_addr(addr)) {
+			ssize = user_segment_size(addr);
+			vsid = get_vsid(mm->context.id, addr, ssize);
+			WARN_ON(vsid == 0);
+		} else {
+			vsid = get_kernel_vsid(addr, mmu_kernel_ssize);
+			ssize = mmu_kernel_ssize;
+		}
+
+		vpn = hpt_vpn(addr, vsid, ssize);
+		hash = hpt_hash(vpn, shift, ssize);
+		if (hidx & _PTEIDX_SECONDARY)
+			hash = ~hash;
+
+		slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
+		slot += hidx & _PTEIDX_GROUP_IX;
+
+		hptep = htab_address + slot;
+		want_v = hpte_encode_avpn(vpn, psize, ssize);
+		native_lock_hpte(hptep);
+		hpte_v = hptep->v;
+
+		/* Even if we miss, we need to invalidate the TLB */
+		if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID))
+			native_unlock_hpte(hptep);
+		else
+			/* Invalidate the hpte. NOTE: this also unlocks it */
+			hptep->v = 0;
+	}
+	/*
+	 * Since this is a hugepage, we just need a single tlbie.
+	 * use the last vpn.
+	 */
+	lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);
+	if (lock_tlbie)
+		raw_spin_lock(&native_tlbie_lock);
+
+	asm volatile("ptesync":::"memory");
+	__tlbie(vpn, psize, actual_psize, ssize);
+	asm volatile("eieio; tlbsync; ptesync":::"memory");
+
+	if (lock_tlbie)
+		raw_spin_unlock(&native_tlbie_lock);
+
+	local_irq_restore(flags);
+}
+
+
 static void hpte_decode(struct hash_pte *hpte, unsigned long slot,
 			int *psize, int *apsize, int *ssize, unsigned long *vpn)
 {
@@ -667,4 +744,5 @@ void __init hpte_init_native(void)
 	ppc_md.hpte_remove	= native_hpte_remove;
 	ppc_md.hpte_clear_all	= native_hpte_clear;
 	ppc_md.flush_hash_range = native_flush_hash_range;
+	ppc_md.hugepage_invalidate   = native_hugepage_invalidate;
 }
diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
index fbff062..386cab8 100644
--- a/arch/powerpc/mm/pgtable.c
+++ b/arch/powerpc/mm/pgtable.c
@@ -433,6 +433,7 @@ void hpte_need_hugepage_flush(struct mm_struct *mm, unsigned long addr,
 {
 	int ssize, i;
 	unsigned long s_addr;
+	int max_hpte_count;
 	unsigned int psize, valid;
 	unsigned char *hpte_slot_array;
 	unsigned long hidx, vpn, vsid, hash, shift, slot;
@@ -446,12 +447,18 @@ void hpte_need_hugepage_flush(struct mm_struct *mm, unsigned long addr,
 	 * second half of the PMD
 	 */
 	hpte_slot_array = *(char **)(pmdp + PTRS_PER_PMD);
-
 	/* get the base page size */
 	psize = get_slice_psize(mm, s_addr);
-	shift = mmu_psize_defs[psize].shift;
 
-	for (i = 0; i < HUGE_PAGE_SIZE/(1ul << shift); i++) {
+	if (ppc_md.hugepage_invalidate)
+		return ppc_md.hugepage_invalidate(mm, hpte_slot_array,
+						  s_addr, psize);
+	/*
+	 * No bluk hpte removal support, invalidate each entry
+	 */
+	shift = mmu_psize_defs[psize].shift;
+	max_hpte_count = HUGE_PAGE_SIZE/(1ul << shift);
+	for (i = 0; i < max_hpte_count; i++) {
 		/*
 		 * 8 bits per each hpte entries
 		 * 000| [ secondary group (one bit) | hidx (3 bits) | valid bit]
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index 3daced3..5fcc621 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -45,6 +45,13 @@
 #include "plpar_wrappers.h"
 #include "pseries.h"
 
+/* Flag bits for H_BULK_REMOVE */
+#define HBR_REQUEST	0x4000000000000000UL
+#define HBR_RESPONSE	0x8000000000000000UL
+#define HBR_END		0xc000000000000000UL
+#define HBR_AVPN	0x0200000000000000UL
+#define HBR_ANDCOND	0x0100000000000000UL
+
 
 /* in hvCall.S */
 EXPORT_SYMBOL(plpar_hcall);
@@ -339,6 +346,117 @@ static void pSeries_lpar_hpte_invalidate(unsigned long slot, unsigned long vpn,
 	BUG_ON(lpar_rc != H_SUCCESS);
 }
 
+/*
+ * Limit iterations holding pSeries_lpar_tlbie_lock to 3. We also need
+ * to make sure that we avoid bouncing the hypervisor tlbie lock.
+ */
+#define PPC64_HUGE_HPTE_BATCH 12
+
+static void __pSeries_lpar_hugepage_invalidate(unsigned long *slot,
+					     unsigned long *vpn, int count,
+					     int psize, int ssize)
+{
+	unsigned long param[9];
+	int i = 0, pix = 0, rc;
+	unsigned long flags = 0;
+	int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);
+
+	if (lock_tlbie)
+		spin_lock_irqsave(&pSeries_lpar_tlbie_lock, flags);
+
+	for (i = 0; i < count; i++) {
+
+		if (!firmware_has_feature(FW_FEATURE_BULK_REMOVE)) {
+			pSeries_lpar_hpte_invalidate(slot[i], vpn[i], psize,
+						     ssize, 0);
+		} else {
+			param[pix] = HBR_REQUEST | HBR_AVPN | slot[i];
+			param[pix+1] = hpte_encode_avpn(vpn[i], psize, ssize);
+			pix += 2;
+			if (pix == 8) {
+				rc = plpar_hcall9(H_BULK_REMOVE, param,
+						  param[0], param[1], param[2],
+						  param[3], param[4], param[5],
+						  param[6], param[7]);
+				BUG_ON(rc != H_SUCCESS);
+				pix = 0;
+			}
+		}
+	}
+	if (pix) {
+		param[pix] = HBR_END;
+		rc = plpar_hcall9(H_BULK_REMOVE, param, param[0], param[1],
+				  param[2], param[3], param[4], param[5],
+				  param[6], param[7]);
+		BUG_ON(rc != H_SUCCESS);
+	}
+
+	if (lock_tlbie)
+		spin_unlock_irqrestore(&pSeries_lpar_tlbie_lock, flags);
+}
+
+static void pSeries_lpar_hugepage_invalidate(struct mm_struct *mm,
+				       unsigned char *hpte_slot_array,
+				       unsigned long addr, int psize)
+{
+	int ssize = 0, i, index = 0;
+	unsigned long s_addr = addr;
+	unsigned int max_hpte_count, valid;
+	unsigned long vpn_array[PPC64_HUGE_HPTE_BATCH];
+	unsigned long slot_array[PPC64_HUGE_HPTE_BATCH];
+	unsigned long shift, hidx, vpn = 0, vsid, hash, slot;
+
+	shift = mmu_psize_defs[psize].shift;
+	max_hpte_count = HUGE_PAGE_SIZE/(1ul << shift);
+
+	for (i = 0; i < max_hpte_count; i++) {
+		/*
+		 * 8 bits per each hpte entries
+		 * 000| [ secondary group (one bit) | hidx (3 bits) | valid bit]
+		 */
+		valid = hpte_slot_array[i] & 0x1;
+		if (!valid)
+			continue;
+		hidx =  hpte_slot_array[i]  >> 1;
+
+		/* get the vpn */
+		addr = s_addr + (i * (1ul << shift));
+		if (!is_kernel_addr(addr)) {
+			ssize = user_segment_size(addr);
+			vsid = get_vsid(mm->context.id, addr, ssize);
+			WARN_ON(vsid == 0);
+		} else {
+			vsid = get_kernel_vsid(addr, mmu_kernel_ssize);
+			ssize = mmu_kernel_ssize;
+		}
+
+		vpn = hpt_vpn(addr, vsid, ssize);
+		hash = hpt_hash(vpn, shift, ssize);
+		if (hidx & _PTEIDX_SECONDARY)
+			hash = ~hash;
+
+		slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
+		slot += hidx & _PTEIDX_GROUP_IX;
+
+		slot_array[index] = slot;
+		vpn_array[index] = vpn;
+		if (index == PPC64_HUGE_HPTE_BATCH - 1) {
+			/*
+			 * Now do a bluk invalidate
+			 */
+			__pSeries_lpar_hugepage_invalidate(slot_array,
+							   vpn_array,
+							   PPC64_HUGE_HPTE_BATCH,
+							   psize, ssize);
+			index = 0;
+		} else
+			index++;
+	}
+	if (index)
+		__pSeries_lpar_hugepage_invalidate(slot_array, vpn_array,
+						   index, psize, ssize);
+}
+
 static void pSeries_lpar_hpte_removebolted(unsigned long ea,
 					   int psize, int ssize)
 {
@@ -354,13 +472,6 @@ static void pSeries_lpar_hpte_removebolted(unsigned long ea,
 	pSeries_lpar_hpte_invalidate(slot, vpn, psize, ssize, 0);
 }
 
-/* Flag bits for H_BULK_REMOVE */
-#define HBR_REQUEST	0x4000000000000000UL
-#define HBR_RESPONSE	0x8000000000000000UL
-#define HBR_END		0xc000000000000000UL
-#define HBR_AVPN	0x0200000000000000UL
-#define HBR_ANDCOND	0x0100000000000000UL
-
 /*
  * Take a spinlock around flushes to avoid bouncing the hypervisor tlbie
  * lock.
@@ -446,6 +557,7 @@ void __init hpte_init_lpar(void)
 	ppc_md.hpte_removebolted = pSeries_lpar_hpte_removebolted;
 	ppc_md.flush_hash_range	= pSeries_lpar_flush_hash_range;
 	ppc_md.hpte_clear_all   = pSeries_lpar_hptab_clear;
+	ppc_md.hugepage_invalidate = pSeries_lpar_hugepage_invalidate;
 }
 
 #ifdef CONFIG_PPC_SMLPAR
-- 
1.7.10

^ permalink raw reply related

* [PATCH -V3 25/25] powerpc: Handle hugepages in kvm
From: Aneesh Kumar K.V @ 2013-03-15  9:40 UTC (permalink / raw)
  To: benh, paulus; +Cc: linuxppc-dev, Aneesh Kumar K.V
In-Reply-To: <1363340407-22619-1-git-send-email-aneesh.kumar@linux.vnet.ibm.com>

From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>

We could possibly avoid some of these changes because most of the HUGE PMD bits
map to PTE bits.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/kvm_book3s_64.h |   31 ++++++++++++
 arch/powerpc/kvm/book3s_64_mmu_hv.c      |   12 ++++-
 arch/powerpc/kvm/book3s_hv_rm_mmu.c      |   75 ++++++++++++++++++++++--------
 3 files changed, 97 insertions(+), 21 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
index 38bec1d..1c5c799 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -110,6 +110,7 @@ static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r,
 	return rb;
 }
 
+/* FIXME !! should we use hpte_actual_psize or hpte decode ? */
 static inline unsigned long hpte_page_size(unsigned long h, unsigned long l)
 {
 	/* only handle 4k, 64k and 16M pages for now */
@@ -189,6 +190,36 @@ static inline pte_t kvmppc_read_update_linux_pte(pte_t *p, int writing)
 	return pte;
 }
 
+/*
+ * Lock and read a linux hugepage PMD.  If it's present and writable, atomically
+ * set dirty and referenced bits and return the PMD, otherwise return 0.
+ */
+static inline pmd_t kvmppc_read_update_linux_hugepmd(pmd_t *p, int writing)
+{
+	pmd_t pmd, tmp;
+
+	/* wait until _PAGE_BUSY is clear then set it atomically */
+	__asm__ __volatile__ (
+		"1:	ldarx	%0,0,%3\n"
+		"	andi.	%1,%0,%4\n"
+		"	bne-	1b\n"
+		"	ori	%1,%0,%4\n"
+		"	stdcx.	%1,0,%3\n"
+		"	bne-	1b"
+		: "=&r" (pmd), "=&r" (tmp), "=m" (*p)
+		: "r" (p), "i" (PMD_HUGE_BUSY)
+		: "cc");
+
+	if (pmd_large(pmd)) {
+		pmd = pmd_mkyoung(pmd);
+		if (writing && pmd_write(pmd))
+			pmd = pte_mkdirty(pmd);
+	}
+
+	*p = pmd;	/* clears PMD_HUGE_BUSY */
+	return pmd;
+}
+
 /* Return HPTE cache control bits corresponding to Linux pte bits */
 static inline unsigned long hpte_cache_bits(unsigned long pte_val)
 {
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 4f2a7dc..da006da 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -675,6 +675,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		}
 		/* if the guest wants write access, see if that is OK */
 		if (!writing && hpte_is_writable(r)) {
+			int hugepage;
 			pte_t *ptep, pte;
 
 			/*
@@ -683,11 +684,18 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 			 */
 			rcu_read_lock_sched();
 			ptep = find_linux_pte_or_hugepte(current->mm->pgd,
-							 hva, NULL, NULL);
-			if (ptep && pte_present(*ptep)) {
+							 hva, NULL, &hugepage);
+			if (!hugepage && ptep && pte_present(*ptep)) {
 				pte = kvmppc_read_update_linux_pte(ptep, 1);
 				if (pte_write(pte))
 					write_ok = 1;
+			} else if (hugepage && ptep) {
+				pmd_t pmd = *(pmd_t *)ptep;
+				if (pmd_large(pmd)) {
+					pmd = kvmppc_read_update_linux_hugepmd((pmd_t *)ptep, 1);
+					if (pmd_write(pmd))
+						write_ok = 1;
+				}
 			}
 			rcu_read_unlock_sched();
 		}
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index 7c8e1ed..e9d4e3a 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -146,24 +146,37 @@ static void remove_revmap_chain(struct kvm *kvm, long pte_index,
 }
 
 static pte_t lookup_linux_pte(pgd_t *pgdir, unsigned long hva,
-			      int writing, unsigned long *pte_sizep)
+			      int writing, unsigned long *pte_sizep,
+			      int *hugepage)
 {
 	pte_t *ptep;
 	unsigned long ps = *pte_sizep;
 	unsigned int shift;
 
-	ptep = find_linux_pte_or_hugepte(pgdir, hva, &shift, NULL);
+	ptep = find_linux_pte_or_hugepte(pgdir, hva, &shift, hugepage);
 	if (!ptep)
 		return __pte(0);
-	if (shift)
-		*pte_sizep = 1ul << shift;
-	else
-		*pte_sizep = PAGE_SIZE;
+	if (*hugepage) {
+		*pte_sizep = 1ul << 24;
+	} else {
+		if (shift)
+			*pte_sizep = 1ul << shift;
+		else
+			*pte_sizep = PAGE_SIZE;
+	}
 	if (ps > *pte_sizep)
 		return __pte(0);
-	if (!pte_present(*ptep))
-		return __pte(0);
-	return kvmppc_read_update_linux_pte(ptep, writing);
+
+	if (*hugepage) {
+		pmd_t *pmdp = (pmd_t *)ptep;
+		if (!pmd_large(*pmdp))
+			return __pmd(0);
+		return kvmppc_read_update_linux_hugepmd(pmdp, writing);
+	} else {
+		if (!pte_present(*ptep))
+			return __pte(0);
+		return kvmppc_read_update_linux_pte(ptep, writing);
+	}
 }
 
 static inline void unlock_hpte(unsigned long *hpte, unsigned long hpte_v)
@@ -239,18 +252,34 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
 		pte_size = PAGE_SIZE << (pa & KVMPPC_PAGE_ORDER_MASK);
 		pa &= PAGE_MASK;
 	} else {
+		int hugepage;
+
 		/* Translate to host virtual address */
 		hva = __gfn_to_hva_memslot(memslot, gfn);
 
 		/* Look up the Linux PTE for the backing page */
 		pte_size = psize;
-		pte = lookup_linux_pte(pgdir, hva, writing, &pte_size);
-		if (pte_present(pte)) {
-			if (writing && !pte_write(pte))
-				/* make the actual HPTE be read-only */
-				ptel = hpte_make_readonly(ptel);
-			is_io = hpte_cache_bits(pte_val(pte));
-			pa = pte_pfn(pte) << PAGE_SHIFT;
+		pte = lookup_linux_pte(pgdir, hva, writing, &pte_size, &hugepage);
+		if (hugepage) {
+			pmd_t pmd = (pmd_t)pte;
+			if (!pmd_large(pmd)) {
+				if (writing && !pmd_write(pmd))
+					/* make the actual HPTE be read-only */
+					ptel = hpte_make_readonly(ptel);
+				/*
+				 * we support hugepage only for RAM
+				 */
+				is_io = 0;
+				pa = pmd_pfn(pmd) << PAGE_SHIFT;
+			}
+		} else {
+			if (pte_present(pte)) {
+				if (writing && !pte_write(pte))
+					/* make the actual HPTE be read-only */
+					ptel = hpte_make_readonly(ptel);
+				is_io = hpte_cache_bits(pte_val(pte));
+				pa = pte_pfn(pte) << PAGE_SHIFT;
+			}
 		}
 	}
 
@@ -645,10 +674,18 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
 			gfn = ((r & HPTE_R_RPN) & ~(psize - 1)) >> PAGE_SHIFT;
 			memslot = __gfn_to_memslot(kvm_memslots(kvm), gfn);
 			if (memslot) {
+				int hugepage;
 				hva = __gfn_to_hva_memslot(memslot, gfn);
-				pte = lookup_linux_pte(pgdir, hva, 1, &psize);
-				if (pte_present(pte) && !pte_write(pte))
-					r = hpte_make_readonly(r);
+				pte = lookup_linux_pte(pgdir, hva, 1,
+						       &psize, &hugepage);
+				if (hugepage) {
+					pmd_t pmd = (pmd_t)pte;
+					if (pmd_large(pmd) && !pmd_write(pmd))
+						r = hpte_make_readonly(r);
+				} else {
+					if (pte_present(pte) && !pte_write(pte))
+						r = hpte_make_readonly(r);
+				}
 			}
 		}
 	}
-- 
1.7.10

^ permalink raw reply related

* [PATCH -V3 02/25] powerpc: Save DAR and DSISR in pt_regs on MCE
From: Aneesh Kumar K.V @ 2013-03-15  9:39 UTC (permalink / raw)
  To: benh, paulus; +Cc: linuxppc-dev, Aneesh Kumar K.V
In-Reply-To: <1363340407-22619-1-git-send-email-aneesh.kumar@linux.vnet.ibm.com>

From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>

We were not saving DAR and DSISR on MCE. Save them and also print the values
along with exception details in xmon.

Acked-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/kernel/exceptions-64s.S |    9 +++++++++
 arch/powerpc/xmon/xmon.c             |    2 +-
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 0e9c48c..d02e730 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -640,9 +640,18 @@ slb_miss_user_pseries:
 	.align	7
 	.globl machine_check_common
 machine_check_common:
+
+	mfspr	r10,SPRN_DAR
+	std	r10,PACA_EXGEN+EX_DAR(r13)
+	mfspr	r10,SPRN_DSISR
+	stw	r10,PACA_EXGEN+EX_DSISR(r13)
 	EXCEPTION_PROLOG_COMMON(0x200, PACA_EXMC)
 	FINISH_NAP
 	DISABLE_INTS
+	ld	r3,PACA_EXGEN+EX_DAR(r13)
+	lwz	r4,PACA_EXGEN+EX_DSISR(r13)
+	std	r3,_DAR(r1)
+	std	r4,_DSISR(r1)
 	bl	.save_nvgprs
 	addi	r3,r1,STACK_FRAME_OVERHEAD
 	bl	.machine_check_exception
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index 1f8d2f1..a72e490 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -1423,7 +1423,7 @@ static void excprint(struct pt_regs *fp)
 	printf("    sp: %lx\n", fp->gpr[1]);
 	printf("   msr: %lx\n", fp->msr);
 
-	if (trap == 0x300 || trap == 0x380 || trap == 0x600) {
+	if (trap == 0x300 || trap == 0x380 || trap == 0x600 || trap == 0x200) {
 		printf("   dar: %lx\n", fp->dar);
 		if (trap != 0x380)
 			printf(" dsisr: %lx\n", fp->dsisr);
-- 
1.7.10

^ permalink raw reply related

* [PATCH -V3 07/25] powerpc: Use encode avpn where we need only avpn values
From: Aneesh Kumar K.V @ 2013-03-15  9:39 UTC (permalink / raw)
  To: benh, paulus; +Cc: linuxppc-dev, Aneesh Kumar K.V
In-Reply-To: <1363340407-22619-1-git-send-email-aneesh.kumar@linux.vnet.ibm.com>

From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>

In all these cases we are doing something similar to

HPTE_V_COMPARE(hpte_v, want_v) which ignores the HPTE_V_LARGE bit

With MPSS support we would need the actual page size to set the HPTE_V_LARGE
bit, and that won't be available in most of these cases. Since we are ignoring
the HPTE_V_LARGE bit, use the avpn value instead. There should not be any change
in behaviour after this patch.

Acked-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/mm/hash_native_64.c        |    8 ++++----
 arch/powerpc/platforms/cell/beat_htab.c |   10 +++++-----
 arch/powerpc/platforms/ps3/htab.c       |    2 +-
 3 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c
index ffc1e00..9d8983a 100644
--- a/arch/powerpc/mm/hash_native_64.c
+++ b/arch/powerpc/mm/hash_native_64.c
@@ -252,7 +252,7 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp,
 	unsigned long hpte_v, want_v;
 	int ret = 0;
 
-	want_v = hpte_encode_v(vpn, psize, ssize);
+	want_v = hpte_encode_avpn(vpn, psize, ssize);
 
 	DBG_LOW("    update(vpn=%016lx, avpnv=%016lx, group=%lx, newpp=%lx)",
 		vpn, want_v & HPTE_V_AVPN, slot, newpp);
@@ -288,7 +288,7 @@ static long native_hpte_find(unsigned long vpn, int psize, int ssize)
 	unsigned long want_v, hpte_v;
 
 	hash = hpt_hash(vpn, mmu_psize_defs[psize].shift, ssize);
-	want_v = hpte_encode_v(vpn, psize, ssize);
+	want_v = hpte_encode_avpn(vpn, psize, ssize);
 
 	/* Bolted mappings are only ever in the primary group */
 	slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
@@ -348,7 +348,7 @@ static void native_hpte_invalidate(unsigned long slot, unsigned long vpn,
 
 	DBG_LOW("    invalidate(vpn=%016lx, hash: %lx)\n", vpn, slot);
 
-	want_v = hpte_encode_v(vpn, psize, ssize);
+	want_v = hpte_encode_avpn(vpn, psize, ssize);
 	native_lock_hpte(hptep);
 	hpte_v = hptep->v;
 
@@ -520,7 +520,7 @@ static void native_flush_hash_range(unsigned long number, int local)
 			slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
 			slot += hidx & _PTEIDX_GROUP_IX;
 			hptep = htab_address + slot;
-			want_v = hpte_encode_v(vpn, psize, ssize);
+			want_v = hpte_encode_avpn(vpn, psize, ssize);
 			native_lock_hpte(hptep);
 			hpte_v = hptep->v;
 			if (!HPTE_V_COMPARE(hpte_v, want_v) ||
diff --git a/arch/powerpc/platforms/cell/beat_htab.c b/arch/powerpc/platforms/cell/beat_htab.c
index 0f6f839..472f9a7 100644
--- a/arch/powerpc/platforms/cell/beat_htab.c
+++ b/arch/powerpc/platforms/cell/beat_htab.c
@@ -191,7 +191,7 @@ static long beat_lpar_hpte_updatepp(unsigned long slot,
 	u64 dummy0, dummy1;
 	unsigned long want_v;
 
-	want_v = hpte_encode_v(vpn, psize, MMU_SEGSIZE_256M);
+	want_v = hpte_encode_avpn(vpn, psize, MMU_SEGSIZE_256M);
 
 	DBG_LOW("    update: "
 		"avpnv=%016lx, slot=%016lx, psize: %d, newpp %016lx ... ",
@@ -228,7 +228,7 @@ static long beat_lpar_hpte_find(unsigned long vpn, int psize)
 	unsigned long want_v, hpte_v;
 
 	hash = hpt_hash(vpn, mmu_psize_defs[psize].shift, MMU_SEGSIZE_256M);
-	want_v = hpte_encode_v(vpn, psize, MMU_SEGSIZE_256M);
+	want_v = hpte_encode_avpn(vpn, psize, MMU_SEGSIZE_256M);
 
 	for (j = 0; j < 2; j++) {
 		slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
@@ -283,7 +283,7 @@ static void beat_lpar_hpte_invalidate(unsigned long slot, unsigned long vpn,
 
 	DBG_LOW("    inval : slot=%lx, va=%016lx, psize: %d, local: %d\n",
 		slot, va, psize, local);
-	want_v = hpte_encode_v(vpn, psize, MMU_SEGSIZE_256M);
+	want_v = hpte_encode_avpn(vpn, psize, MMU_SEGSIZE_256M);
 
 	raw_spin_lock_irqsave(&beat_htab_lock, flags);
 	dummy1 = beat_lpar_hpte_getword0(slot);
@@ -372,7 +372,7 @@ static long beat_lpar_hpte_updatepp_v3(unsigned long slot,
 	unsigned long want_v;
 	unsigned long pss;
 
-	want_v = hpte_encode_v(vpn, psize, MMU_SEGSIZE_256M);
+	want_v = hpte_encode_avpn(vpn, psize, MMU_SEGSIZE_256M);
 	pss = (psize == MMU_PAGE_4K) ? -1UL : mmu_psize_defs[psize].penc;
 
 	DBG_LOW("    update: "
@@ -402,7 +402,7 @@ static void beat_lpar_hpte_invalidate_v3(unsigned long slot, unsigned long vpn,
 
 	DBG_LOW("    inval : slot=%lx, vpn=%016lx, psize: %d, local: %d\n",
 		slot, vpn, psize, local);
-	want_v = hpte_encode_v(vpn, psize, MMU_SEGSIZE_256M);
+	want_v = hpte_encode_avpn(vpn, psize, MMU_SEGSIZE_256M);
 	pss = (psize == MMU_PAGE_4K) ? -1UL : mmu_psize_defs[psize].penc;
 
 	lpar_rc = beat_invalidate_htab_entry3(0, slot, want_v, pss);
diff --git a/arch/powerpc/platforms/ps3/htab.c b/arch/powerpc/platforms/ps3/htab.c
index d00d7b0..07a4bba 100644
--- a/arch/powerpc/platforms/ps3/htab.c
+++ b/arch/powerpc/platforms/ps3/htab.c
@@ -115,7 +115,7 @@ static long ps3_hpte_updatepp(unsigned long slot, unsigned long newpp,
 	unsigned long flags;
 	long ret;
 
-	want_v = hpte_encode_v(vpn, psize, ssize);
+	want_v = hpte_encode_avpn(vpn, psize, ssize);
 
 	spin_lock_irqsave(&ps3_htab_lock, flags);
 
-- 
1.7.10

^ permalink raw reply related

* [PATCH -V3 09/25] powerpc: Fix hpte_decode to use the correct decoding for page sizes
From: Aneesh Kumar K.V @ 2013-03-15  9:39 UTC (permalink / raw)
  To: benh, paulus; +Cc: linuxppc-dev, Aneesh Kumar K.V
In-Reply-To: <1363340407-22619-1-git-send-email-aneesh.kumar@linux.vnet.ibm.com>

From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>

As per the ISA doc, we encode the base and actual page size in the LP bits of
the PTE. The number of bits used to encode the page sizes depends on the actual
page size. The ISA doc lists this as

   PTE LP     actual page size
rrrr rrrz 	≥8KB
rrrr rrzz	≥16KB
rrrr rzzz 	≥32KB
rrrr zzzz 	≥64KB
rrrz zzzz 	≥128KB
rrzz zzzz 	≥256KB
rzzz zzzz	≥512KB
zzzz zzzz 	≥1MB

ISA doc also says
"The values of the “z” bits used to specify each size, along with all possible
values of “r” bits in the LP field, must result in LP values distinct from
other LP values for other sizes."

Based on the above, update hpte_decode to use the correct decoding for LP bits.

Acked-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/mm/hash_native_64.c |   38 ++++++++++++++++++++++++--------------
 1 file changed, 24 insertions(+), 14 deletions(-)

diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c
index 910b50d..af9eb1c 100644
--- a/arch/powerpc/mm/hash_native_64.c
+++ b/arch/powerpc/mm/hash_native_64.c
@@ -415,41 +415,51 @@ static void hpte_decode(struct hash_pte *hpte, unsigned long slot,
 			int *psize, int *apsize, int *ssize, unsigned long *vpn)
 {
 	unsigned long avpn, pteg, vpi;
-	unsigned long hpte_r = hpte->r;
 	unsigned long hpte_v = hpte->v;
 	unsigned long vsid, seg_off;
-	int i, size, a_size, shift, penc;
+	int size, a_size, shift, mask;
+	/* Look at the 8 bit LP value */
+	unsigned int lp = (hpte->r >> LP_SHIFT) & ((1 << LP_BITS) - 1);
 
 	if (!(hpte_v & HPTE_V_LARGE)) {
 		size   = MMU_PAGE_4K;
 		a_size = MMU_PAGE_4K;
 	} else {
-		for (i = 0; i < LP_BITS; i++) {
-			if ((hpte_r & LP_MASK(i+1)) == LP_MASK(i+1))
-				break;
-		}
-		penc = LP_MASK(i+1) >> LP_SHIFT;
 		for (size = 0; size < MMU_PAGE_COUNT; size++) {
 
 			/* valid entries have a shift value */
 			if (!mmu_psize_defs[size].shift)
 				continue;
-			for (a_size = 0; a_size < MMU_PAGE_COUNT; a_size++) {
-
-				/* 4K pages are not represented by LP */
-				if (a_size == MMU_PAGE_4K)
-					continue;
 
+			/* start from 1 ignoring MMU_PAGE_4K */
+			for (a_size = 1; a_size < MMU_PAGE_COUNT; a_size++) {
 				/* valid entries have a shift value */
 				if (!mmu_psize_defs[a_size].shift)
 					continue;
 
-				if (penc == mmu_psize_defs[size].penc[a_size])
+				/* invalid penc */
+				if (mmu_psize_defs[size].penc[a_size] == -1)
+					continue;
+				/*
+				 * encoding bits per actual page size
+				 *        PTE LP     actual page size
+				 *    rrrr rrrz		≥8KB
+				 *    rrrr rrzz		≥16KB
+				 *    rrrr rzzz		≥32KB
+				 *    rrrr zzzz		≥64KB
+				 * .......
+				 */
+				shift = mmu_psize_defs[a_size].shift - LP_SHIFT;
+				if (shift > LP_BITS)
+					shift = LP_BITS;
+				mask = (1 << shift) - 1;
+				if ((lp & mask) ==
+				    mmu_psize_defs[size].penc[a_size]) {
 					goto out;
+				}
 			}
 		}
 	}
-
 out:
 	/* This works for all page sizes, and for 256M and 1T segments */
 	*ssize = hpte_v >> HPTE_V_SSIZE_SHIFT;
-- 
1.7.10

^ permalink raw reply related

* [PATCH -V3 05/25] powerpc: Move the pte free routines from common header
From: Aneesh Kumar K.V @ 2013-03-15  9:39 UTC (permalink / raw)
  To: benh, paulus; +Cc: linuxppc-dev, Aneesh Kumar K.V
In-Reply-To: <1363340407-22619-1-git-send-email-aneesh.kumar@linux.vnet.ibm.com>

From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>

This patch moves the common code to the 32/64 bit headers and also duplicates
it across the 4K_PAGES and 64K_PAGES sections. We will later change the 64 bit
64K_PAGES version to support smaller PTE fragments. The patch doesn't introduce
any functional changes.

Acked-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/pgalloc-32.h |   45 ++++++++++
 arch/powerpc/include/asm/pgalloc-64.h |  157 ++++++++++++++++++++++++++++++---
 arch/powerpc/include/asm/pgalloc.h    |   46 +---------
 3 files changed, 189 insertions(+), 59 deletions(-)

diff --git a/arch/powerpc/include/asm/pgalloc-32.h b/arch/powerpc/include/asm/pgalloc-32.h
index 580cf73..27b2386 100644
--- a/arch/powerpc/include/asm/pgalloc-32.h
+++ b/arch/powerpc/include/asm/pgalloc-32.h
@@ -37,6 +37,17 @@ extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
 extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr);
 extern pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long addr);
 
+static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
+{
+	free_page((unsigned long)pte);
+}
+
+static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage)
+{
+	pgtable_page_dtor(ptepage);
+	__free_page(ptepage);
+}
+
 static inline void pgtable_free(void *table, unsigned index_size)
 {
 	BUG_ON(index_size); /* 32-bit doesn't use this */
@@ -45,4 +56,38 @@ static inline void pgtable_free(void *table, unsigned index_size)
 
 #define check_pgt_cache()	do { } while (0)
 
+#ifdef CONFIG_SMP
+static inline void pgtable_free_tlb(struct mmu_gather *tlb,
+				    void *table, int shift)
+{
+	unsigned long pgf = (unsigned long)table;
+	BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
+	pgf |= shift;
+	tlb_remove_table(tlb, (void *)pgf);
+}
+
+static inline void __tlb_remove_table(void *_table)
+{
+	void *table = (void *)((unsigned long)_table & ~MAX_PGTABLE_INDEX_SIZE);
+	unsigned shift = (unsigned long)_table & MAX_PGTABLE_INDEX_SIZE;
+
+	pgtable_free(table, shift);
+}
+#else
+static inline void pgtable_free_tlb(struct mmu_gather *tlb,
+				    void *table, int shift)
+{
+	pgtable_free(table, shift);
+}
+#endif
+
+static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table,
+				  unsigned long address)
+{
+	struct page *page = page_address(table);
+
+	tlb_flush_pgtable(tlb, address);
+	pgtable_page_dtor(page);
+	pgtable_free_tlb(tlb, page, 0);
+}
 #endif /* _ASM_POWERPC_PGALLOC_32_H */
diff --git a/arch/powerpc/include/asm/pgalloc-64.h b/arch/powerpc/include/asm/pgalloc-64.h
index 292725c..cdbf555 100644
--- a/arch/powerpc/include/asm/pgalloc-64.h
+++ b/arch/powerpc/include/asm/pgalloc-64.h
@@ -72,8 +72,83 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
 #define pmd_populate_kernel(mm, pmd, pte) pmd_set(pmd, (unsigned long)(pte))
 #define pmd_pgtable(pmd) pmd_page(pmd)
 
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
+					  unsigned long address)
+{
+	return (pte_t *)__get_free_page(GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO);
+}
+
+static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
+				      unsigned long address)
+{
+	struct page *page;
+	pte_t *pte;
+
+	pte = pte_alloc_one_kernel(mm, address);
+	if (!pte)
+		return NULL;
+	page = virt_to_page(pte);
+	pgtable_page_ctor(page);
+	return page;
+}
+
+static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
+{
+	free_page((unsigned long)pte);
+}
+
+static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage)
+{
+	pgtable_page_dtor(ptepage);
+	__free_page(ptepage);
+}
+
+static inline void pgtable_free(void *table, unsigned index_size)
+{
+	if (!index_size)
+		free_page((unsigned long)table);
+	else {
+		BUG_ON(index_size > MAX_PGTABLE_INDEX_SIZE);
+		kmem_cache_free(PGT_CACHE(index_size), table);
+	}
+}
+
+#ifdef CONFIG_SMP
+static inline void pgtable_free_tlb(struct mmu_gather *tlb,
+				    void *table, int shift)
+{
+	unsigned long pgf = (unsigned long)table;
+	BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
+	pgf |= shift;
+	tlb_remove_table(tlb, (void *)pgf);
+}
+
+static inline void __tlb_remove_table(void *_table)
+{
+	void *table = (void *)((unsigned long)_table & ~MAX_PGTABLE_INDEX_SIZE);
+	unsigned shift = (unsigned long)_table & MAX_PGTABLE_INDEX_SIZE;
+
+	pgtable_free(table, shift);
+}
+#else /* !CONFIG_SMP */
+static inline void pgtable_free_tlb(struct mmu_gather *tlb,
+				    void *table, int shift)
+{
+	pgtable_free(table, shift);
+}
+#endif /* CONFIG_SMP */
+
+static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table,
+				  unsigned long address)
+{
+	struct page *page = page_address(table);
+
+	tlb_flush_pgtable(tlb, address);
+	pgtable_page_dtor(page);
+	pgtable_free_tlb(tlb, page, 0);
+}
 
-#else /* CONFIG_PPC_64K_PAGES */
+#else /* if CONFIG_PPC_64K_PAGES */
 
 #define pud_populate(mm, pud, pmd)	pud_set(pud, (unsigned long)pmd)
 
@@ -83,31 +158,25 @@ static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd,
 	pmd_set(pmd, (unsigned long)pte);
 }
 
-#define pmd_populate(mm, pmd, pte_page) \
-	pmd_populate_kernel(mm, pmd, page_address(pte_page))
-#define pmd_pgtable(pmd) pmd_page(pmd)
-
-#endif /* CONFIG_PPC_64K_PAGES */
-
-static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
+static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
+				pgtable_t pte_page)
 {
-	return kmem_cache_alloc(PGT_CACHE(PMD_INDEX_SIZE),
-				GFP_KERNEL|__GFP_REPEAT);
+	pmd_populate_kernel(mm, pmd, page_address(pte_page));
 }
 
-static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
+static inline pgtable_t pmd_pgtable(pmd_t pmd)
 {
-	kmem_cache_free(PGT_CACHE(PMD_INDEX_SIZE), pmd);
+	return pmd_page(pmd);
 }
 
 static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
 					  unsigned long address)
 {
-        return (pte_t *)__get_free_page(GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO);
+	return (pte_t *)__get_free_page(GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO);
 }
 
 static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
-					unsigned long address)
+				      unsigned long address)
 {
 	struct page *page;
 	pte_t *pte;
@@ -120,6 +189,17 @@ static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
 	return page;
 }
 
+static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
+{
+	free_page((unsigned long)pte);
+}
+
+static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage)
+{
+	pgtable_page_dtor(ptepage);
+	__free_page(ptepage);
+}
+
 static inline void pgtable_free(void *table, unsigned index_size)
 {
 	if (!index_size)
@@ -130,6 +210,55 @@ static inline void pgtable_free(void *table, unsigned index_size)
 	}
 }
 
+#ifdef CONFIG_SMP
+static inline void pgtable_free_tlb(struct mmu_gather *tlb,
+				    void *table, int shift)
+{
+	unsigned long pgf = (unsigned long)table;
+	BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
+	pgf |= shift;
+	tlb_remove_table(tlb, (void *)pgf);
+}
+
+static inline void __tlb_remove_table(void *_table)
+{
+	void *table = (void *)((unsigned long)_table & ~MAX_PGTABLE_INDEX_SIZE);
+	unsigned shift = (unsigned long)_table & MAX_PGTABLE_INDEX_SIZE;
+
+	pgtable_free(table, shift);
+}
+#else /* !CONFIG_SMP */
+static inline void pgtable_free_tlb(struct mmu_gather *tlb,
+				    void *table, int shift)
+{
+	pgtable_free(table, shift);
+}
+#endif /* CONFIG_SMP */
+
+static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table,
+				  unsigned long address)
+{
+	struct page *page = page_address(table);
+
+	tlb_flush_pgtable(tlb, address);
+	pgtable_page_dtor(page);
+	pgtable_free_tlb(tlb, page, 0);
+}
+
+#endif /* CONFIG_PPC_64K_PAGES */
+
+static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
+{
+	return kmem_cache_alloc(PGT_CACHE(PMD_INDEX_SIZE),
+				GFP_KERNEL|__GFP_REPEAT);
+}
+
+static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
+{
+	kmem_cache_free(PGT_CACHE(PMD_INDEX_SIZE), pmd);
+}
+
+
 #define __pmd_free_tlb(tlb, pmd, addr)		      \
 	pgtable_free_tlb(tlb, pmd, PMD_INDEX_SIZE)
 #ifndef CONFIG_PPC_64K_PAGES
diff --git a/arch/powerpc/include/asm/pgalloc.h b/arch/powerpc/include/asm/pgalloc.h
index bf301ac..e9a9f60 100644
--- a/arch/powerpc/include/asm/pgalloc.h
+++ b/arch/powerpc/include/asm/pgalloc.h
@@ -3,6 +3,7 @@
 #ifdef __KERNEL__
 
 #include <linux/mm.h>
+#include <asm-generic/tlb.h>
 
 #ifdef CONFIG_PPC_BOOK3E
 extern void tlb_flush_pgtable(struct mmu_gather *tlb, unsigned long address);
@@ -13,56 +14,11 @@ static inline void tlb_flush_pgtable(struct mmu_gather *tlb,
 }
 #endif /* !CONFIG_PPC_BOOK3E */
 
-static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
-{
-	free_page((unsigned long)pte);
-}
-
-static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage)
-{
-	pgtable_page_dtor(ptepage);
-	__free_page(ptepage);
-}
-
 #ifdef CONFIG_PPC64
 #include <asm/pgalloc-64.h>
 #else
 #include <asm/pgalloc-32.h>
 #endif
 
-#ifdef CONFIG_SMP
-struct mmu_gather;
-extern void tlb_remove_table(struct mmu_gather *, void *);
-
-static inline void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift)
-{
-	unsigned long pgf = (unsigned long)table;
-	BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
-	pgf |= shift;
-	tlb_remove_table(tlb, (void *)pgf);
-}
-
-static inline void __tlb_remove_table(void *_table)
-{
-	void *table = (void *)((unsigned long)_table & ~MAX_PGTABLE_INDEX_SIZE);
-	unsigned shift = (unsigned long)_table & MAX_PGTABLE_INDEX_SIZE;
-
-	pgtable_free(table, shift);
-}
-#else /* CONFIG_SMP */
-static inline void pgtable_free_tlb(struct mmu_gather *tlb, void *table, unsigned shift)
-{
-	pgtable_free(table, shift);
-}
-#endif /* !CONFIG_SMP */
-
-static inline void __pte_free_tlb(struct mmu_gather *tlb, struct page *ptepage,
-				  unsigned long address)
-{
-	tlb_flush_pgtable(tlb, address);
-	pgtable_page_dtor(ptepage);
-	pgtable_free_tlb(tlb, page_address(ptepage), 0);
-}
-
 #endif /* __KERNEL__ */
 #endif /* _ASM_POWERPC_PGALLOC_H */
-- 
1.7.10

^ permalink raw reply related

* [PATCH -V3 18/25] powerpc/THP: Double the PMD table size for THP
From: Aneesh Kumar K.V @ 2013-03-15  9:40 UTC (permalink / raw)
  To: benh, paulus; +Cc: linuxppc-dev, Aneesh Kumar K.V
In-Reply-To: <1363340407-22619-1-git-send-email-aneesh.kumar@linux.vnet.ibm.com>

From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>

THP code allocates a PTE page along with the large page request and deposits it
for later use. This is to ensure that we won't have any failures when we split
hugepages to regular pages.

On powerpc we want to use the deposited PTE page for storing the hash pte slot
and secondary bit information for the HPTEs. We use the second half
of the pmd table to save the deposited PTE page.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/pgalloc-64.h    |    6 +++---
 arch/powerpc/include/asm/pgtable-ppc64.h |    6 +++++-
 arch/powerpc/mm/init_64.c                |    9 ++++++---
 3 files changed, 14 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/include/asm/pgalloc-64.h b/arch/powerpc/include/asm/pgalloc-64.h
index 3418989..46c6ffa 100644
--- a/arch/powerpc/include/asm/pgalloc-64.h
+++ b/arch/powerpc/include/asm/pgalloc-64.h
@@ -208,17 +208,17 @@ static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table,
 
 static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
 {
-	return kmem_cache_alloc(PGT_CACHE(PMD_INDEX_SIZE),
+	return kmem_cache_alloc(PGT_CACHE(PMD_CACHE_INDEX),
 				GFP_KERNEL|__GFP_REPEAT);
 }
 
 static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
 {
-	kmem_cache_free(PGT_CACHE(PMD_INDEX_SIZE), pmd);
+	kmem_cache_free(PGT_CACHE(PMD_CACHE_INDEX), pmd);
 }
 
 #define __pmd_free_tlb(tlb, pmd, addr)		      \
-	pgtable_free_tlb(tlb, pmd, PMD_INDEX_SIZE)
+	pgtable_free_tlb(tlb, pmd, PMD_CACHE_INDEX)
 #ifndef CONFIG_PPC_64K_PAGES
 #define __pud_free_tlb(tlb, pud, addr)		      \
 	pgtable_free_tlb(tlb, pud, PUD_INDEX_SIZE)
diff --git a/arch/powerpc/include/asm/pgtable-ppc64.h b/arch/powerpc/include/asm/pgtable-ppc64.h
index c0747c7..d4e845c 100644
--- a/arch/powerpc/include/asm/pgtable-ppc64.h
+++ b/arch/powerpc/include/asm/pgtable-ppc64.h
@@ -20,7 +20,11 @@
                 	    PUD_INDEX_SIZE + PGD_INDEX_SIZE + PAGE_SHIFT)
 #define PGTABLE_RANGE (ASM_CONST(1) << PGTABLE_EADDR_SIZE)
 
-
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#define PMD_CACHE_INDEX	(PMD_INDEX_SIZE + 1)
+#else
+#define PMD_CACHE_INDEX	PMD_INDEX_SIZE
+#endif
 /*
  * Define the address range of the kernel non-linear virtual area
  */
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index 95a4529..7608b0d 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -88,7 +88,11 @@ static void pgd_ctor(void *addr)
 
 static void pmd_ctor(void *addr)
 {
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+	memset(addr, 0, PMD_TABLE_SIZE * 2);
+#else
 	memset(addr, 0, PMD_TABLE_SIZE);
+#endif
 }
 
 struct kmem_cache *pgtable_cache[MAX_PGTABLE_INDEX_SIZE];
@@ -138,10 +142,9 @@ void pgtable_cache_add(unsigned shift, void (*ctor)(void *))
 void pgtable_cache_init(void)
 {
 	pgtable_cache_add(PGD_INDEX_SIZE, pgd_ctor);
-	pgtable_cache_add(PMD_INDEX_SIZE, pmd_ctor);
-	if (!PGT_CACHE(PGD_INDEX_SIZE) || !PGT_CACHE(PMD_INDEX_SIZE))
+	pgtable_cache_add(PMD_CACHE_INDEX, pmd_ctor);
+	if (!PGT_CACHE(PGD_INDEX_SIZE) || !PGT_CACHE(PMD_CACHE_INDEX))
 		panic("Couldn't allocate pgtable caches");
-
 	/* In all current configs, when the PUD index exists it's the
 	 * same size as either the pgd or pmd index.  Verify that the
 	 * initialization above has also created a PUD cache.  This
-- 
1.7.10

^ permalink raw reply related

* [PATCH -V3 16/25] mm/THP: withdraw the pgtable after pmdp related operations
From: Aneesh Kumar K.V @ 2013-03-15  9:39 UTC (permalink / raw)
  To: benh, paulus; +Cc: Andrea Arcangeli, linuxppc-dev, Aneesh Kumar K.V
In-Reply-To: <1363340407-22619-1-git-send-email-aneesh.kumar@linux.vnet.ibm.com>

From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>

For architectures like ppc64 we look at the deposited pgtable when
calling pmdp_get_and_clear. So call pgtable_trans_huge_withdraw only
after finishing the pmdp related operations.

Cc: Andrea Arcangeli <aarcange@redhat.com>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 mm/huge_memory.c |    3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index e91b763..5c7cd7d 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1380,9 +1380,10 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
 		struct page *page;
 		pgtable_t pgtable;
 		pmd_t orig_pmd;
-		pgtable = pgtable_trans_huge_withdraw(tlb->mm, pmd);
+
 		orig_pmd = pmdp_get_and_clear(tlb->mm, addr, pmd);
 		tlb_remove_pmd_tlb_entry(tlb, pmd, addr);
+		pgtable = pgtable_trans_huge_withdraw(tlb->mm, pmd);
 		if (is_huge_zero_pmd(orig_pmd)) {
 			tlb->mm->nr_ptes--;
 			spin_unlock(&tlb->mm->page_table_lock);
-- 
1.7.10

^ permalink raw reply related

* [PATCH -V3 13/25] powerpc: Print page size info during boot
From: Aneesh Kumar K.V @ 2013-03-15  9:39 UTC (permalink / raw)
  To: benh, paulus; +Cc: linuxppc-dev, Aneesh Kumar K.V
In-Reply-To: <1363340407-22619-1-git-send-email-aneesh.kumar@linux.vnet.ibm.com>

From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>

This gives a hint about the different base and actual page size combinations
supported by the platform.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/mm/hash_utils_64.c |   10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 56ff4bb..1f2ebbd 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -315,7 +315,7 @@ static int __init htab_dt_scan_page_sizes(unsigned long node,
 	prop = (u32 *)of_get_flat_dt_prop(node,
 					  "ibm,segment-page-sizes", &size);
 	if (prop != NULL) {
-		DBG("Page sizes from device-tree:\n");
+		pr_info("Page sizes from device-tree:\n");
 		size /= 4;
 		cur_cpu_spec->mmu_features &= ~(MMU_FTR_16M_PAGE);
 		while(size > 0) {
@@ -369,10 +369,10 @@ static int __init htab_dt_scan_page_sizes(unsigned long node,
 					       "shift=%d\n", base_shift, shift);
 
 				def->penc[idx] = penc;
-				DBG(" %d: shift=%02x, sllp=%04lx, "
-				    "avpnm=%08lx, tlbiel=%d, penc=%d\n",
-				    idx, shift, def->sllp, def->avpnm,
-				    def->tlbiel, def->penc[idx]);
+				pr_info("base_shift=%d: shift=%d, sllp=0x%04lx,"
+					" avpnm=0x%08lx, tlbiel=%d, penc=%d\n",
+					base_shift, shift, def->sllp,
+					def->avpnm, def->tlbiel, def->penc[idx]);
 			}
 		}
 		return 1;
-- 
1.7.10

^ permalink raw reply related

* [PATCH -V3 20/25] powerpc/THP: Add code to handle HPTE faults for large pages
From: Aneesh Kumar K.V @ 2013-03-15  9:40 UTC (permalink / raw)
  To: benh, paulus; +Cc: linuxppc-dev, Aneesh Kumar K.V
In-Reply-To: <1363340407-22619-1-git-send-email-aneesh.kumar@linux.vnet.ibm.com>

From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>

We now have pmd entries covering a 16MB range. To implement THP on powerpc,
we double the size of the PMD. The second half is used to deposit the pgtable (PTE page).
We also use the deposited PTE page for tracking the HPTE information. The information
includes [ secondary group | 3 bit hidx | valid ]. We use one byte per HPTE entry.
With a 16MB hugepage and 64K HPTEs we need 256 entries, and with 4K HPTEs we need
4096 entries. Both will fit in a 4K PTE page.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/mmu-hash64.h    |    5 +
 arch/powerpc/include/asm/pgtable-ppc64.h |   31 +----
 arch/powerpc/kernel/io-workarounds.c     |    3 +-
 arch/powerpc/kvm/book3s_64_mmu_hv.c      |    2 +-
 arch/powerpc/kvm/book3s_hv_rm_mmu.c      |    4 +-
 arch/powerpc/mm/Makefile                 |    1 +
 arch/powerpc/mm/hash_utils_64.c          |   16 ++-
 arch/powerpc/mm/hugepage-hash64.c        |  185 ++++++++++++++++++++++++++++++
 arch/powerpc/mm/hugetlbpage.c            |   31 ++++-
 arch/powerpc/mm/pgtable.c                |   38 ++++++
 arch/powerpc/mm/tlb_hash64.c             |    5 +-
 arch/powerpc/perf/callchain.c            |    2 +-
 arch/powerpc/platforms/pseries/eeh.c     |    5 +-
 13 files changed, 286 insertions(+), 42 deletions(-)
 create mode 100644 arch/powerpc/mm/hugepage-hash64.c

diff --git a/arch/powerpc/include/asm/mmu-hash64.h b/arch/powerpc/include/asm/mmu-hash64.h
index e187254..a74a3de 100644
--- a/arch/powerpc/include/asm/mmu-hash64.h
+++ b/arch/powerpc/include/asm/mmu-hash64.h
@@ -322,6 +322,11 @@ extern int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
 int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
 		     pte_t *ptep, unsigned long trap, int local, int ssize,
 		     unsigned int shift, unsigned int mmu_psize);
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+extern int __hash_page_thp(unsigned long ea, unsigned long access,
+			   unsigned long vsid, pmd_t *pmdp, unsigned long trap,
+			   int local, int ssize, unsigned int psize);
+#endif
 extern void hash_failure_debug(unsigned long ea, unsigned long access,
 			       unsigned long vsid, unsigned long trap,
 			       int ssize, int psize, int lpsize,
diff --git a/arch/powerpc/include/asm/pgtable-ppc64.h b/arch/powerpc/include/asm/pgtable-ppc64.h
index d4e845c..9b81283 100644
--- a/arch/powerpc/include/asm/pgtable-ppc64.h
+++ b/arch/powerpc/include/asm/pgtable-ppc64.h
@@ -345,39 +345,18 @@ static inline void __ptep_set_access_flags(pte_t *ptep, pte_t entry)
 void pgtable_cache_add(unsigned shift, void (*ctor)(void *));
 void pgtable_cache_init(void);
 
-/*
- * find_linux_pte returns the address of a linux pte for a given
- * effective address and directory.  If not found, it returns zero.
- */
-static inline pte_t *find_linux_pte(pgd_t *pgdir, unsigned long ea)
-{
-	pgd_t *pg;
-	pud_t *pu;
-	pmd_t *pm;
-	pte_t *pt = NULL;
-
-	pg = pgdir + pgd_index(ea);
-	if (!pgd_none(*pg)) {
-		pu = pud_offset(pg, ea);
-		if (!pud_none(*pu)) {
-			pm = pmd_offset(pu, ea);
-			if (pmd_present(*pm))
-				pt = pte_offset_kernel(pm, ea);
-		}
-	}
-	return pt;
-}
-
+pte_t *find_linux_pte(pgd_t *pgdir, unsigned long ea, unsigned int *thp);
 #ifdef CONFIG_HUGETLB_PAGE
 pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea,
-				 unsigned *shift);
+				 unsigned *shift, unsigned int *hugepage);
 #else
 static inline pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea,
-					       unsigned *shift)
+					       unsigned *shift,
+					       unsigned int *hugepage)
 {
 	if (shift)
 		*shift = 0;
-	return find_linux_pte(pgdir, ea);
+	return find_linux_pte(pgdir, ea, hugepage);
 }
 #endif /* !CONFIG_HUGETLB_PAGE */
 
diff --git a/arch/powerpc/kernel/io-workarounds.c b/arch/powerpc/kernel/io-workarounds.c
index 50e90b7..a9c904f 100644
--- a/arch/powerpc/kernel/io-workarounds.c
+++ b/arch/powerpc/kernel/io-workarounds.c
@@ -70,7 +70,8 @@ struct iowa_bus *iowa_mem_find_bus(const PCI_IO_ADDR addr)
 		if (vaddr < PHB_IO_BASE || vaddr >= PHB_IO_END)
 			return NULL;
 
-		ptep = find_linux_pte(init_mm.pgd, vaddr);
+		/* we won't find hugepages here */
+		ptep = find_linux_pte(init_mm.pgd, vaddr, NULL);
 		if (ptep == NULL)
 			paddr = 0;
 		else
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 8cc18ab..4f2a7dc 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -683,7 +683,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 			 */
 			rcu_read_lock_sched();
 			ptep = find_linux_pte_or_hugepte(current->mm->pgd,
-							 hva, NULL);
+							 hva, NULL, NULL);
 			if (ptep && pte_present(*ptep)) {
 				pte = kvmppc_read_update_linux_pte(ptep, 1);
 				if (pte_write(pte))
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index 19c93ba..7c8e1ed 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -27,7 +27,7 @@ static void *real_vmalloc_addr(void *x)
 	unsigned long addr = (unsigned long) x;
 	pte_t *p;
 
-	p = find_linux_pte(swapper_pg_dir, addr);
+	p = find_linux_pte(swapper_pg_dir, addr, NULL);
 	if (!p || !pte_present(*p))
 		return NULL;
 	/* assume we don't have huge pages in vmalloc space... */
@@ -152,7 +152,7 @@ static pte_t lookup_linux_pte(pgd_t *pgdir, unsigned long hva,
 	unsigned long ps = *pte_sizep;
 	unsigned int shift;
 
-	ptep = find_linux_pte_or_hugepte(pgdir, hva, &shift);
+	ptep = find_linux_pte_or_hugepte(pgdir, hva, &shift, NULL);
 	if (!ptep)
 		return __pte(0);
 	if (shift)
diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile
index 3787b61..997deb4 100644
--- a/arch/powerpc/mm/Makefile
+++ b/arch/powerpc/mm/Makefile
@@ -33,6 +33,7 @@ obj-y				+= hugetlbpage.o
 obj-$(CONFIG_PPC_STD_MMU_64)	+= hugetlbpage-hash64.o
 obj-$(CONFIG_PPC_BOOK3E_MMU)	+= hugetlbpage-book3e.o
 endif
+obj-$(CONFIG_TRANSPARENT_HUGEPAGE) += hugepage-hash64.o
 obj-$(CONFIG_PPC_SUBPAGE_PROT)	+= subpage-prot.o
 obj-$(CONFIG_NOT_COHERENT_CACHE) += dma-noncoherent.o
 obj-$(CONFIG_HIGHMEM)		+= highmem.o
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 1f2ebbd..cd3ecd8 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -955,7 +955,7 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
 	unsigned long vsid;
 	struct mm_struct *mm;
 	pte_t *ptep;
-	unsigned hugeshift;
+	unsigned hugeshift, hugepage;
 	const struct cpumask *tmp;
 	int rc, user_region = 0, local = 0;
 	int psize, ssize;
@@ -1021,7 +1021,7 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
 #endif /* CONFIG_PPC_64K_PAGES */
 
 	/* Get PTE and page size from page tables */
-	ptep = find_linux_pte_or_hugepte(pgdir, ea, &hugeshift);
+	ptep = find_linux_pte_or_hugepte(pgdir, ea, &hugeshift, &hugepage);
 	if (ptep == NULL || !pte_present(*ptep)) {
 		DBG_LOW(" no PTE !\n");
 		return 1;
@@ -1044,6 +1044,12 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
 					ssize, hugeshift, psize);
 #endif /* CONFIG_HUGETLB_PAGE */
 
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+	if (hugepage)
+		return __hash_page_thp(ea, access, vsid, (pmd_t *)ptep,
+				       trap, local, ssize, psize);
+#endif
+
 #ifndef CONFIG_PPC_64K_PAGES
 	DBG_LOW(" i-pte: %016lx\n", pte_val(*ptep));
 #else
@@ -1149,7 +1155,11 @@ void hash_preload(struct mm_struct *mm, unsigned long ea,
 	pgdir = mm->pgd;
 	if (pgdir == NULL)
 		return;
-	ptep = find_linux_pte(pgdir, ea);
+	/*
+	 * We haven't implemented update_mmu_cache_pmd yet. We get called
+	 * only for non hugepages. Hence can ignore THP here
+	 */
+	ptep = find_linux_pte(pgdir, ea, NULL);
 	if (!ptep)
 		return;
 
diff --git a/arch/powerpc/mm/hugepage-hash64.c b/arch/powerpc/mm/hugepage-hash64.c
new file mode 100644
index 0000000..3f6140d
--- /dev/null
+++ b/arch/powerpc/mm/hugepage-hash64.c
@@ -0,0 +1,185 @@
+/*
+ * Copyright IBM Corporation, 2013
+ * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2.1 of the GNU Lesser General Public License
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ */
+
+/*
+ * PPC64 THP Support for hash based MMUs
+ */
+#include <linux/mm.h>
+#include <asm/machdep.h>
+
+/*
+ * The linux hugepage PMD now include the pmd entries followed by the address
+ * to the stashed pgtable_t. The stashed pgtable_t contains the hpte bits.
+ * [ secondary group | 3 bit hidx | valid ]. We use one byte per each HPTE entry.
+ * With 16MB hugepage and 64K HPTE we need 256 entries and with 4K HPTE we need
+ * 4096 entries. Both will fit in a 4K pgtable_t.
+ */
+int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid,
+		    pmd_t *pmdp, unsigned long trap, int local, int ssize,
+		    unsigned int psize)
+{
+	unsigned int index, valid;
+	unsigned char *hpte_slot_array;
+	unsigned long rflags, pa, hidx;
+	unsigned long old_pmd, new_pmd;
+	int ret, lpsize = MMU_PAGE_16M;
+	unsigned long vpn, hash, shift, slot;
+
+	/*
+	 * atomically mark the linux large page PMD busy and dirty
+	 */
+	do {
+		old_pmd = pmd_val(*pmdp);
+		/* If PMD busy, retry the access */
+		if (unlikely(old_pmd & PMD_HUGE_BUSY))
+			return 0;
+		/* If PMD permissions don't match, take page fault */
+		if (unlikely(access & ~old_pmd))
+			return 1;
+		/*
+		 * Try to lock the PTE, add ACCESSED and DIRTY if it was
+		 * a write access
+		 */
+		new_pmd = old_pmd | PMD_HUGE_BUSY | PMD_HUGE_ACCESSED;
+		if (access & _PAGE_RW)
+			new_pmd |= PMD_HUGE_DIRTY;
+	} while (old_pmd != __cmpxchg_u64((unsigned long *)pmdp,
+					  old_pmd, new_pmd));
+	/*
+	 * PP bits. PMD_HUGE_USER is already PP bit 0x2, so we only
+	 * need to add in 0x1 if it's a read-only user page
+	 */
+	rflags = new_pmd & PMD_HUGE_USER;
+	if ((new_pmd & PMD_HUGE_USER) && !((new_pmd & PMD_HUGE_RW) &&
+					   (new_pmd & PMD_HUGE_DIRTY)))
+		rflags |= 0x1;
+	/*
+	 * PMD_HUGE_EXEC -> HW_NO_EXEC since it's inverted
+	 */
+	rflags |= ((new_pmd & PMD_HUGE_EXEC) ? 0 : HPTE_R_N);
+
+#if 0 /* FIXME!! */
+	if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) {
+
+		/*
+		 * No CPU has hugepages but lacks no execute, so we
+		 * don't need to worry about that case
+		 */
+		rflags = hash_page_do_lazy_icache(rflags, __pte(old_pte), trap);
+	}
+#endif
+	/*
+	 * Find the slot index details for this ea, using base page size.
+	 */
+	shift = mmu_psize_defs[psize].shift;
+	index = (ea & (HUGE_PAGE_SIZE - 1)) >> shift;
+	BUG_ON(index > 4096);
+
+	vpn = hpt_vpn(ea, vsid, ssize);
+	hash = hpt_hash(vpn, shift, ssize);
+	/*
+	 * The hpte hindex are stored in the pgtable whose address is in the
+	 * second half of the PMD
+	 */
+	hpte_slot_array = *(char **)(pmdp + PTRS_PER_PMD);
+
+	valid = hpte_slot_array[index]  & 0x1;
+	if (unlikely(valid)) {
+		/* update the hpte bits */
+		hidx =  hpte_slot_array[index]  >> 1;
+		if (hidx & _PTEIDX_SECONDARY)
+			hash = ~hash;
+		slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
+		slot += hidx & _PTEIDX_GROUP_IX;
+
+		ret = ppc_md.hpte_updatepp(slot, rflags, vpn,
+					   psize, ssize, local);
+		/*
+		 * We failed to update, try to insert a new entry.
+		 */
+		if (ret == -1) {
+			/*
+			 * large pte is marked busy, so we can be sure
+			 * nobody is looking at hpte_slot_array. hence we can
+			 * safely update this here.
+			 */
+			hpte_slot_array[index] = 0;
+			valid = 0;
+		}
+	}
+
+	if (likely(!valid)) {
+		unsigned long hpte_group;
+
+		/* insert new entry */
+		pa = pmd_pfn(__pmd(old_pmd)) << PAGE_SHIFT;
+repeat:
+		hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL;
+
+		/* clear the busy bits and set the hash pte bits */
+		new_pmd = (new_pmd & ~PMD_HUGE_HPTEFLAGS) | PMD_HUGE_HASHPTE;
+
+		/*
+		 * WIMG bits.
+		 * We always have _PAGE_COHERENT enabled for system RAM
+		 */
+		rflags |= _PAGE_COHERENT;
+
+		if (new_pmd & PMD_HUGE_SAO)
+			rflags |= _PAGE_SAO;
+
+		/* Insert into the hash table, primary slot */
+		slot = ppc_md.hpte_insert(hpte_group, vpn, pa, rflags, 0,
+					  psize, lpsize, ssize);
+		/*
+		 * Primary is full, try the secondary
+		 */
+		if (unlikely(slot == -1)) {
+			hpte_group = ((~hash & htab_hash_mask) *
+				      HPTES_PER_GROUP) & ~0x7UL;
+			slot = ppc_md.hpte_insert(hpte_group, vpn, pa,
+						  rflags, HPTE_V_SECONDARY,
+						  psize, lpsize, ssize);
+			if (slot == -1) {
+				if (mftb() & 0x1)
+					hpte_group = ((hash & htab_hash_mask) *
+						      HPTES_PER_GROUP) & ~0x7UL;
+
+				ppc_md.hpte_remove(hpte_group);
+				goto repeat;
+			}
+		}
+		/*
+		 * Hypervisor failure. Restore old pmd and return -1
+		 * similar to __hash_page_*
+		 */
+		if (unlikely(slot == -2)) {
+			*pmdp = __pmd(old_pmd);
+			hash_failure_debug(ea, access, vsid, trap, ssize,
+					   psize, lpsize, old_pmd);
+			return -1;
+		}
+		/*
+		 * large pte is marked busy, so we can be sure
+		 * nobody is looking at hpte_slot_array. hence we can
+		 * safely update this here.
+		 */
+		hpte_slot_array[index] = slot << 1 | 0x1;
+	}
+	/*
+	 * No need to use ldarx/stdcx here
+	 */
+	*pmdp = __pmd(new_pmd & ~PMD_HUGE_BUSY);
+	return 0;
+}
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 1a6de0a..7f11fa0 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -67,7 +67,8 @@ static inline unsigned int mmu_psize_to_shift(unsigned int mmu_psize)
 
 #define hugepd_none(hpd)	((hpd).pd == 0)
 
-pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift)
+pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea,
+				 unsigned *shift, unsigned int *hugepage)
 {
 	pgd_t *pg;
 	pud_t *pu;
@@ -77,6 +78,8 @@ pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift
 
 	if (shift)
 		*shift = 0;
+	if (hugepage)
+		*hugepage = 0;
 
 	pg = pgdir + pgd_index(ea);
 	if (is_hugepd(pg)) {
@@ -91,12 +94,24 @@ pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift
 			pm = pmd_offset(pu, ea);
 			if (is_hugepd(pm))
 				hpdp = (hugepd_t *)pm;
-			else if (!pmd_none(*pm)) {
+			else if (pmd_large(*pm)) {
+				/* THP page */
+				if (hugepage) {
+					*hugepage = 1;
+					/*
+					 * This should be ok, except for few
+					 * flags. Most of the pte and hugepage
+					 * pmd bits overlap. We don't use the
+					 * returned value as pte_t in the caller.
+					 */
+					return (pte_t *)pm;
+				} else
+					return NULL;
+			} else if (!pmd_none(*pm)) {
 				return pte_offset_kernel(pm, ea);
 			}
 		}
 	}
-
 	if (!hpdp)
 		return NULL;
 
@@ -108,7 +123,8 @@ EXPORT_SYMBOL_GPL(find_linux_pte_or_hugepte);
 
 pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
 {
-	return find_linux_pte_or_hugepte(mm->pgd, addr, NULL);
+	/* Only called for HugeTLB pages, hence can ignore THP */
+	return find_linux_pte_or_hugepte(mm->pgd, addr, NULL, NULL);
 }
 
 static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
@@ -613,8 +629,11 @@ follow_huge_addr(struct mm_struct *mm, unsigned long address, int write)
 	struct page *page;
 	unsigned shift;
 	unsigned long mask;
-
-	ptep = find_linux_pte_or_hugepte(mm->pgd, address, &shift);
+	/*
+	 * Transparent hugepages are handled by generic code. We can skip them
+	 * here.
+	 */
+	ptep = find_linux_pte_or_hugepte(mm->pgd, address, &shift, NULL);
 
 	/* Verify it is a huge page else bail. */
 	if (!ptep || !shift)
diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
index cf3ca8e..fbff062 100644
--- a/arch/powerpc/mm/pgtable.c
+++ b/arch/powerpc/mm/pgtable.c
@@ -557,3 +557,41 @@ void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr,
 }
 
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+
+/*
+ * find_linux_pte returns the address of a linux pte for a given
+ * effective address and directory.  If not found, it returns zero.
+ */
+pte_t *find_linux_pte(pgd_t *pgdir, unsigned long ea, unsigned int *hugepage)
+{
+	pgd_t *pg;
+	pud_t *pu;
+	pmd_t *pm;
+	pte_t *pt = NULL;
+
+	if (hugepage)
+		*hugepage = 0;
+	pg = pgdir + pgd_index(ea);
+	if (!pgd_none(*pg)) {
+		pu = pud_offset(pg, ea);
+		if (!pud_none(*pu)) {
+			pm = pmd_offset(pu, ea);
+			if (pmd_large(*pm)) {
+				/* THP page */
+				if (hugepage) {
+					*hugepage = 1;
+					/*
+					 * This should be ok, except for few
+					 * flags. Most of the pte and hugepage
+					 * pmd bits overlap. We don't use the
+					 * returned value as pte_t in the caller.
+					 */
+					return (pte_t *)pm;
+				} else
+					return NULL;
+			} else if (pmd_present(*pm))
+				pt = pte_offset_kernel(pm, ea);
+		}
+	}
+	return pt;
+}
diff --git a/arch/powerpc/mm/tlb_hash64.c b/arch/powerpc/mm/tlb_hash64.c
index 023ec8a..be0066f 100644
--- a/arch/powerpc/mm/tlb_hash64.c
+++ b/arch/powerpc/mm/tlb_hash64.c
@@ -206,7 +206,10 @@ void __flush_hash_table_range(struct mm_struct *mm, unsigned long start,
 	local_irq_save(flags);
 	arch_enter_lazy_mmu_mode();
 	for (; start < end; start += PAGE_SIZE) {
-		pte_t *ptep = find_linux_pte(mm->pgd, start);
+		/*
+		 * We won't find hugepages here.
+		 */
+		pte_t *ptep = find_linux_pte(mm->pgd, start, NULL);
 		unsigned long pte;
 
 		if (ptep == NULL)
diff --git a/arch/powerpc/perf/callchain.c b/arch/powerpc/perf/callchain.c
index 74d1e78..578cac7 100644
--- a/arch/powerpc/perf/callchain.c
+++ b/arch/powerpc/perf/callchain.c
@@ -125,7 +125,7 @@ static int read_user_stack_slow(void __user *ptr, void *ret, int nb)
 	if (!pgdir)
 		return -EFAULT;
 
-	ptep = find_linux_pte_or_hugepte(pgdir, addr, &shift);
+	ptep = find_linux_pte_or_hugepte(pgdir, addr, &shift, NULL);
 	if (!shift)
 		shift = PAGE_SHIFT;
 
diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c
index 9a04322..44c931a 100644
--- a/arch/powerpc/platforms/pseries/eeh.c
+++ b/arch/powerpc/platforms/pseries/eeh.c
@@ -261,7 +261,10 @@ static inline unsigned long eeh_token_to_phys(unsigned long token)
 	pte_t *ptep;
 	unsigned long pa;
 
-	ptep = find_linux_pte(init_mm.pgd, token);
+	/*
+	 * We won't find hugepages here
+	 */
+	ptep = find_linux_pte(init_mm.pgd, token, NULL);
 	if (!ptep)
 		return token;
 	pa = pte_pfn(*ptep) << PAGE_SHIFT;
-- 
1.7.10

^ permalink raw reply related

* [PATCH -V3 10/25] powerpc: Return all the valid pte encoding in KVM_PPC_GET_SMMU_INFO ioctl
From: Aneesh Kumar K.V @ 2013-03-15  9:39 UTC (permalink / raw)
  To: benh, paulus; +Cc: linuxppc-dev, Aneesh Kumar K.V
In-Reply-To: <1363340407-22619-1-git-send-email-aneesh.kumar@linux.vnet.ibm.com>

From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/kvm/book3s_hv.c |   14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 48f6d99..f472414 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -1508,14 +1508,24 @@ long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, struct kvm_allocate_rma *ret)
 static void kvmppc_add_seg_page_size(struct kvm_ppc_one_seg_page_size **sps,
 				     int linux_psize)
 {
+	int i, index = 0;
 	struct mmu_psize_def *def = &mmu_psize_defs[linux_psize];
 
 	if (!def->shift)
 		return;
 	(*sps)->page_shift = def->shift;
 	(*sps)->slb_enc = def->sllp;
-	(*sps)->enc[0].page_shift = def->shift;
-	(*sps)->enc[0].pte_enc = def->penc[linux_psize];
+	for (i = 0; i < MMU_PAGE_COUNT; i++) {
+		if (def->penc[i] != -1) {
+			if (index >= KVM_PPC_PAGE_SIZES_MAX_SZ) {
+				WARN_ON(1);
+				break;
+			}
+			(*sps)->enc[index].page_shift = mmu_psize_defs[i].shift;
+			(*sps)->enc[index].pte_enc = def->penc[i];
+			index++;
+		}
+	}
 	(*sps)++;
 }
 
-- 
1.7.10

^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox