Linux-ARM-Kernel Archive on lore.kernel.org

Linux-ARM-Kernel Archive on lore.kernel.org
 help / color / mirror / Atom feed

* [PATCH v2 5/8] ARM: sun9i: mcpm: Support CPU/cluster power down and hotplugging for cpu1~7
From: Chen-Yu Tsai @ 2018-01-04 14:37 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <20180104143754.2425-1-wens@csie.org>

This patch adds common code used to power down all cores and clusters.
The code is quite long. The common MCPM library does not provide a
callback for doing the actual power down sequence. Instead it assumes
some other part (maybe a power management coprocessor) will handle it.
Since our platform does not have this, we resort to using a single thread
workqueue, based on how our work should be done in the order they were
queued, so the cluster power down code does not execute before the core
power down code. Though the scenario is not catastrophic, it will leave
the cluster on and using power.

This might be a bit racy, as nothing prevents the system from bringing a
core or cluster back before the asynchronous work shuts it down. This would
likely happen under a heavily loaded system with a scheduler that brings
cores in and out of the system frequently. It would either result in a
stall on a single core, or worse, hang the system if a cluster is abruptly
turned off. In simple use-cases it performs OK.

The primary core (cpu0) requires setting flags to have the BROM bounce
execution to the SMP software entry code. This is done in a subsequent
patch to keep the changes cleanly separated.

Signed-off-by: Chen-Yu Tsai <wens@csie.org>
---
 arch/arm/mach-sunxi/mcpm.c | 170 +++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 165 insertions(+), 5 deletions(-)

diff --git a/arch/arm/mach-sunxi/mcpm.c b/arch/arm/mach-sunxi/mcpm.c
index 30719998f3f0..ddc26b5fec48 100644
--- a/arch/arm/mach-sunxi/mcpm.c
+++ b/arch/arm/mach-sunxi/mcpm.c
@@ -12,9 +12,12 @@
 #include <linux/arm-cci.h>
 #include <linux/delay.h>
 #include <linux/io.h>
+#include <linux/iopoll.h>
+#include <linux/irqchip/arm-gic.h>
 #include <linux/of.h>
 #include <linux/of_address.h>
 #include <linux/of_device.h>
+#include <linux/workqueue.h>
 
 #include <asm/cputype.h>
 #include <asm/cp15.h>
@@ -30,6 +33,9 @@
 #define CPUCFG_CX_CTRL_REG0_L2_RST_DISABLE_A15	BIT(0)
 #define CPUCFG_CX_CTRL_REG1(c)		(0x10 * (c) + 0x4)
 #define CPUCFG_CX_CTRL_REG1_ACINACTM	BIT(0)
+#define CPUCFG_CX_STATUS(c)		(0x30 + 0x4 * (c))
+#define CPUCFG_CX_STATUS_STANDBYWFI(n)	BIT(16 + (n))
+#define CPUCFG_CX_STATUS_STANDBYWFIL2	BIT(0)
 #define CPUCFG_CX_RST_CTRL(c)		(0x80 + 0x4 * (c))
 #define CPUCFG_CX_RST_CTRL_DBG_SOC_RST	BIT(24)
 #define CPUCFG_CX_RST_CTRL_ETM_RST(n)	BIT(20 + (n))
@@ -237,6 +243,30 @@ static int sunxi_cluster_powerup(unsigned int cluster)
 	return 0;
 }
 
+struct sunxi_mcpm_work {
+	struct work_struct work;
+	unsigned int cpu;
+	unsigned int cluster;
+};
+
+static struct workqueue_struct *sunxi_mcpm_wq;
+static struct sunxi_mcpm_work sunxi_mcpm_cpu_down_work[2][4];
+static struct sunxi_mcpm_work sunxi_mcpm_cluster_down_work[2];
+
+static void sunxi_cpu_powerdown_prepare(unsigned int cpu, unsigned int cluster)
+{
+	gic_cpu_if_down(0);
+
+	queue_work(sunxi_mcpm_wq,
+		   &sunxi_mcpm_cpu_down_work[cluster][cpu].work);
+}
+
+static void sunxi_cluster_powerdown_prepare(unsigned int cluster)
+{
+	queue_work(sunxi_mcpm_wq,
+		   &sunxi_mcpm_cluster_down_work[cluster].work);
+}
+
 static void sunxi_cpu_cache_disable(void)
 {
 	/* Disable and flush the local CPU cache. */
@@ -286,11 +316,116 @@ static void sunxi_cluster_cache_disable(void)
 	writel(reg, cpucfg_base + CPUCFG_CX_CTRL_REG1(cluster));
 }
 
+static int sunxi_do_cpu_powerdown(unsigned int cpu, unsigned int cluster)
+{
+	u32 reg;
+
+	pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster);
+	if (cpu >= SUNXI_CPUS_PER_CLUSTER || cluster >= SUNXI_NR_CLUSTERS)
+		return -EINVAL;
+
+	/* gate processor power */
+	reg = readl(prcm_base + PRCM_PWROFF_GATING_REG(cluster));
+	reg |= PRCM_PWROFF_GATING_REG_CORE(cpu);
+	writel(reg, prcm_base + PRCM_PWROFF_GATING_REG(cluster));
+	udelay(20);
+
+	/* close power switch */
+	sunxi_cpu_power_switch_set(cpu, cluster, false);
+
+	return 0;
+}
+
+static int sunxi_do_cluster_powerdown(unsigned int cluster)
+{
+	u32 reg;
+
+	pr_debug("%s: cluster %u\n", __func__, cluster);
+	if (cluster >= SUNXI_NR_CLUSTERS)
+		return -EINVAL;
+
+	/* assert cluster resets */
+	pr_debug("%s: assert cluster reset\n", __func__);
+	reg = readl(cpucfg_base + CPUCFG_CX_RST_CTRL(cluster));
+	reg &= ~CPUCFG_CX_RST_CTRL_DBG_SOC_RST;
+	reg &= ~CPUCFG_CX_RST_CTRL_H_RST;
+	reg &= ~CPUCFG_CX_RST_CTRL_L2_RST;
+	writel(reg, cpucfg_base + CPUCFG_CX_RST_CTRL(cluster));
+
+	/* gate cluster power */
+	pr_debug("%s: gate cluster power\n", __func__);
+	reg = readl(prcm_base + PRCM_PWROFF_GATING_REG(cluster));
+	reg |= PRCM_PWROFF_GATING_REG_CLUSTER;
+	writel(reg, prcm_base + PRCM_PWROFF_GATING_REG(cluster));
+	udelay(20);
+
+	return 0;
+}
+
+static struct sunxi_mcpm_work *to_sunxi_mcpm_work(struct work_struct *work)
+{
+	return container_of(work, struct sunxi_mcpm_work, work);
+}
+
+/* async. work functions to bring down cpus and clusters */
+static void sunxi_cpu_powerdown(struct work_struct *_work)
+{
+	struct sunxi_mcpm_work *work = to_sunxi_mcpm_work(_work);
+	unsigned int cluster = work->cluster, cpu = work->cpu;
+	int ret;
+	u32 reg;
+
+	/* wait for CPU core to enter WFI */
+	ret = readl_poll_timeout(cpucfg_base + CPUCFG_CX_STATUS(cluster), reg,
+				 reg & CPUCFG_CX_STATUS_STANDBYWFI(cpu),
+				 1000, 100000);
+
+	if (ret)
+		return;
+
+	/* power down CPU core */
+	sunxi_do_cpu_powerdown(cpu, cluster);
+}
+
+static void sunxi_cluster_powerdown(struct work_struct *_work)
+{
+	struct sunxi_mcpm_work *work = to_sunxi_mcpm_work(_work);
+	unsigned int cluster = work->cluster;
+	int ret;
+	u32 reg;
+
+	pr_debug("%s: cluster %u\n", __func__, cluster);
+
+	/* wait for cluster L2 WFI */
+	ret = readl_poll_timeout(cpucfg_base + CPUCFG_CX_STATUS(cluster), reg,
+				 reg & CPUCFG_CX_STATUS_STANDBYWFIL2,
+				 1000, 100000);
+	if (ret)
+		return;
+
+	sunxi_do_cluster_powerdown(cluster);
+}
+
+static int sunxi_wait_for_powerdown(unsigned int cpu, unsigned int cluster)
+{
+	int ret;
+	u32 reg;
+
+	ret = readl_poll_timeout(prcm_base + PRCM_PWR_SWITCH_REG(cluster, cpu),
+				 reg, reg == 0xff, 1000, 100000);
+	pr_debug("%s: cpu %u cluster %u powerdown: %s\n", __func__,
+		 cpu, cluster, ret ? "timed out" : "done");
+	return ret;
+}
+
 static const struct mcpm_platform_ops sunxi_power_ops = {
-	.cpu_powerup		= sunxi_cpu_powerup,
-	.cluster_powerup	= sunxi_cluster_powerup,
-	.cpu_cache_disable	= sunxi_cpu_cache_disable,
-	.cluster_cache_disable	= sunxi_cluster_cache_disable,
+	.cpu_powerup		   = sunxi_cpu_powerup,
+	.cpu_powerdown_prepare	   = sunxi_cpu_powerdown_prepare,
+	.cluster_powerup	   = sunxi_cluster_powerup,
+	.cluster_powerdown_prepare = sunxi_cluster_powerdown_prepare,
+	.cpu_cache_disable	   = sunxi_cpu_cache_disable,
+	.cluster_cache_disable	   = sunxi_cluster_cache_disable,
+	.wait_for_powerdown	   = sunxi_wait_for_powerdown,
 };
 
 /*
@@ -352,6 +487,7 @@ static int __init sunxi_mcpm_init(void)
 	struct device_node *cpucfg_node, *node;
 	struct resource res;
 	int ret;
+	int i, j;
 
 	if (!of_machine_is_compatible("allwinner,sun9i-a80"))
 		return -ENODEV;
@@ -389,6 +525,28 @@ static int __init sunxi_mcpm_init(void)
 		goto err_put_cpucfg_node;
 	}
 
+	/* Initialize our strictly ordered workqueue */
+	sunxi_mcpm_wq = alloc_ordered_workqueue("%s", 0, "sunxi-mcpm");
+	if (!sunxi_mcpm_wq) {
+		ret = -ENOMEM;
+		pr_err("%s: failed to create our workqueue\n", __func__);
+		goto err_unmap_release_cpucfg;
+	}
+
+	/* Initialize power down work */
+	for (i = 0; i < SUNXI_NR_CLUSTERS; i++) {
+		for (j = 0; j < SUNXI_CPUS_PER_CLUSTER; j++) {
+			sunxi_mcpm_cpu_down_work[i][j].cluster = i;
+			sunxi_mcpm_cpu_down_work[i][j].cpu = j;
+			INIT_WORK(&sunxi_mcpm_cpu_down_work[i][j].work,
+				  sunxi_cpu_powerdown);
+		}
+
+		sunxi_mcpm_cluster_down_work[i].cluster = i;
+		INIT_WORK(&sunxi_mcpm_cluster_down_work[i].work,
+			  sunxi_cluster_powerdown);
+	}
+
 	ret = mcpm_platform_register(&sunxi_power_ops);
 	if (!ret)
 		ret = mcpm_sync_init(sunxi_power_up_setup);
@@ -396,7 +554,7 @@ static int __init sunxi_mcpm_init(void)
 		/* do not disable AXI master as no one will re-enable it */
 		ret = mcpm_loopback(sunxi_cluster_cache_disable_without_axi);
 	if (ret)
-		goto err_unmap_release_cpucfg;
+		goto err_destroy_workqueue;
 
 	/* We don't need the CPUCFG device node anymore */
 	of_node_put(cpucfg_node);
@@ -411,6 +569,8 @@ static int __init sunxi_mcpm_init(void)
 
 	return ret;
 
+err_destroy_workqueue:
+	destroy_workqueue(sunxi_mcpm_wq);
 err_unmap_release_cpucfg:
 	iounmap(cpucfg_base);
 	of_address_to_resource(cpucfg_node, 0, &res);
-- 
2.15.1

^ permalink raw reply related

* [PATCH v2 6/8] dt-bindings: ARM: sunxi: Document A80 SoC secure SRAM usage by SMP hotplug
From: Chen-Yu Tsai @ 2018-01-04 14:37 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <20180104143754.2425-1-wens@csie.org>

On the Allwinner A80 SoC the BROM supports hotplugging the primary core
(cpu0) by checking two 32bit values at a specific location within the
secure SRAM block. This region needs to be reserved and accessible to
the SMP code.

Document its usage.

Signed-off-by: Chen-Yu Tsai <wens@csie.org>
---
 .../devicetree/bindings/arm/sunxi/smp-sram.txt     | 44 ++++++++++++++++++++++
 1 file changed, 44 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/arm/sunxi/smp-sram.txt

diff --git a/Documentation/devicetree/bindings/arm/sunxi/smp-sram.txt b/Documentation/devicetree/bindings/arm/sunxi/smp-sram.txt
new file mode 100644
index 000000000000..082e6a9382d3
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/sunxi/smp-sram.txt
@@ -0,0 +1,44 @@
+Allwinner SRAM for smp bringup:
+------------------------------------------------
+
+Allwinner's A80 SoC uses part of the secure sram for hotplugging of the
+primary core (cpu0). Once the core gets powered up it checks if a magic
+value is set at a specific location. If it is then the BROM will jump
+to the software entry address, instead of executing a standard boot.
+
+Therefore a reserved section sub-node has to be added to the mmio-sram
+declaration.
+
+Note that this is separate from the Allwinner SRAM controller found in
+../../sram/sunxi-sram.txt. This SRAM is secure only and not mappable to
+any device.
+
+Also there are no "secure-only" properties. The implementation should
+check if this SRAM is usable first.
+
+Required sub-node properties:
+- compatible : depending on the SoC this should be one of:
+		"allwinner,sun9i-a80-smp-sram"
+
+The rest of the properties should follow the generic mmio-sram discription
+found in ../../misc/sram.txt
+
+Example:
+
+	sram_b: sram at 20000 {
+		/* 256 KiB secure SRAM at 0x20000 */
+		compatible = "mmio-sram";
+		reg = <0x00020000 0x40000>;
+		#address-cells = <1>;
+		#size-cells = <1>;
+		ranges = <0 0x00020000 0x40000>;
+
+		smp-sram at 1000 {
+			/*
+			 * This is checked by BROM to determine if
+			 * cpu0 should jump to SMP entry vector
+			 */
+			compatible = "allwinner,sun9i-a80-smp-sram";
+			reg = <0x1000 0x8>;
+		};
+	};
-- 
2.15.1

^ permalink raw reply related

* [PATCH v2 7/8] ARM: sun9i: mcpm: Support cpu0 hotplug
From: Chen-Yu Tsai @ 2018-01-04 14:37 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <20180104143754.2425-1-wens@csie.org>

The BROM has a branch that checks if the primary core is hotplugging.
If the magic flag is set, execution jumps to the address set in the
software entry register. (Secondary cores always branch to the that
address.)

This patch sets the flags that makes BROM jump execution on the
primary core (cpu0) to the SMP software entry code when the core is
powered back up. After it is re-integrated into the system, the flag
is cleared.

Signed-off-by: Chen-Yu Tsai <wens@csie.org>
---
 arch/arm/mach-sunxi/mcpm.c | 54 +++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 51 insertions(+), 3 deletions(-)

diff --git a/arch/arm/mach-sunxi/mcpm.c b/arch/arm/mach-sunxi/mcpm.c
index ddc26b5fec48..c8d74c783644 100644
--- a/arch/arm/mach-sunxi/mcpm.c
+++ b/arch/arm/mach-sunxi/mcpm.c
@@ -56,8 +56,12 @@
 #define PRCM_PWR_SWITCH_REG(c, cpu)	(0x140 + 0x10 * (c) + 0x4 * (cpu))
 #define PRCM_CPU_SOFT_ENTRY_REG		0x164
 
+#define CPU0_SUPPORT_HOTPLUG_MAGIC0	0xFA50392F
+#define CPU0_SUPPORT_HOTPLUG_MAGIC1	0x790DCA3A
+
 static void __iomem *cpucfg_base;
 static void __iomem *prcm_base;
+static void __iomem *sram_b_smp_base;
 
 static bool sunxi_core_is_cortex_a15(unsigned int core, unsigned int cluster)
 {
@@ -116,6 +120,17 @@ static int sunxi_cpu_power_switch_set(unsigned int cpu, unsigned int cluster,
 	return 0;
 }
 
+static void sunxi_cpu0_hotplug_support_set(bool enable)
+{
+	if (enable) {
+		writel(CPU0_SUPPORT_HOTPLUG_MAGIC0, sram_b_smp_base);
+		writel(CPU0_SUPPORT_HOTPLUG_MAGIC1, sram_b_smp_base + 0x4);
+	} else {
+		writel(0x0, sram_b_smp_base);
+		writel(0x0, sram_b_smp_base + 0x4);
+	}
+}
+
 static int sunxi_cpu_powerup(unsigned int cpu, unsigned int cluster)
 {
 	u32 reg;
@@ -124,6 +139,10 @@ static int sunxi_cpu_powerup(unsigned int cpu, unsigned int cluster)
 	if (cpu >= SUNXI_CPUS_PER_CLUSTER || cluster >= SUNXI_NR_CLUSTERS)
 		return -EINVAL;
 
+	/* Set hotplug support magic flags for cpu0 */
+	if (cluster == 0 && cpu == 0)
+		sunxi_cpu0_hotplug_support_set(true);
+
 	/* assert processor power-on reset */
 	reg = readl(prcm_base + PRCM_CPU_PO_RST_CTRL(cluster));
 	reg &= ~PRCM_CPU_PO_RST_CTRL_CORE(cpu);
@@ -406,6 +425,13 @@ static void sunxi_cluster_powerdown(struct work_struct *_work)
 	sunxi_do_cluster_powerdown(cluster);
 }
 
+static void sunxi_cpu_is_up(unsigned int cpu, unsigned int cluster)
+{
+	/* Clear hotplug support magic flags for cpu0 */
+	if (cluster == 0 && cpu == 0)
+		sunxi_cpu0_hotplug_support_set(false);
+}
+
 static int sunxi_wait_for_powerdown(unsigned int cpu, unsigned int cluster)
 {
 	int ret;
@@ -425,6 +451,7 @@ static const struct mcpm_platform_ops sunxi_power_ops = {
 	.cluster_powerdown_prepare = sunxi_cluster_powerdown_prepare,
 	.cpu_cache_disable	   = sunxi_cpu_cache_disable,
 	.cluster_cache_disable	   = sunxi_cluster_cache_disable,
+	.cpu_is_up		   = sunxi_cpu_is_up,
 	.wait_for_powerdown	   = sunxi_wait_for_powerdown,
 };
 
@@ -484,7 +511,7 @@ static void sunxi_mcpm_setup_entry_point(void)
 
 static int __init sunxi_mcpm_init(void)
 {
-	struct device_node *cpucfg_node, *node;
+	struct device_node *cpucfg_node, *sram_node, *node;
 	struct resource res;
 	int ret;
 	int i, j;
@@ -525,12 +552,26 @@ static int __init sunxi_mcpm_init(void)
 		goto err_put_cpucfg_node;
 	}
 
+	sram_node = of_find_compatible_node(NULL, NULL,
+					    "allwinner,sun9i-a80-smp-sram");
+	if (!sram_node) {
+		ret = -ENODEV;
+		goto err_unmap_release_cpucfg;
+	}
+
+	sram_b_smp_base = of_io_request_and_map(sram_node, 0, "sunxi-mcpm");
+	if (IS_ERR(sram_b_smp_base)) {
+		ret = PTR_ERR(sram_b_smp_base);
+		pr_err("%s: failed to map secure SRAM\n", __func__);
+		goto err_put_sram_node;
+	}
+
 	/* Initialize our strictly ordered workqueue */
 	sunxi_mcpm_wq = alloc_ordered_workqueue("%s", 0, "sunxi-mcpm");
 	if (!sunxi_mcpm_wq) {
 		ret = -ENOMEM;
 		pr_err("%s: failed to create our workqueue\n", __func__);
-		goto err_unmap_release_cpucfg;
+		goto err_unmap_release_secure_sram;
 	}
 
 	/* Initialize power down work */
@@ -556,8 +597,9 @@ static int __init sunxi_mcpm_init(void)
 	if (ret)
 		goto err_destroy_workqueue;
 
-	/* We don't need the CPUCFG device node anymore */
+	/* We don't need the CPUCFG and SRAM device nodes anymore */
 	of_node_put(cpucfg_node);
+	of_node_put(sram_node);
 
 	/* Set the hardware entry point address */
 	sunxi_mcpm_setup_entry_point();
@@ -571,6 +613,12 @@ static int __init sunxi_mcpm_init(void)
 
 err_destroy_workqueue:
 	destroy_workqueue(sunxi_mcpm_wq);
+err_unmap_release_secure_sram:
+	iounmap(sram_b_smp_base);
+	of_address_to_resource(sram_node, 0, &res);
+	release_mem_region(res.start, resource_size(&res));
+err_put_sram_node:
+	of_node_put(sram_node);
 err_unmap_release_cpucfg:
 	iounmap(cpucfg_base);
 	of_address_to_resource(cpucfg_node, 0, &res);
-- 
2.15.1

^ permalink raw reply related

* [PATCH v2 8/8] ARM: dts: sun9i: Add secure SRAM node used for MCPM SMP hotplug
From: Chen-Yu Tsai @ 2018-01-04 14:37 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <20180104143754.2425-1-wens@csie.org>

The A80 stores some magic flags in a portion of the secure SRAM. The
BROM jumps directly to the software entry point set by the SMP code
if the flags are set. This is required for CPU0 hotplugging.

Signed-off-by: Chen-Yu Tsai <wens@csie.org>
---
 arch/arm/boot/dts/sun9i-a80.dtsi | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/arch/arm/boot/dts/sun9i-a80.dtsi b/arch/arm/boot/dts/sun9i-a80.dtsi
index bf4d40e8359f..b1c86b76ac3c 100644
--- a/arch/arm/boot/dts/sun9i-a80.dtsi
+++ b/arch/arm/boot/dts/sun9i-a80.dtsi
@@ -250,6 +250,25 @@
 		 */
 		ranges = <0 0 0 0x20000000>;
 
+		sram_b: sram at 20000 {
+			/* 256 KiB secure SRAM at 0x20000 */
+			compatible = "mmio-sram";
+			reg = <0x00020000 0x40000>;
+
+			#address-cells = <1>;
+			#size-cells = <1>;
+			ranges = <0 0x00020000 0x40000>;
+
+			smp-sram at 1000 {
+				/*
+				 * This is checked by BROM to determine if
+				 * cpu0 should jump to SMP entry vector
+				 */
+				compatible = "allwinner,sun9i-a80-smp-sram";
+				reg = <0x1000 0x8>;
+			};
+		};
+
 		ehci0: usb at a00000 {
 			compatible = "allwinner,sun9i-a80-ehci", "generic-ehci";
 			reg = <0x00a00000 0x100>;
-- 
2.15.1

^ permalink raw reply related

* [PATCH] fbdev: arm64 use __raw I/O memory api
From: Bartlomiej Zolnierkiewicz @ 2018-01-04 14:40 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <1512029842-10939-1-git-send-email-ji.zhang@mediatek.com>

On Thursday, November 30, 2017 04:17:22 PM Ji Zhang wrote:
> Since arm64 also has __raw I/O accessors, add __aarch64__ in fb.h.
> This is a supplement for commmit
> 981409b25e2a99409b26daa67293ca1cfd5ea0a0
> 
> Signed-off-by: Ji Zhang <ji.zhang@mediatek.com>

Patch queued for 4.16, thanks.

Best regards,
--
Bartlomiej Zolnierkiewicz
Samsung R&D Institute Poland
Samsung Electronics

^ permalink raw reply

* [PATCH 01/11] clk: sunxi-ng: Don't set k if width is 0 for nkmp plls
From: Chen-Yu Tsai @ 2018-01-04 14:45 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <20171230210203.24115-2-jernej.skrabec@siol.net>

On Sun, Dec 31, 2017 at 5:01 AM, Jernej Skrabec <jernej.skrabec@siol.net> wrote:
> For example, A83T have nmp plls which are modelled as nkmp plls. Since k
> is not specified, it has offset 0, shift 0 and lowest value 1. This
> means that LSB bit is always set to 1, which may change clock rate.
>
> Fix that by applying k factor only if k width is greater than 0.
>
> Signed-off-by: Jernej Skrabec <jernej.skrabec@siol.net>
> ---
>  drivers/clk/sunxi-ng/ccu_nkmp.c | 21 +++++++++++++--------
>  1 file changed, 13 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/clk/sunxi-ng/ccu_nkmp.c b/drivers/clk/sunxi-ng/ccu_nkmp.c
> index e58c95787f94..709f528af2b3 100644
> --- a/drivers/clk/sunxi-ng/ccu_nkmp.c
> +++ b/drivers/clk/sunxi-ng/ccu_nkmp.c
> @@ -81,7 +81,7 @@ static unsigned long ccu_nkmp_recalc_rate(struct clk_hw *hw,
>                                         unsigned long parent_rate)
>  {
>         struct ccu_nkmp *nkmp = hw_to_ccu_nkmp(hw);
> -       unsigned long n, m, k, p;
> +       unsigned long n, m, k = 1, p;
>         u32 reg;
>
>         reg = readl(nkmp->common.base + nkmp->common.reg);
> @@ -92,11 +92,13 @@ static unsigned long ccu_nkmp_recalc_rate(struct clk_hw *hw,
>         if (!n)
>                 n++;
>
> -       k = reg >> nkmp->k.shift;
> -       k &= (1 << nkmp->k.width) - 1;
> -       k += nkmp->k.offset;
> -       if (!k)
> -               k++;
> +       if (nkmp->k.width) {
> +               k = reg >> nkmp->k.shift;
> +               k &= (1 << nkmp->k.width) - 1;
> +               k += nkmp->k.offset;
> +               if (!k)
> +                       k++;
> +       }

The conditional shouldn't be necessary. With nkmp->k.width = 0,
you'd simply get k & 0, which is 0, which then gets bumped up to 1,
unless k.offset > 1, which would be a bug.

>
>         m = reg >> nkmp->m.shift;
>         m &= (1 << nkmp->m.width) - 1;
> @@ -153,12 +155,15 @@ static int ccu_nkmp_set_rate(struct clk_hw *hw, unsigned long rate,
>
>         reg = readl(nkmp->common.base + nkmp->common.reg);
>         reg &= ~GENMASK(nkmp->n.width + nkmp->n.shift - 1, nkmp->n.shift);
> -       reg &= ~GENMASK(nkmp->k.width + nkmp->k.shift - 1, nkmp->k.shift);
> +       if (nkmp->k.width)
> +               reg &= ~GENMASK(nkmp->k.width + nkmp->k.shift - 1,
> +                               nkmp->k.shift);
>         reg &= ~GENMASK(nkmp->m.width + nkmp->m.shift - 1, nkmp->m.shift);
>         reg &= ~GENMASK(nkmp->p.width + nkmp->p.shift - 1, nkmp->p.shift);
>
>         reg |= (_nkmp.n - nkmp->n.offset) << nkmp->n.shift;
> -       reg |= (_nkmp.k - nkmp->k.offset) << nkmp->k.shift;
> +       if (nkmp->k.width)
> +               reg |= (_nkmp.k - nkmp->k.offset) << nkmp->k.shift;

I think a better way would be

        reg |= ((_nkmp.k - nkmp->k.offset) << nkmp->k.shift) &
               GENMASK(nkmp->k.width + nkmp->k.shift - 1, nkmp->k.shift);

And do this for all the factors, not just k. This pattern is what
regmap_update_bits does, which seems much safer. I wonder what
GENMASK() with a negative value would do though...

ChenYu

>         reg |= (_nkmp.m - nkmp->m.offset) << nkmp->m.shift;
>         reg |= ilog2(_nkmp.p) << nkmp->p.shift;
>
> --
> 2.15.1
>

^ permalink raw reply

* [PATCH v2 10/12] drm/sun4i: backend: Use runtime_pm variant of atomic_commit_tail
From: Chen-Yu Tsai @ 2018-01-04 14:50 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <dc22ddb2833718f05a761cb35a4d5416c93fa7c6.1513609024.git-series.maxime.ripard@free-electrons.com>

On Mon, Dec 18, 2017 at 10:57 PM, Maxime Ripard
<maxime.ripard@free-electrons.com> wrote:
> During a hardware commit, the commit bit in the backend will only be
> cleared if the TCON is enabled. Use the runtime_pm variant of the
> atomic_commit_tail hook that makes sure that the CRTC, our TCON, is enabled
> when we perform an atomic_commit.
>
> Signed-off-by: Maxime Ripard <maxime.ripard@free-electrons.com>

Reviewed-by: Chen-Yu Tsai <wens@csie.org>

^ permalink raw reply

* [PATCH v2 11/12] drm/sun4i: backend: Make sure we don't have a commit pending
From: Chen-Yu Tsai @ 2018-01-04 14:50 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <b25003511097304444302ba56d26ee9e47966b33.1513609024.git-series.maxime.ripard@free-electrons.com>

On Mon, Dec 18, 2017 at 10:57 PM, Maxime Ripard
<maxime.ripard@free-electrons.com> wrote:
> If we try to read the backend registers while it fetches the new values, we
> end up with the value of some random register instead of the one we asked
> for.
>
> In order to prevent that, let's make sure that the very first thing we do
> during our atomic modesetting is to let the commit bit come to a rest.
>
> We don't have to worry about anything else since the only time we will
> trigger a new transaction is during the atomic_commit which comes much
> later.
>
> Signed-off-by: Maxime Ripard <maxime.ripard@free-electrons.com>

Reviewed-by: Chen-Yu Tsai <wens@csie.org>

^ permalink raw reply

* [PATCH v2 12/12] ARM: dts: sun8i: a33 Enable our display frontend
From: Chen-Yu Tsai @ 2018-01-04 14:51 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <7e28da6c07bf90e9fb0afd14975d48b7d204e3a1.1513609024.git-series.maxime.ripard@free-electrons.com>

On Mon, Dec 18, 2017 at 10:57 PM, Maxime Ripard
<maxime.ripard@free-electrons.com> wrote:
> The display frontend can be used to do hardware scaling, colorspaces
> conversion or to implement the buffer format output by the Cedar VPU.
>
> Since we're starting to have some support for it in the DRM driver, let's
> enable its DT node.
>
> Signed-off-by: Maxime Ripard <maxime.ripard@free-electrons.com>

Acked-by: Chen-Yu Tsai <wens@csie.org>

^ permalink raw reply

* [PATCH v2 0/8] ARM: sun9i: SMP support with Multi-Cluster Power Management
From: Maxime Ripard @ 2018-01-04 14:58 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <20180104143754.2425-1-wens@csie.org>

On Thu, Jan 04, 2018 at 10:37:46PM +0800, Chen-Yu Tsai wrote:
> This is v2 of my sun9i SMP support with MCPM series which was started
> over two years ago [1]. We've tried to implement PSCI for both the A80
> and A83T. Results were not promising. The issue is that these two chips
> have a broken security extensions implementation. If a specific bit is
> not burned in its e-fuse, most if not all security protections don't
> work [2]. Even worse, non-secure access to the GIC become secure. This
> requires a crazy workaround in the GIC driver which probably doesn't work
> in all cases [3].
> 
> Nicolas mentioned that the MCPM framework is likely overkill in our
> case [4]. However the framework does provide cluster/core state tracking
> and proper sequencing of cache related operations. We could rework
> the code to use standard smp_ops, but I would like to actually get
> a working version in first.
> 
> Much of the sunxi-specific MCPM code is derived from Allwinner code and
> documentation, with some references to the other MCPM implementations,
> as well as the Cortex's Technical Reference Manuals for the power
> sequencing info.
> 
> One major difference compared to other platforms is we currently do not
> have a standalone PMU or other embedded firmware to do the actually power
> sequencing. All power/reset control is done by the kernel. Nicolas
> mentioned that a new optional callback should be added in cases where the
> kernel has to do the actual power down [5]. For now however I'm using a
> dedicated single thread workqueue. CPU and cluster power off work is
> queued from the .{cpu,cluster}_powerdown_prepare callbacks. This solution
> is somewhat heavy, as I have a total of 10 static work structs. It might
> also be a bit racy, as nothing prevents the system from bringing a core
> back before the asynchronous work shuts it down. This would likely
> happen under a heavily loaded system with a scheduler that brings cores
> in and out of the system frequently. In simple use-cases it performs OK.

It all looks sane to me
Acked-by: Maxime Ripard <maxime.ripard@free-electrons.com>

Maxime

-- 
Maxime Ripard, Free Electrons
Embedded Linux and Kernel engineering
http://free-electrons.com
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 833 bytes
Desc: not available
URL: <http://lists.infradead.org/pipermail/linux-arm-kernel/attachments/20180104/8ea853e2/attachment.sig>

^ permalink raw reply

* [PATCH 1/2] ARM: dts: sunxi: Switch MMC nodes away from cd-inverted property
From: Maxime Ripard @ 2018-01-04 15:03 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <20171222205738.2389-1-tuomas@tuxera.com>

On Fri, Dec 22, 2017 at 10:57:37PM +0200, Tuomas Tynkkynen wrote:
> Using the cd-inverted property is not useful when GPIOs are used as card
> detects since the polarity can be specified with the usual
> GPIO_ACTIVE_(HIGH|LOW) GPIO flags. It has also caused confusion for
> U-Boot developers, so migrate all sunxi boards away from cd-inverted.
> 
> Signed-off-by: Tuomas Tynkkynen <tuomas@tuxera.com>

Acked-by: Maxime Ripard <maxime.ripard@free-electrons.com>

Thanks!
Maxime

-- 
Maxime Ripard, Free Electrons
Embedded Linux and Kernel engineering
http://free-electrons.com
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 833 bytes
Desc: not available
URL: <http://lists.infradead.org/pipermail/linux-arm-kernel/attachments/20180104/e901dee6/attachment.sig>

^ permalink raw reply

* [PATCH 2/2] arm64: dts: sunxi: Switch MMC nodes away from cd-inverted property
From: Maxime Ripard @ 2018-01-04 15:04 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <20171222205738.2389-2-tuomas@tuxera.com>

On Fri, Dec 22, 2017 at 10:57:38PM +0200, Tuomas Tynkkynen wrote:
> Using the cd-inverted property is not useful when GPIOs are used as card
> detects since the polarity can be specified with the usual
> GPIO_ACTIVE_(HIGH|LOW) GPIO flags. It has also caused confusion for
> U-Boot developers, so migrate all sunxi boards away from cd-inverted.
> 
> Signed-off-by: Tuomas Tynkkynen <tuomas@tuxera.com>

Acked-by: Maxime Ripard <maxime.ripard@free-electrons.com>

Thanks!
Maxime

-- 
Maxime Ripard, Free Electrons
Embedded Linux and Kernel engineering
http://free-electrons.com
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 833 bytes
Desc: not available
URL: <http://lists.infradead.org/pipermail/linux-arm-kernel/attachments/20180104/4ed5a34e/attachment.sig>

^ permalink raw reply

* [PATCH 1/2] clk: imx: imx7d: add the snvs clock
From: Anson Huang @ 2018-01-04 15:06 UTC (permalink / raw)
  To: linux-arm-kernel

According to the i.MX7D Reference Manual,
SNVS block has a clock gate, accessing SNVS block
would need this clock gate to be enabled, add it
into clock tree so that SNVS module driver can
operate this clock gate.

Signed-off-by: Anson Huang <Anson.Huang@nxp.com>
---
 drivers/clk/imx/clk-imx7d.c             | 1 +
 include/dt-bindings/clock/imx7d-clock.h | 3 ++-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/clk/imx/clk-imx7d.c b/drivers/clk/imx/clk-imx7d.c
index 992938b..a284c6f 100644
--- a/drivers/clk/imx/clk-imx7d.c
+++ b/drivers/clk/imx/clk-imx7d.c
@@ -799,6 +799,7 @@ static void __init imx7d_clocks_init(struct device_node *ccm_node)
 	clks[IMX7D_DRAM_PHYM_ALT_ROOT_CLK] = imx_clk_gate4("dram_phym_alt_root_clk", "dram_phym_alt_post_div", base + 0x4130, 0);
 	clks[IMX7D_DRAM_ALT_ROOT_CLK] = imx_clk_gate4("dram_alt_root_clk", "dram_alt_post_div", base + 0x4130, 0);
 	clks[IMX7D_OCOTP_CLK] = imx_clk_gate4("ocotp_clk", "ipg_root_clk", base + 0x4230, 0);
+	clks[IMX7D_SNVS_CLK] = imx_clk_gate4("snvs_clk", "ipg_root_clk", base + 0x4250, 0);
 	clks[IMX7D_USB_HSIC_ROOT_CLK] = imx_clk_gate4("usb_hsic_root_clk", "usb_hsic_post_div", base + 0x4420, 0);
 	clks[IMX7D_SDMA_CORE_CLK] = imx_clk_gate4("sdma_root_clk", "ahb_root_clk", base + 0x4480, 0);
 	clks[IMX7D_PCIE_CTRL_ROOT_CLK] = imx_clk_gate4("pcie_ctrl_root_clk", "pcie_ctrl_post_div", base + 0x4600, 0);
diff --git a/include/dt-bindings/clock/imx7d-clock.h b/include/dt-bindings/clock/imx7d-clock.h
index e2f99ae..dc51904 100644
--- a/include/dt-bindings/clock/imx7d-clock.h
+++ b/include/dt-bindings/clock/imx7d-clock.h
@@ -452,5 +452,6 @@
 #define IMX7D_OCOTP_CLK			439
 #define IMX7D_NAND_RAWNAND_CLK		440
 #define IMX7D_NAND_USDHC_BUS_RAWNAND_CLK 441
-#define IMX7D_CLK_END			442
+#define IMX7D_SNVS_CLK			442
+#define IMX7D_CLK_END			443
 #endif /* __DT_BINDINGS_CLOCK_IMX7D_H */
-- 
1.9.1

^ permalink raw reply related

* [PATCH 2/2] ARM: dts: imx7s: add snvs rtc clock
From: Anson Huang @ 2018-01-04 15:06 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <1515078402-22135-1-git-send-email-Anson.Huang@nxp.com>

Add i.MX7 SNVS RTC clock.

Signed-off-by: Anson Huang <Anson.Huang@nxp.com>
---
 arch/arm/boot/dts/imx7s.dtsi | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/arm/boot/dts/imx7s.dtsi b/arch/arm/boot/dts/imx7s.dtsi
index e718fd2..f9b97f3 100644
--- a/arch/arm/boot/dts/imx7s.dtsi
+++ b/arch/arm/boot/dts/imx7s.dtsi
@@ -534,6 +534,8 @@
 					offset = <0x34>;
 					interrupts = <GIC_SPI 19 IRQ_TYPE_LEVEL_HIGH>,
 						     <GIC_SPI 20 IRQ_TYPE_LEVEL_HIGH>;
+					clocks = <&clks IMX7D_SNVS_CLK>;
+					clock-names = "snvs-rtc";
 				};
 
 				snvs_poweroff: snvs-poweroff {
-- 
1.9.1

^ permalink raw reply related

* [PATCH 00/11] arm64 kpti hardening and variant 2 workarounds
From: Will Deacon @ 2018-01-04 15:08 UTC (permalink / raw)
  To: linux-arm-kernel

Hi all,

This set of patches builds on top of the arm64 kpti patches[1] queued for
4.16 and further hardens the arm64 Linux kernel against the side-channel
attacks recently published by Google Project Zero.

In particular, the series does the following:

  * Enable kpti by default on arm64, based on the value of ID_AA64PFR0_EL1.CSV3

  * Prevent speculative resteering of the indirect branch in the exception
    trampoline page, which resides at a fixed virtual address to avoid a
    KASLR leak

  * Add hooks to changes of context where the branch predictor could
    theoretically resteer the speculative instruction stream having been trained
    by userspace or a guest OS. These hooks are signal delivery (to prevent
    training the branch predictor on kernel addresses from userspace),
    switch_mm (return to user if SW PAN is enabled) and exit from a guest VM.

  * Implement a dummy PSCI "VERSION" call as the hook for affected Cortex-A
    CPUs. This will invalidate the predictor state with the latest Arm Trusted
    Firmware patches which will appear at [2] and SoC vendors with affected
    CPUs are strongly encouraged to update. We plan to switch to a more
    efficient, special-purpose call when it is available and the PSCI spec
    has been updated accordingly.

I'd like to get this in for 4.16, but that doesn't mean we can't improve
it further once it's merged.

For more information about the impact of this issue and the software migitations
for Arm processors, please see http://www.arm.com/security-update.

Thanks,

Will

[1] https://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux.git/tag/?h=kpti-base
[2] https://github.com/ARM-software/arm-trusted-firmware/wiki/ARM-Trusted-Firmware-Security-Advisory-TFV-6

--->8

Marc Zyngier (3):
  arm64: Move post_ttbr_update_workaround to C code
  arm64: KVM: Use per-CPU vector when BP hardening is enabled
  arm64: KVM: Make PSCI_VERSION a fast path

Will Deacon (8):
  arm64: use RET instruction for exiting the trampoline
  arm64: Kconfig: Reword UNMAP_KERNEL_AT_EL0 kconfig entry
  arm64: Take into account ID_AA64PFR0_EL1.CSV3
  arm64: cpufeature: Pass capability structure to ->enable callback
  drivers/firmware: Expose psci_get_version through psci_ops structure
  arm64: Add skeleton to harden the branch predictor against aliasing
    attacks
  arm64: cputype: Add missing MIDR values for Cortex-A72 and Cortex-A75
  arm64: Implement branch predictor hardening for affected Cortex-A CPUs

 arch/arm/include/asm/kvm_mmu.h     |  10 ++++
 arch/arm64/Kconfig                 |  30 +++++++---
 arch/arm64/include/asm/assembler.h |  13 -----
 arch/arm64/include/asm/cpucaps.h   |   3 +-
 arch/arm64/include/asm/cputype.h   |   4 ++
 arch/arm64/include/asm/kvm_mmu.h   |  38 ++++++++++++
 arch/arm64/include/asm/mmu.h       |  37 ++++++++++++
 arch/arm64/include/asm/sysreg.h    |   2 +
 arch/arm64/kernel/Makefile         |   4 ++
 arch/arm64/kernel/bpi.S            |  79 +++++++++++++++++++++++++
 arch/arm64/kernel/cpu_errata.c     | 116 +++++++++++++++++++++++++++++++++++++
 arch/arm64/kernel/cpufeature.c     |  12 +++-
 arch/arm64/kernel/entry.S          |   7 ++-
 arch/arm64/kvm/hyp/switch.c        |  15 ++++-
 arch/arm64/mm/context.c            |  11 ++++
 arch/arm64/mm/fault.c              |   1 +
 arch/arm64/mm/proc.S               |   3 +-
 drivers/firmware/psci.c            |   2 +
 include/linux/psci.h               |   1 +
 virt/kvm/arm/arm.c                 |   8 ++-
 20 files changed, 366 insertions(+), 30 deletions(-)
 create mode 100644 arch/arm64/kernel/bpi.S

-- 
2.1.4

^ permalink raw reply

* [PATCH 01/11] arm64: use RET instruction for exiting the trampoline
From: Will Deacon @ 2018-01-04 15:08 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <1515078515-13723-1-git-send-email-will.deacon@arm.com>

Speculation attacks against the entry trampoline can potentially resteer
the speculative instruction stream through the indirect branch and into
arbitrary gadgets within the kernel.

This patch defends against these attacks by forcing a misprediction
through the return stack: a dummy BL instruction loads an entry into
the stack, so that the predicted program flow of the subsequent RET
instruction is to a branch-to-self instruction which is finally resolved
as a branch to the kernel vectors with speculation suppressed.

Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/kernel/entry.S | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 031392ee5f47..b9feb587294d 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -1029,6 +1029,9 @@ alternative_else_nop_endif
 	.if	\regsize == 64
 	msr	tpidrro_el0, x30	// Restored in kernel_ventry
 	.endif
+	bl	2f
+	b	.
+2:
 	tramp_map_kernel	x30
 #ifdef CONFIG_RANDOMIZE_BASE
 	adr	x30, tramp_vectors + PAGE_SIZE
@@ -1041,7 +1044,7 @@ alternative_insn isb, nop, ARM64_WORKAROUND_QCOM_FALKOR_E1003
 	msr	vbar_el1, x30
 	add	x30, x30, #(1b - tramp_vectors)
 	isb
-	br	x30
+	ret
 	.endm

 	.macro tramp_exit, regsize = 64
-- 
2.1.4

^ permalink raw reply related

* [PATCH 02/11] arm64: Kconfig: Reword UNMAP_KERNEL_AT_EL0 kconfig entry
From: Will Deacon @ 2018-01-04 15:08 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <1515078515-13723-1-git-send-email-will.deacon@arm.com>

Although CONFIG_UNMAP_KERNEL_AT_EL0 does make KASLR more robust, it's
actually more useful as a mitigation against speculation attacks that
can leak arbitrary kernel data to userspace through speculation.

Reword the Kconfig help message to reflect this, and make the option
depend on EXPERT so that it is on by default for the majority of users.

Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/Kconfig | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 3af1657fcac3..efaaa3a66b95 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -834,15 +834,14 @@ config FORCE_MAX_ZONEORDER
 	  4M allocations matching the default size used by generic code.
 
 config UNMAP_KERNEL_AT_EL0
-	bool "Unmap kernel when running in userspace (aka \"KAISER\")"
+	bool "Unmap kernel when running in userspace (aka \"KAISER\")" if EXPERT
 	default y
 	help
-	  Some attacks against KASLR make use of the timing difference between
-	  a permission fault which could arise from a page table entry that is
-	  present in the TLB, and a translation fault which always requires a
-	  page table walk. This option defends against these attacks by unmapping
-	  the kernel whilst running in userspace, therefore forcing translation
-	  faults for all of kernel space.
+	  Speculation attacks against some high-performance processors can
+	  be used to bypass MMU permission checks and leak kernel data to
+	  userspace. This can be defended against by unmapping the kernel
+	  when running in userspace, mapping it back in on exception entry
+	  via a trampoline page in the vector table.
 
 	  If unsure, say Y.
 
-- 
2.1.4

^ permalink raw reply related

* [PATCH 03/11] arm64: Take into account ID_AA64PFR0_EL1.CSV3
From: Will Deacon @ 2018-01-04 15:08 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <1515078515-13723-1-git-send-email-will.deacon@arm.com>

For non-KASLR kernels where the KPTI behaviour has not been overridden
on the command line we can use ID_AA64PFR0_EL1.CSV3 to determine whether
or not we should unmap the kernel whilst running at EL0.

Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/include/asm/sysreg.h | 1 +
 arch/arm64/kernel/cpufeature.c  | 7 ++++++-
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 08cc88574659..ae519bbd3f9e 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -437,6 +437,7 @@
 #define ID_AA64ISAR1_DPB_SHIFT		0
 
 /* id_aa64pfr0 */
+#define ID_AA64PFR0_CSV3_SHIFT		60
 #define ID_AA64PFR0_SVE_SHIFT		32
 #define ID_AA64PFR0_GIC_SHIFT		24
 #define ID_AA64PFR0_ASIMD_SHIFT		20
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 9f0545dfe497..e11c11bb5b02 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -145,6 +145,7 @@ static const struct arm64_ftr_bits ftr_id_aa64isar1[] = {
 };
 
 static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = {
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_CSV3_SHIFT, 4, 0),
 	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_SVE_SHIFT, 4, 0),
 	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_GIC_SHIFT, 4, 0),
 	S_ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_ASIMD_SHIFT, 4, ID_AA64PFR0_ASIMD_NI),
@@ -851,6 +852,8 @@ static int __kpti_forced; /* 0: not forced, >0: forced on, <0: forced off */
 static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry,
 				int __unused)
 {
+	u64 pfr0 = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1);
+
 	/* Forced on command line? */
 	if (__kpti_forced) {
 		pr_info_once("kernel page table isolation forced %s by command line option\n",
@@ -862,7 +865,9 @@ static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry,
 	if (IS_ENABLED(CONFIG_RANDOMIZE_BASE))
 		return true;
 
-	return false;
+	/* Defer to CPU feature registers */
+	return !cpuid_feature_extract_unsigned_field(pfr0,
+						     ID_AA64PFR0_CSV3_SHIFT);
 }
 
 static int __init parse_kpti(char *str)
-- 
2.1.4

^ permalink raw reply related

* [PATCH 04/11] arm64: cpufeature: Pass capability structure to ->enable callback
From: Will Deacon @ 2018-01-04 15:08 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <1515078515-13723-1-git-send-email-will.deacon@arm.com>

In order to invoke the CPU capability ->matches callback from the ->enable
callback for applying local-CPU workarounds, we need a handle on the
capability structure.

This patch passes a pointer to the capability structure to the ->enable
callback.

Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/kernel/cpufeature.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index e11c11bb5b02..6133c14b9b01 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -1151,7 +1151,7 @@ void __init enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps)
 			 * uses an IPI, giving us a PSTATE that disappears when
 			 * we return.
 			 */
-			stop_machine(caps->enable, NULL, cpu_online_mask);
+			stop_machine(caps->enable, (void *)caps, cpu_online_mask);
 		}
 	}
 }
@@ -1194,7 +1194,7 @@ verify_local_cpu_features(const struct arm64_cpu_capabilities *caps)
 			cpu_die_early();
 		}
 		if (caps->enable)
-			caps->enable(NULL);
+			caps->enable((void *)caps);
 	}
 }
 
-- 
2.1.4

^ permalink raw reply related

* [PATCH 05/11] drivers/firmware: Expose psci_get_version through psci_ops structure
From: Will Deacon @ 2018-01-04 15:08 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <1515078515-13723-1-git-send-email-will.deacon@arm.com>

Entry into recent versions of ARM Trusted Firmware will invalidate the CPU
branch predictor state in order to protect against aliasing attacks.

This patch exposes the PSCI "VERSION" function via psci_ops, so that it
can be invoked outside of the PSCI driver where necessary.

Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 drivers/firmware/psci.c | 2 ++
 include/linux/psci.h    | 1 +
 2 files changed, 3 insertions(+)

diff --git a/drivers/firmware/psci.c b/drivers/firmware/psci.c
index d687ca3d5049..8b25d31e8401 100644
--- a/drivers/firmware/psci.c
+++ b/drivers/firmware/psci.c
@@ -496,6 +496,8 @@ static void __init psci_init_migrate(void)
 static void __init psci_0_2_set_functions(void)
 {
 	pr_info("Using standard PSCI v0.2 function IDs\n");
+	psci_ops.get_version = psci_get_version;
+
 	psci_function_id[PSCI_FN_CPU_SUSPEND] =
 					PSCI_FN_NATIVE(0_2, CPU_SUSPEND);
 	psci_ops.cpu_suspend = psci_cpu_suspend;
diff --git a/include/linux/psci.h b/include/linux/psci.h
index bdea1cb5e1db..6306ab10af18 100644
--- a/include/linux/psci.h
+++ b/include/linux/psci.h
@@ -26,6 +26,7 @@ int psci_cpu_init_idle(unsigned int cpu);
 int psci_cpu_suspend_enter(unsigned long index);
 
 struct psci_operations {
+	u32 (*get_version)(void);
 	int (*cpu_suspend)(u32 state, unsigned long entry_point);
 	int (*cpu_off)(u32 state);
 	int (*cpu_on)(unsigned long cpuid, unsigned long entry_point);
-- 
2.1.4

^ permalink raw reply related

* [PATCH 06/11] arm64: Move post_ttbr_update_workaround to C code
From: Will Deacon @ 2018-01-04 15:08 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <1515078515-13723-1-git-send-email-will.deacon@arm.com>

From: Marc Zyngier <marc.zyngier@arm.com>

We will soon need to invoke a CPU-specific function pointer after changing
page tables, so move post_ttbr_update_workaround out into C code to make
this possible.

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/include/asm/assembler.h | 13 -------------
 arch/arm64/kernel/entry.S          |  2 +-
 arch/arm64/mm/context.c            |  9 +++++++++
 arch/arm64/mm/proc.S               |  3 +--
 4 files changed, 11 insertions(+), 16 deletions(-)

diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index c45bc94f15d0..cee60ce0da52 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -476,17 +476,4 @@ alternative_endif
 	mrs	\rd, sp_el0
 	.endm
 
-/*
- * Errata workaround post TTBRx_EL1 update.
- */
-	.macro	post_ttbr_update_workaround
-#ifdef CONFIG_CAVIUM_ERRATUM_27456
-alternative_if ARM64_WORKAROUND_CAVIUM_27456
-	ic	iallu
-	dsb	nsh
-	isb
-alternative_else_nop_endif
-#endif
-	.endm
-
 #endif	/* __ASM_ASSEMBLER_H */
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index b9feb587294d..6aa112baf601 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -277,7 +277,7 @@ alternative_else_nop_endif
 	 * Cavium erratum 27456 (broadcast TLBI instructions may cause I-cache
 	 * corruption).
 	 */
-	post_ttbr_update_workaround
+	bl	post_ttbr_update_workaround
 	.endif
 1:
 	.if	\el != 0
diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index 1cb3bc92ae5c..c1e3b6479c8f 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -239,6 +239,15 @@ void check_and_switch_context(struct mm_struct *mm, unsigned int cpu)
 		cpu_switch_mm(mm->pgd, mm);
 }
 
+/* Errata workaround post TTBRx_EL1 update. */
+asmlinkage void post_ttbr_update_workaround(void)
+{
+	asm volatile(ALTERNATIVE("nop; nop; nop",
+				 "ic iallu; dsb nsh; isb",
+				 ARM64_WORKAROUND_CAVIUM_27456,
+				 CONFIG_CAVIUM_ERRATUM_27456));
+}
+
 static int asids_init(void)
 {
 	asid_bits = get_cpu_asid_bits();
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index 3146dc96f05b..6affb68a9a14 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -145,8 +145,7 @@ ENTRY(cpu_do_switch_mm)
 	isb
 	msr	ttbr0_el1, x0			// now update TTBR0
 	isb
-	post_ttbr_update_workaround
-	ret
+	b	post_ttbr_update_workaround	// Back to C code...
 ENDPROC(cpu_do_switch_mm)
 
 	.pushsection ".idmap.text", "ax"
-- 
2.1.4

^ permalink raw reply related

* [PATCH 07/11] arm64: Add skeleton to harden the branch predictor against aliasing attacks
From: Will Deacon @ 2018-01-04 15:08 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <1515078515-13723-1-git-send-email-will.deacon@arm.com>

Aliasing attacks against CPU branch predictors can allow an attacker to
redirect speculative control flow on some CPUs and potentially divulge
information from one context to another.

This patch adds initial skeleton code behind a new Kconfig option to
enable implementation-specific mitigations against these attacks for
CPUs that are affected.

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/Kconfig               | 17 +++++++++
 arch/arm64/include/asm/cpucaps.h |  3 +-
 arch/arm64/include/asm/mmu.h     | 37 ++++++++++++++++++++
 arch/arm64/include/asm/sysreg.h  |  1 +
 arch/arm64/kernel/Makefile       |  4 +++
 arch/arm64/kernel/bpi.S          | 55 +++++++++++++++++++++++++++++
 arch/arm64/kernel/cpu_errata.c   | 74 ++++++++++++++++++++++++++++++++++++++++
 arch/arm64/kernel/cpufeature.c   |  1 +
 arch/arm64/mm/context.c          |  2 ++
 arch/arm64/mm/fault.c            |  1 +
 10 files changed, 194 insertions(+), 1 deletion(-)
 create mode 100644 arch/arm64/kernel/bpi.S

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index efaaa3a66b95..cea44b95187c 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -845,6 +845,23 @@ config UNMAP_KERNEL_AT_EL0
 
 	  If unsure, say Y.
 
+config HARDEN_BRANCH_PREDICTOR
+	bool "Harden the branch predictor against aliasing attacks" if EXPERT
+	default y
+	help
+	  Speculation attacks against some high-performance processors rely on
+	  being able to manipulate the branch predictor for a victim context by
+	  executing aliasing branches in the attacker context.  Such attacks
+	  can be partially mitigated against by clearing internal branch
+	  predictor state and limiting the prediction logic in some situations.
+
+	  This config option will take CPU-specific actions to harden the
+	  branch predictor against aliasing attacks and may rely on specific
+	  instruction sequences or control bits being set by the system
+	  firmware.
+
+	  If unsure, say Y.
+
 menuconfig ARMV8_DEPRECATED
 	bool "Emulate deprecated/obsolete ARMv8 instructions"
 	depends on COMPAT
diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h
index b4537ffd1018..51616e77fe6b 100644
--- a/arch/arm64/include/asm/cpucaps.h
+++ b/arch/arm64/include/asm/cpucaps.h
@@ -42,7 +42,8 @@
 #define ARM64_HAS_DCPOP				21
 #define ARM64_SVE				22
 #define ARM64_UNMAP_KERNEL_AT_EL0		23
+#define ARM64_HARDEN_BRANCH_PREDICTOR		24
 
-#define ARM64_NCAPS				24
+#define ARM64_NCAPS				25
 
 #endif /* __ASM_CPUCAPS_H */
diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h
index 6f7bdb89817f..6dd83d75b82a 100644
--- a/arch/arm64/include/asm/mmu.h
+++ b/arch/arm64/include/asm/mmu.h
@@ -41,6 +41,43 @@ static inline bool arm64_kernel_unmapped_at_el0(void)
 	       cpus_have_const_cap(ARM64_UNMAP_KERNEL_AT_EL0);
 }
 
+typedef void (*bp_hardening_cb_t)(void);
+
+struct bp_hardening_data {
+	int			hyp_vectors_slot;
+	bp_hardening_cb_t	fn;
+};
+
+#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR
+extern char __bp_harden_hyp_vecs_start[], __bp_harden_hyp_vecs_end[];
+
+DECLARE_PER_CPU_READ_MOSTLY(struct bp_hardening_data, bp_hardening_data);
+
+static inline struct bp_hardening_data *arm64_get_bp_hardening_data(void)
+{
+	return this_cpu_ptr(&bp_hardening_data);
+}
+
+static inline void arm64_apply_bp_hardening(void)
+{
+	struct bp_hardening_data *d;
+
+	if (!cpus_have_const_cap(ARM64_HARDEN_BRANCH_PREDICTOR))
+		return;
+
+	d = arm64_get_bp_hardening_data();
+	if (d->fn)
+		d->fn();
+}
+#else
+static inline struct bp_hardening_data *arm64_get_bp_hardening_data(void)
+{
+	return NULL;
+}
+
+static inline void arm64_apply_bp_hardening(void)	{ }
+#endif	/* CONFIG_HARDEN_BRANCH_PREDICTOR */
+
 extern void paging_init(void);
 extern void bootmem_init(void);
 extern void __iomem *early_io_map(phys_addr_t phys, unsigned long virt);
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index ae519bbd3f9e..871744973ece 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -438,6 +438,7 @@
 
 /* id_aa64pfr0 */
 #define ID_AA64PFR0_CSV3_SHIFT		60
+#define ID_AA64PFR0_CSV2_SHIFT		56
 #define ID_AA64PFR0_SVE_SHIFT		32
 #define ID_AA64PFR0_GIC_SHIFT		24
 #define ID_AA64PFR0_ASIMD_SHIFT		20
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 067baace74a0..0c760db04858 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -53,6 +53,10 @@ arm64-obj-$(CONFIG_ARM64_RELOC_TEST)	+= arm64-reloc-test.o
 arm64-reloc-test-y := reloc_test_core.o reloc_test_syms.o
 arm64-obj-$(CONFIG_CRASH_DUMP)		+= crash_dump.o
 
+ifeq ($(CONFIG_KVM),y)
+arm64-obj-$(CONFIG_HARDEN_BRANCH_PREDICTOR)	+= bpi.o
+endif
+
 obj-y					+= $(arm64-obj-y) vdso/ probes/
 obj-m					+= $(arm64-obj-m)
 head-y					:= head.o
diff --git a/arch/arm64/kernel/bpi.S b/arch/arm64/kernel/bpi.S
new file mode 100644
index 000000000000..06a931eb2673
--- /dev/null
+++ b/arch/arm64/kernel/bpi.S
@@ -0,0 +1,55 @@
+/*
+ * Contains CPU specific branch predictor invalidation sequences
+ *
+ * Copyright (C) 2018 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/linkage.h>
+
+.macro ventry target
+	.rept 31
+	nop
+	.endr
+	b	\target
+.endm
+
+.macro vectors target
+	ventry \target + 0x000
+	ventry \target + 0x080
+	ventry \target + 0x100
+	ventry \target + 0x180
+
+	ventry \target + 0x200
+	ventry \target + 0x280
+	ventry \target + 0x300
+	ventry \target + 0x380
+
+	ventry \target + 0x400
+	ventry \target + 0x480
+	ventry \target + 0x500
+	ventry \target + 0x580
+
+	ventry \target + 0x600
+	ventry \target + 0x680
+	ventry \target + 0x700
+	ventry \target + 0x780
+.endm
+
+	.align	11
+ENTRY(__bp_harden_hyp_vecs_start)
+	.rept 4
+	vectors __kvm_hyp_vector
+	.endr
+ENTRY(__bp_harden_hyp_vecs_end)
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index 0e27f86ee709..16ea5c6f314e 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -46,6 +46,80 @@ static int cpu_enable_trap_ctr_access(void *__unused)
 	return 0;
 }
 
+#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR
+#include <asm/mmu_context.h>
+#include <asm/cacheflush.h>
+
+DEFINE_PER_CPU_READ_MOSTLY(struct bp_hardening_data, bp_hardening_data);
+
+#ifdef CONFIG_KVM
+static void __copy_hyp_vect_bpi(int slot, const char *hyp_vecs_start,
+				const char *hyp_vecs_end)
+{
+	void *dst = lm_alias(__bp_harden_hyp_vecs_start + slot * SZ_2K);
+	int i;
+
+	for (i = 0; i < SZ_2K; i += 0x80)
+		memcpy(dst + i, hyp_vecs_start, hyp_vecs_end - hyp_vecs_start);
+
+	flush_icache_range((uintptr_t)dst, (uintptr_t)dst + SZ_2K);
+}
+
+static void __install_bp_hardening_cb(bp_hardening_cb_t fn,
+				      const char *hyp_vecs_start,
+				      const char *hyp_vecs_end)
+{
+	static int last_slot = -1;
+	static DEFINE_SPINLOCK(bp_lock);
+	int cpu, slot = -1;
+
+	spin_lock(&bp_lock);
+	for_each_possible_cpu(cpu) {
+		if (per_cpu(bp_hardening_data.fn, cpu) == fn) {
+			slot = per_cpu(bp_hardening_data.hyp_vectors_slot, cpu);
+			break;
+		}
+	}
+
+	if (slot == -1) {
+		last_slot++;
+		BUG_ON(((__bp_harden_hyp_vecs_end - __bp_harden_hyp_vecs_start)
+			/ SZ_2K) <= last_slot);
+		slot = last_slot;
+		__copy_hyp_vect_bpi(slot, hyp_vecs_start, hyp_vecs_end);
+	}
+
+	__this_cpu_write(bp_hardening_data.hyp_vectors_slot, slot);
+	__this_cpu_write(bp_hardening_data.fn, fn);
+	spin_unlock(&bp_lock);
+}
+#else
+static void __install_bp_hardening_cb(bp_hardening_cb_t fn,
+				      const char *hyp_vecs_start,
+				      const char *hyp_vecs_end)
+{
+	__this_cpu_write(bp_hardening_data.fn, fn);
+}
+#endif	/* CONFIG_KVM */
+
+static void  install_bp_hardening_cb(const struct arm64_cpu_capabilities *entry,
+				     bp_hardening_cb_t fn,
+				     const char *hyp_vecs_start,
+				     const char *hyp_vecs_end)
+{
+	u64 pfr0;
+
+	if (!entry->matches(entry, SCOPE_LOCAL_CPU))
+		return;
+
+	pfr0 = read_cpuid(ID_AA64PFR0_EL1);
+	if (cpuid_feature_extract_unsigned_field(pfr0, ID_AA64PFR0_CSV2_SHIFT))
+		return;
+
+	__install_bp_hardening_cb(fn, hyp_vecs_start, hyp_vecs_end);
+}
+#endif	/* CONFIG_HARDEN_BRANCH_PREDICTOR */
+
 #define MIDR_RANGE(model, min, max) \
 	.def_scope = SCOPE_LOCAL_CPU, \
 	.matches = is_affected_midr_range, \
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 6133c14b9b01..19ed09b0bb24 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -146,6 +146,7 @@ static const struct arm64_ftr_bits ftr_id_aa64isar1[] = {
 
 static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = {
 	ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_CSV3_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_CSV2_SHIFT, 4, 0),
 	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_SVE_SHIFT, 4, 0),
 	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_GIC_SHIFT, 4, 0),
 	S_ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_ASIMD_SHIFT, 4, ID_AA64PFR0_ASIMD_NI),
diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index c1e3b6479c8f..f1d99ffc77d1 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -246,6 +246,8 @@ asmlinkage void post_ttbr_update_workaround(void)
 				 "ic iallu; dsb nsh; isb",
 				 ARM64_WORKAROUND_CAVIUM_27456,
 				 CONFIG_CAVIUM_ERRATUM_27456));
+
+	arm64_apply_bp_hardening();
 }
 
 static int asids_init(void)
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 22168cd0dde7..5203b6040cb6 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -318,6 +318,7 @@ static void __do_user_fault(struct task_struct *tsk, unsigned long addr,
 		lsb = PAGE_SHIFT;
 	si.si_addr_lsb = lsb;
 
+	arm64_apply_bp_hardening();
 	force_sig_info(sig, &si, tsk);
 }
 
-- 
2.1.4

^ permalink raw reply related

* [PATCH 08/11] arm64: KVM: Use per-CPU vector when BP hardening is enabled
From: Will Deacon @ 2018-01-04 15:08 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <1515078515-13723-1-git-send-email-will.deacon@arm.com>

From: Marc Zyngier <marc.zyngier@arm.com>

Now that we have per-CPU vectors, let's plug then in the KVM/arm64 code.

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm/include/asm/kvm_mmu.h   | 10 ++++++++++
 arch/arm64/include/asm/kvm_mmu.h | 38 ++++++++++++++++++++++++++++++++++++++
 arch/arm64/kvm/hyp/switch.c      |  2 +-
 virt/kvm/arm/arm.c               |  8 +++++++-
 4 files changed, 56 insertions(+), 2 deletions(-)

diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index fa6f2174276b..eb46fc81a440 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -221,6 +221,16 @@ static inline unsigned int kvm_get_vmid_bits(void)
 	return 8;
 }
 
+static inline void *kvm_get_hyp_vector(void)
+{
+	return kvm_ksym_ref(__kvm_hyp_vector);
+}
+
+static inline int kvm_map_vectors(void)
+{
+	return 0;
+}
+
 #endif	/* !__ASSEMBLY__ */
 
 #endif /* __ARM_KVM_MMU_H__ */
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 672c8684d5c2..2d6d4bd9de52 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -309,5 +309,43 @@ static inline unsigned int kvm_get_vmid_bits(void)
 	return (cpuid_feature_extract_unsigned_field(reg, ID_AA64MMFR1_VMIDBITS_SHIFT) == 2) ? 16 : 8;
 }
 
+#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR
+#include <asm/mmu.h>
+
+static inline void *kvm_get_hyp_vector(void)
+{
+	struct bp_hardening_data *data = arm64_get_bp_hardening_data();
+	void *vect = kvm_ksym_ref(__kvm_hyp_vector);
+
+	if (data->fn) {
+		vect = __bp_harden_hyp_vecs_start +
+		       data->hyp_vectors_slot * SZ_2K;
+
+		if (!has_vhe())
+			vect = lm_alias(vect);
+	}
+
+	return vect;
+}
+
+static inline int kvm_map_vectors(void)
+{
+	return create_hyp_mappings(kvm_ksym_ref(__bp_harden_hyp_vecs_start),
+				   kvm_ksym_ref(__bp_harden_hyp_vecs_end),
+				   PAGE_HYP_EXEC);
+}
+
+#else
+static inline void *kvm_get_hyp_vector(void)
+{
+	return kvm_ksym_ref(__kvm_hyp_vector);
+}
+
+static inline int kvm_map_vectors(void)
+{
+	return 0;
+}
+#endif
+
 #endif /* __ASSEMBLY__ */
 #endif /* __ARM64_KVM_MMU_H__ */
diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
index f7c651f3a8c0..8d4f3c9d6dc4 100644
--- a/arch/arm64/kvm/hyp/switch.c
+++ b/arch/arm64/kvm/hyp/switch.c
@@ -52,7 +52,7 @@ static void __hyp_text __activate_traps_vhe(void)
 	val &= ~(CPACR_EL1_FPEN | CPACR_EL1_ZEN);
 	write_sysreg(val, cpacr_el1);
 
-	write_sysreg(__kvm_hyp_vector, vbar_el1);
+	write_sysreg(kvm_get_hyp_vector(), vbar_el1);
 }
 
 static void __hyp_text __activate_traps_nvhe(void)
diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
index 6b60c98a6e22..1c9fdb6db124 100644
--- a/virt/kvm/arm/arm.c
+++ b/virt/kvm/arm/arm.c
@@ -1158,7 +1158,7 @@ static void cpu_init_hyp_mode(void *dummy)
 	pgd_ptr = kvm_mmu_get_httbr();
 	stack_page = __this_cpu_read(kvm_arm_hyp_stack_page);
 	hyp_stack_ptr = stack_page + PAGE_SIZE;
-	vector_ptr = (unsigned long)kvm_ksym_ref(__kvm_hyp_vector);
+	vector_ptr = (unsigned long)kvm_get_hyp_vector();
 
 	__cpu_init_hyp_mode(pgd_ptr, hyp_stack_ptr, vector_ptr);
 	__cpu_init_stage2();
@@ -1403,6 +1403,12 @@ static int init_hyp_mode(void)
 		goto out_err;
 	}
 
+	err = kvm_map_vectors();
+	if (err) {
+		kvm_err("Cannot map vectors\n");
+		goto out_err;
+	}
+
 	/*
 	 * Map the Hyp stack pages
 	 */
-- 
2.1.4

^ permalink raw reply related

* [PATCH 09/11] arm64: KVM: Make PSCI_VERSION a fast path
From: Will Deacon @ 2018-01-04 15:08 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <1515078515-13723-1-git-send-email-will.deacon@arm.com>

From: Marc Zyngier <marc.zyngier@arm.com>

For those CPUs that require PSCI to perform a BP invalidation,
going all the way to the PSCI code for not much is a waste of
precious cycles. Let's terminate that call as early as possible.

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/kvm/hyp/switch.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
index 8d4f3c9d6dc4..4d273f6d0e69 100644
--- a/arch/arm64/kvm/hyp/switch.c
+++ b/arch/arm64/kvm/hyp/switch.c
@@ -17,6 +17,7 @@
 
 #include <linux/types.h>
 #include <linux/jump_label.h>
+#include <uapi/linux/psci.h>
 
 #include <asm/kvm_asm.h>
 #include <asm/kvm_emulate.h>
@@ -341,6 +342,18 @@ int __hyp_text __kvm_vcpu_run(struct kvm_vcpu *vcpu)
 	if (exit_code == ARM_EXCEPTION_TRAP && !__populate_fault_info(vcpu))
 		goto again;
 
+	if (exit_code == ARM_EXCEPTION_TRAP &&
+	    (kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_HVC64 ||
+	     kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_HVC32) &&
+	    vcpu_get_reg(vcpu, 0) == PSCI_0_2_FN_PSCI_VERSION) {
+		u64 val = PSCI_RET_NOT_SUPPORTED;
+		if (test_bit(KVM_ARM_VCPU_PSCI_0_2, vcpu->arch.features))
+			val = 2;
+
+		vcpu_set_reg(vcpu, 0, val);
+		goto again;
+	}
+
 	if (static_branch_unlikely(&vgic_v2_cpuif_trap) &&
 	    exit_code == ARM_EXCEPTION_TRAP) {
 		bool valid;
-- 
2.1.4

^ permalink raw reply related

* [PATCH 10/11] arm64: cputype: Add missing MIDR values for Cortex-A72 and Cortex-A75
From: Will Deacon @ 2018-01-04 15:08 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <1515078515-13723-1-git-send-email-will.deacon@arm.com>

Hook up MIDR values for the Cortex-A72 and Cortex-A75 CPUs, since they
will soon need MIDR matches for hardening the branch predictor.

Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/include/asm/cputype.h | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
index 235e77d98261..84385b94e70b 100644
--- a/arch/arm64/include/asm/cputype.h
+++ b/arch/arm64/include/asm/cputype.h
@@ -79,8 +79,10 @@
 #define ARM_CPU_PART_AEM_V8		0xD0F
 #define ARM_CPU_PART_FOUNDATION		0xD00
 #define ARM_CPU_PART_CORTEX_A57		0xD07
+#define ARM_CPU_PART_CORTEX_A72		0xD08
 #define ARM_CPU_PART_CORTEX_A53		0xD03
 #define ARM_CPU_PART_CORTEX_A73		0xD09
+#define ARM_CPU_PART_CORTEX_A75		0xD0A
 
 #define APM_CPU_PART_POTENZA		0x000
 
@@ -94,7 +96,9 @@
 
 #define MIDR_CORTEX_A53 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53)
 #define MIDR_CORTEX_A57 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57)
+#define MIDR_CORTEX_A72 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A72)
 #define MIDR_CORTEX_A73 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A73)
+#define MIDR_CORTEX_A75 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A75)
 #define MIDR_THUNDERX	MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX)
 #define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX)
 #define MIDR_THUNDERX_83XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_83XX)
-- 
2.1.4

^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox