LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2 7/7] ASoC: dt-bindings: imx-rpmsg: Add binding doc for rpmsg machine driver
From: Shengjiu Wang @ 2021-02-07 10:23 UTC (permalink / raw)
  To: lgirdwood, broonie, perex, tiwai, alsa-devel, linux-kernel, timur,
	nicoleotsuka, Xiubo.Lee, festevam, linuxppc-dev, robh+dt,
	devicetree
In-Reply-To: <1612693435-31418-1-git-send-email-shengjiu.wang@nxp.com>

Imx-rpmsg is a new added machine driver for supporting audio on Cortex-M
core. The Cortex-M core will control the audio interface, DMA and audio
codec, setup the pipeline, the audio driver on Cortex-A core side is just
to communitcate with M core, it is a virtual sound card and don't touch
the hardware.

Signed-off-by: Shengjiu Wang <shengjiu.wang@nxp.com>
---
 .../bindings/sound/imx-audio-rpmsg.yaml       | 48 +++++++++++++++++++
 1 file changed, 48 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/sound/imx-audio-rpmsg.yaml

diff --git a/Documentation/devicetree/bindings/sound/imx-audio-rpmsg.yaml b/Documentation/devicetree/bindings/sound/imx-audio-rpmsg.yaml
new file mode 100644
index 000000000000..b941aeb80678
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/imx-audio-rpmsg.yaml
@@ -0,0 +1,48 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/sound/imx-audio-rpmsg.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: NXP i.MX audio complex with rpmsg
+
+maintainers:
+  - Shengjiu Wang <shengjiu.wang@nxp.com>
+
+properties:
+  compatible:
+    enum:
+      - fsl,imx-audio-rpmsg
+
+  model:
+    $ref: /schemas/types.yaml#/definitions/string
+    description: User specified audio sound card name
+
+  audio-cpu:
+    description: The phandle of an CPU DAI controller
+
+  rpmsg-out:
+    description: |
+      This is a boolean property. If present, the transmitting function
+      will be enabled,
+
+  rpmsg-in:
+    description: |
+      This is a boolean property. If present, the receiving function
+      will be enabled.
+
+required:
+  - compatible
+  - model
+  - audio-cpu
+
+additionalProperties: false
+
+examples:
+  - |
+    sound-rpmsg {
+        compatible = "fsl,imx-audio-rpmsg";
+        model = "ak4497-audio";
+        audio-cpu = <&rpmsg_audio>;
+        rpmsg-out;
+    };
-- 
2.27.0


^ permalink raw reply related

* [PATCH v2 6/7] ASoC: imx-rpmsg: Add machine driver for audio base on rpmsg
From: Shengjiu Wang @ 2021-02-07 10:23 UTC (permalink / raw)
  To: lgirdwood, broonie, perex, tiwai, alsa-devel, linux-kernel, timur,
	nicoleotsuka, Xiubo.Lee, festevam, linuxppc-dev, robh+dt,
	devicetree
In-Reply-To: <1612693435-31418-1-git-send-email-shengjiu.wang@nxp.com>

The platform device is not registered by device tree or
cpu dai driver, it is registered by the rpmsg channel,
So add a dedicated machine driver to handle this case.

Signed-off-by: Shengjiu Wang <shengjiu.wang@nxp.com>
---
 sound/soc/fsl/Kconfig     |  12 ++++
 sound/soc/fsl/Makefile    |   2 +
 sound/soc/fsl/imx-rpmsg.c | 148 ++++++++++++++++++++++++++++++++++++++
 3 files changed, 162 insertions(+)
 create mode 100644 sound/soc/fsl/imx-rpmsg.c

diff --git a/sound/soc/fsl/Kconfig b/sound/soc/fsl/Kconfig
index 749c44fc0759..3557866d3fa2 100644
--- a/sound/soc/fsl/Kconfig
+++ b/sound/soc/fsl/Kconfig
@@ -334,6 +334,18 @@ config SND_SOC_IMX_HDMI
 	  Say Y if you want to add support for SoC audio on an i.MX board with
 	  IMX HDMI.
 
+config SND_SOC_IMX_RPMSG
+	tristate "SoC Audio support for i.MX boards with rpmsg"
+	depends on RPMSG
+	select SND_SOC_IMX_PCM_RPMSG
+	select SND_SOC_IMX_AUDIO_RPMSG
+	select SND_SOC_FSL_RPMSG
+	help
+	  SoC Audio support for i.MX boards with rpmsg.
+	  There should be rpmsg devices defined in other core (M core)
+	  Say Y if you want to add support for SoC audio on an i.MX board with
+	  a rpmsg devices.
+
 endif # SND_IMX_SOC
 
 endmenu
diff --git a/sound/soc/fsl/Makefile b/sound/soc/fsl/Makefile
index ce4f4324c3a2..f146ce464acd 100644
--- a/sound/soc/fsl/Makefile
+++ b/sound/soc/fsl/Makefile
@@ -70,6 +70,7 @@ snd-soc-imx-sgtl5000-objs := imx-sgtl5000.o
 snd-soc-imx-spdif-objs := imx-spdif.o
 snd-soc-imx-audmix-objs := imx-audmix.o
 snd-soc-imx-hdmi-objs := imx-hdmi.o
+snd-soc-imx-rpmsg-objs := imx-rpmsg.o
 
 obj-$(CONFIG_SND_SOC_EUKREA_TLV320) += snd-soc-eukrea-tlv320.o
 obj-$(CONFIG_SND_SOC_IMX_ES8328) += snd-soc-imx-es8328.o
@@ -77,3 +78,4 @@ obj-$(CONFIG_SND_SOC_IMX_SGTL5000) += snd-soc-imx-sgtl5000.o
 obj-$(CONFIG_SND_SOC_IMX_SPDIF) += snd-soc-imx-spdif.o
 obj-$(CONFIG_SND_SOC_IMX_AUDMIX) += snd-soc-imx-audmix.o
 obj-$(CONFIG_SND_SOC_IMX_HDMI) += snd-soc-imx-hdmi.o
+obj-$(CONFIG_SND_SOC_IMX_RPMSG) += snd-soc-imx-rpmsg.o
diff --git a/sound/soc/fsl/imx-rpmsg.c b/sound/soc/fsl/imx-rpmsg.c
new file mode 100644
index 000000000000..a87dcbce4f36
--- /dev/null
+++ b/sound/soc/fsl/imx-rpmsg.c
@@ -0,0 +1,148 @@
+// SPDX-License-Identifier: GPL-2.0+
+// Copyright 2017-2020 NXP
+
+#include <linux/module.h>
+#include <linux/of_platform.h>
+#include <linux/of_reserved_mem.h>
+#include <linux/i2c.h>
+#include <linux/of_gpio.h>
+#include <linux/slab.h>
+#include <linux/gpio.h>
+#include <linux/clk.h>
+#include <sound/soc.h>
+#include <sound/jack.h>
+#include <sound/control.h>
+#include <sound/pcm_params.h>
+#include <sound/soc-dapm.h>
+#include "imx-pcm-rpmsg.h"
+
+struct imx_rpmsg {
+	struct snd_soc_dai_link dai;
+	struct snd_soc_card card;
+};
+
+static int imx_rpmsg_probe(struct platform_device *pdev)
+{
+	struct snd_soc_dai_link_component *dlc;
+	struct platform_device *cpu_pdev;
+	struct of_phandle_args args;
+	struct device_node *cpu_np;
+	struct imx_rpmsg *data;
+	int ret;
+
+	dlc = devm_kzalloc(&pdev->dev, 3 * sizeof(*dlc), GFP_KERNEL);
+	if (!dlc)
+		return -ENOMEM;
+
+	cpu_np = of_parse_phandle(pdev->dev.of_node, "audio-cpu", 0);
+	if (!cpu_np) {
+		dev_err(&pdev->dev, "cpu dai phandle missing or invalid\n");
+		ret = -EINVAL;
+		goto fail;
+	}
+
+	data = devm_kzalloc(&pdev->dev, sizeof(*data), GFP_KERNEL);
+	if (!data) {
+		ret = -ENOMEM;
+		goto fail;
+	}
+
+	cpu_pdev = of_find_device_by_node(cpu_np);
+	if (!cpu_pdev) {
+		dev_err(&pdev->dev, "failed to find rpmsg platform device\n");
+		ret = -EINVAL;
+		goto fail;
+	}
+
+	ret = of_reserved_mem_device_init_by_idx(&pdev->dev, pdev->dev.of_node, 0);
+	if (ret)
+		dev_warn(&pdev->dev, "no reserved DMA memory\n");
+
+	data->dai.cpus = &dlc[0];
+	data->dai.num_cpus = 1;
+	data->dai.platforms = &dlc[1];
+	data->dai.num_platforms = 1;
+	data->dai.codecs = &dlc[2];
+	data->dai.num_codecs = 1;
+
+	data->dai.name = "rpmsg hifi";
+	data->dai.stream_name = "rpmsg hifi";
+	data->dai.dai_fmt = SND_SOC_DAIFMT_I2S |
+			    SND_SOC_DAIFMT_NB_NF |
+			    SND_SOC_DAIFMT_CBS_CFS;
+
+	/* Optional codec node */
+	ret = of_parse_phandle_with_fixed_args(pdev->dev.of_node,
+					       "audio-codec", 0, 0, &args);
+	if (ret) {
+		data->dai.codecs->dai_name = "snd-soc-dummy-dai";
+		data->dai.codecs->name = "snd-soc-dummy";
+	} else {
+		data->dai.codecs->of_node = args.np;
+		ret = snd_soc_get_dai_name(&args, &data->dai.codecs->dai_name);
+		if (ret) {
+			dev_err(&pdev->dev, "Unable to get codec_dai_name\n");
+			goto fail;
+		}
+	}
+
+	data->dai.cpus->dai_name = dev_name(&cpu_pdev->dev);
+	data->dai.platforms->name = IMX_PCM_DRV_NAME;
+	data->dai.playback_only = true;
+	data->dai.capture_only = true;
+	data->card.num_links = 1;
+	data->card.dai_link = &data->dai;
+
+	if (of_property_read_bool(pdev->dev.of_node, "rpmsg-out"))
+		data->dai.capture_only = false;
+
+	if (of_property_read_bool(pdev->dev.of_node, "rpmsg-in"))
+		data->dai.playback_only = false;
+
+	if (data->dai.playback_only && data->dai.capture_only) {
+		dev_err(&pdev->dev, "no enabled rpmsg DAI link\n");
+		ret = -EINVAL;
+		goto fail;
+	}
+
+	data->card.dev = &pdev->dev;
+	data->card.owner = THIS_MODULE;
+	ret = snd_soc_of_parse_card_name(&data->card, "model");
+	if (ret)
+		goto fail;
+
+	platform_set_drvdata(pdev, &data->card);
+	snd_soc_card_set_drvdata(&data->card, data);
+	ret = devm_snd_soc_register_card(&pdev->dev, &data->card);
+	if (ret) {
+		dev_err(&pdev->dev, "snd_soc_register_card failed (%d)\n", ret);
+		goto fail;
+	}
+
+fail:
+	if (cpu_np)
+		of_node_put(cpu_np);
+	return ret;
+}
+
+static const struct of_device_id imx_rpmsg_dt_ids[] = {
+	{ .compatible = "fsl,imx-audio-rpmsg", },
+	{ /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, imx_rpmsg_dt_ids);
+
+static struct platform_driver imx_rpmsg_driver = {
+	.driver = {
+		.name = "imx-audio-rpmsg",
+		.owner = THIS_MODULE,
+		.pm = &snd_soc_pm_ops,
+		.of_match_table = imx_rpmsg_dt_ids,
+	},
+	.probe = imx_rpmsg_probe,
+};
+module_platform_driver(imx_rpmsg_driver);
+
+MODULE_DESCRIPTION("Freescale SoC Audio RPMSG Machine Driver");
+MODULE_AUTHOR("Shengjiu Wang <shengjiu.wang@nxp.com>");
+MODULE_ALIAS("platform:imx-rpmsg");
+MODULE_LICENSE("GPL v2");
-- 
2.27.0


^ permalink raw reply related

* Re: [PATCH] ASoC: fsl: constify static snd_soc_dai_ops structs
From: Shengjiu Wang @ 2021-02-07 10:37 UTC (permalink / raw)
  To: Rikard Falkeborn
  Cc: alsa-devel, Timur Tabi, Xiubo Li, Liam Girdwood, linuxppc-dev,
	Takashi Iwai, Jaroslav Kysela, Nicolin Chen, Mark Brown,
	Fabio Estevam, linux-kernel
In-Reply-To: <20210206225849.51071-1-rikard.falkeborn@gmail.com>

On Sun, Feb 7, 2021 at 6:58 AM Rikard Falkeborn
<rikard.falkeborn@gmail.com> wrote:
>
> The only usage of these is to assign their address to the 'ops' field in
> the snd_soc_dai_driver struct, which is a pointer to const. Make them
> const to allow the compiler to put them in read-only memory.
>
> Signed-off-by: Rikard Falkeborn <rikard.falkeborn@gmail.com>

Acked-by: Shengjiu Wang <shengjiu.wang@gmail.com>

^ permalink raw reply

* Re: [PATCH v7 39/42] powerpc: move NMI entry/exit code into wrapper
From: Nicholas Piggin @ 2021-02-07 12:54 UTC (permalink / raw)
  To: linuxppc-dev, Michael Ellerman; +Cc: Athira Rajeev
In-Reply-To: <1612579435.unncvipdys.astroid@bobo.none>

Excerpts from Nicholas Piggin's message of February 6, 2021 12:46 pm:
> Excerpts from Michael Ellerman's message of February 6, 2021 9:38 am:
>> Nicholas Piggin <npiggin@gmail.com> writes:
>>> Excerpts from Michael Ellerman's message of February 4, 2021 8:15 pm:
>>>> Nicholas Piggin <npiggin@gmail.com> writes:
>>>>> This moves the common NMI entry and exit code into the interrupt handler
>>>>> wrappers.
>>>>>
>>>>> This changes the behaviour of soft-NMI (watchdog) and HMI interrupts, and
>>>>> also MCE interrupts on 64e, by adding missing parts of the NMI entry to
>>>>> them.
>>>>>
>>>>> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
>>>>> ---
>>>>>  arch/powerpc/include/asm/interrupt.h | 28 ++++++++++++++++++++++
>>>>>  arch/powerpc/kernel/mce.c            | 11 ---------
>>>>>  arch/powerpc/kernel/traps.c          | 35 +++++-----------------------
>>>>>  arch/powerpc/kernel/watchdog.c       | 10 ++++----
>>>>>  4 files changed, 38 insertions(+), 46 deletions(-)
>>>> 
>>>> This is unhappy when injecting SLB multi-hits:
>>>> 
>>>>   root@p86-2:~# echo PPC_SLB_MULTIHIT > /sys/kernel/debug/provoke-crash/DIRECT
>>>>   [  312.496026][ T1344] kernel BUG at arch/powerpc/include/asm/interrupt.h:152!
>>>>   [  312.496037][ T1344] Oops: Exception in kernel mode, sig: 5 [#1]
>>>>   [  312.496045][ T1344] LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=2048 NUMA pSeries
>>>
>>> pseries hash. Blast!
>> 
>> The worst kind.
>> 
>>>> 147 static inline void interrupt_nmi_exit_prepare(struct pt_regs *regs, struct interrupt_nmi_state *state)
>>>> 148 {
>>>> 149 	if (!IS_ENABLED(CONFIG_PPC_BOOK3S_64) ||
>>>> 150 			!firmware_has_feature(FW_FEATURE_LPAR) ||
>>>> 151 			radix_enabled() || (mfmsr() & MSR_DR))
>>>> 152 		nmi_exit();
>>>> 
>>>> 
>>>> So presumably it's:
>>>> 
>>>> #define __nmi_exit()						\
>>>> 	do {							\
>>>> 		BUG_ON(!in_nmi());				\
>>>
>>> Yes that would be it, pseries machine check enables MMU half way through 
>>> so only one side of this triggers.
>>>
>>> The MSR_DR check is supposed to catch the other NMIs that run with MMU 
>>> on (perf, watchdog, etc). Suppose it could test TRAP(regs) explicitly
>>> although I wonder if we should also do this to keep things balanced
>> 
>> Yeah I think I like that. I'll give it a test.
> 
> The msr restore? Looking closer, pseries_machine_check_realmode may have
> expected mce_handle_error to enable the MMU, because irq_work_queue uses
> some per-cpu variables I think.
> 
> So the code might have to be rearranged a bit more than the patch below.

Here is a patch, it should go anywhere before this patch. Seems to
work with some test MCE injection on pseries hash.

Thanks,
Nick
--

powerpc/pseries/mce: restore msr before returning from handler

The pseries real-mode machine check handler can enable the MMU, and
return from the handler with the MMU still enabled.

This works, but real-mode handler wrapper exit handlers want to rely
on the MMU being in real-mode. So change the pseries handler to
restore the MSR after it has finished virtual mode tasks.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/platforms/pseries/ras.c | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c
index 2d9f985fd13a..377439e88598 100644
--- a/arch/powerpc/platforms/pseries/ras.c
+++ b/arch/powerpc/platforms/pseries/ras.c
@@ -722,6 +722,7 @@ static int mce_handle_error(struct pt_regs *regs, struct rtas_error_log *errp)
 	struct pseries_errorlog *pseries_log;
 	struct pseries_mc_errorlog *mce_log = NULL;
 	int disposition = rtas_error_disposition(errp);
+	unsigned long msr;
 	u8 error_type;
 
 	if (!rtas_error_extended(errp))
@@ -747,9 +748,21 @@ static int mce_handle_error(struct pt_regs *regs, struct rtas_error_log *errp)
 	 *       SLB multihit is done by now.
 	 */
 out:
-	mtmsr(mfmsr() | MSR_IR | MSR_DR);
+	msr = mfmsr();
+	mtmsr(msr | MSR_IR | MSR_DR);
+
 	disposition = mce_handle_err_virtmode(regs, errp, mce_log,
 					      disposition);
+
+	/*
+	 * Queue irq work to log this rtas event later.
+	 * irq_work_queue uses per-cpu variables, so do this in virt
+	 * mode as well.
+	 */
+	irq_work_queue(&mce_errlog_process_work);
+
+	mtmsr(msr);
+
 	return disposition;
 }
 
@@ -865,10 +878,8 @@ long pseries_machine_check_realmode(struct pt_regs *regs)
 		 * virtual mode.
 		 */
 		disposition = mce_handle_error(regs, errp);
-		fwnmi_release_errinfo();
 
-		/* Queue irq work to log this rtas event later. */
-		irq_work_queue(&mce_errlog_process_work);
+		fwnmi_release_errinfo();
 
 		if (disposition == RTAS_DISP_FULLY_RECOVERED)
 			return 1;
-- 
2.23.0


^ permalink raw reply related

* Re: [PATCH v7 28/42] powerpc: convert interrupt handlers to use wrappers
From: Nicholas Piggin @ 2021-02-07 12:56 UTC (permalink / raw)
  To: Christophe Leroy, linuxppc-dev; +Cc: Athira Rajeev
In-Reply-To: <0e319d85-9fa0-ff97-03b2-93637ad89a99@csgroup.eu>

Excerpts from Christophe Leroy's message of February 5, 2021 6:09 pm:
> 
> 
> Le 30/01/2021 à 14:08, Nicholas Piggin a écrit :
>> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
>> ---
> 
>> diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
>> index f70d3f6174c8..7ff915aae8ec 100644
>> --- a/arch/powerpc/kernel/traps.c
>> +++ b/arch/powerpc/kernel/traps.c
> 
>> @@ -1462,7 +1474,7 @@ static int emulate_math(struct pt_regs *regs)
>>   static inline int emulate_math(struct pt_regs *regs) { return -1; }
>>   #endif
>>   
>> -void program_check_exception(struct pt_regs *regs)
>> +DEFINE_INTERRUPT_HANDLER(program_check_exception)
>>   {
>>   	enum ctx_state prev_state = exception_enter();
>>   	unsigned int reason = get_reason(regs);
>> @@ -1587,14 +1599,14 @@ NOKPROBE_SYMBOL(program_check_exception);
>>    * This occurs when running in hypervisor mode on POWER6 or later
>>    * and an illegal instruction is encountered.
>>    */
>> -void emulation_assist_interrupt(struct pt_regs *regs)
>> +DEFINE_INTERRUPT_HANDLER(emulation_assist_interrupt)
>>   {
>>   	regs->msr |= REASON_ILLEGAL;
>>   	program_check_exception(regs);
> 
> Is it correct that an INTERRUPT_HANDLER calls another INTERRUPT_HANDLER ?

No, here is a patch for it, should go any time before the interrupt 
wrappers are introduced. It causes some conflicts later but are not
too complex. I can resend the series if necessary.

Thanks,
Nick

---
powerpc/traps: factor common code from program check and emulation assist

Move the program check handling into a function called by both, rather
than have the emulation assist handler call the program check handler.

This allows each of these handlers to be implemented with "interrupt
wrappers" in a later change.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/kernel/traps.c | 38 +++++++++++++++++++++++--------------
 1 file changed, 24 insertions(+), 14 deletions(-)

diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index f70d3f6174c8..2c5986109412 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -1462,9 +1462,8 @@ static int emulate_math(struct pt_regs *regs)
 static inline int emulate_math(struct pt_regs *regs) { return -1; }
 #endif
 
-void program_check_exception(struct pt_regs *regs)
+static void do_program_check(struct pt_regs *regs)
 {
-	enum ctx_state prev_state = exception_enter();
 	unsigned int reason = get_reason(regs);
 
 	/* We can now get here via a FP Unavailable exception if the core
@@ -1473,22 +1472,22 @@ void program_check_exception(struct pt_regs *regs)
 	if (reason & REASON_FP) {
 		/* IEEE FP exception */
 		parse_fpe(regs);
-		goto bail;
+		return;
 	}
 	if (reason & REASON_TRAP) {
 		unsigned long bugaddr;
 		/* Debugger is first in line to stop recursive faults in
 		 * rcu_lock, notify_die, or atomic_notifier_call_chain */
 		if (debugger_bpt(regs))
-			goto bail;
+			return;
 
 		if (kprobe_handler(regs))
-			goto bail;
+			return;
 
 		/* trap exception */
 		if (notify_die(DIE_BPT, "breakpoint", regs, 5, 5, SIGTRAP)
 				== NOTIFY_STOP)
-			goto bail;
+			return;
 
 		bugaddr = regs->nip;
 		/*
@@ -1500,10 +1499,10 @@ void program_check_exception(struct pt_regs *regs)
 		if (!(regs->msr & MSR_PR) &&  /* not user-mode */
 		    report_bug(bugaddr, regs) == BUG_TRAP_TYPE_WARN) {
 			regs->nip += 4;
-			goto bail;
+			return;
 		}
 		_exception(SIGTRAP, regs, TRAP_BRKPT, regs->nip);
-		goto bail;
+		return;
 	}
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
 	if (reason & REASON_TM) {
@@ -1524,7 +1523,7 @@ void program_check_exception(struct pt_regs *regs)
 		 */
 		if (user_mode(regs)) {
 			_exception(SIGILL, regs, ILL_ILLOPN, regs->nip);
-			goto bail;
+			return;
 		} else {
 			printk(KERN_EMERG "Unexpected TM Bad Thing exception "
 			       "at %lx (msr 0x%lx) tm_scratch=%llx\n",
@@ -1557,7 +1556,7 @@ void program_check_exception(struct pt_regs *regs)
 	 * pattern to occurrences etc. -dgibson 31/Mar/2003
 	 */
 	if (!emulate_math(regs))
-		goto bail;
+		return;
 
 	/* Try to emulate it if we should. */
 	if (reason & (REASON_ILLEGAL | REASON_PRIVILEGED)) {
@@ -1565,10 +1564,10 @@ void program_check_exception(struct pt_regs *regs)
 		case 0:
 			regs->nip += 4;
 			emulate_single_step(regs);
-			goto bail;
+			return;
 		case -EFAULT:
 			_exception(SIGSEGV, regs, SEGV_MAPERR, regs->nip);
-			goto bail;
+			return;
 		}
 	}
 
@@ -1578,7 +1577,14 @@ void program_check_exception(struct pt_regs *regs)
 	else
 		_exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
 
-bail:
+}
+
+void program_check_exception(struct pt_regs *regs)
+{
+	enum ctx_state prev_state = exception_enter();
+
+	do_program_check(regs);
+
 	exception_exit(prev_state);
 }
 NOKPROBE_SYMBOL(program_check_exception);
@@ -1589,8 +1595,12 @@ NOKPROBE_SYMBOL(program_check_exception);
  */
 void emulation_assist_interrupt(struct pt_regs *regs)
 {
+	enum ctx_state prev_state = exception_enter();
+
 	regs->msr |= REASON_ILLEGAL;
-	program_check_exception(regs);
+	do_program_check(regs);
+
+	exception_exit(prev_state);
 }
 NOKPROBE_SYMBOL(emulation_assist_interrupt);
 
-- 
2.23.0



^ permalink raw reply related

* Re: [PATCH v7 42/42] powerpc/64s: power4 nap fixup in C
From: Nicholas Piggin @ 2021-02-07 12:58 UTC (permalink / raw)
  To: linuxppc-dev, Michael Ellerman; +Cc: Athira Rajeev
In-Reply-To: <878s86dals.fsf@mpe.ellerman.id.au>

Excerpts from Michael Ellerman's message of February 2, 2021 8:31 pm:
> Nicholas Piggin <npiggin@gmail.com> writes:
>> There is no need for this to be in asm, use the new intrrupt entry wrapper.
>>
>> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
>> ---
>>  arch/powerpc/include/asm/interrupt.h   | 15 +++++++++
>>  arch/powerpc/include/asm/processor.h   |  1 +
>>  arch/powerpc/include/asm/thread_info.h |  6 ++++
>>  arch/powerpc/kernel/exceptions-64s.S   | 45 --------------------------
>>  arch/powerpc/kernel/idle_book3s.S      |  4 +++
>>  5 files changed, 26 insertions(+), 45 deletions(-)
> 
> Something in here is making my G5 not boot.
> 
> I don't know what the problem is because that machine is in the office,
> and I am not, and it has no serial console.
> 
> I tried turning on pstore but it doesn't record anything :/

Here is an incremental patch (hopefully) should fix it. Should be
folded with this one.

Thanks,
Nick
---
powerpc/64s: nap_adjust_return fix

This is a fix for patch "powerpc/64s: power4 nap fixup in C".

Non-maskable interrupts should not create any exit work, so there
should be no reason to come out of idle. So take the nap fixup out
of NMIs.

The problem with having them here is that an interrupt can come in
and then get interrupted by an NMI, the NMI will then set its return
to the idle wakeup and never complete the regular interrupt handling.

Before the "powerpc/64s: power4 nap fixup in C" patch, this wouldn't
occur because the true NMIs didn't call FIXUP_NAP, and the other
interrupts would call it before running their handler, so they would
not have a chance to call may_hard_irq_enable and allow soft-NMIs
(watchdog, perf) to come through.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/include/asm/interrupt.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h
index 25f2420b1965..afdf4aba2e6b 100644
--- a/arch/powerpc/include/asm/interrupt.h
+++ b/arch/powerpc/include/asm/interrupt.h
@@ -12,6 +12,7 @@ static inline void nap_adjust_return(struct pt_regs *regs)
 {
 #ifdef CONFIG_PPC_970_NAP
 	if (unlikely(test_thread_local_flags(_TLF_NAPPING))) {
+		/* Can avoid a test-and-clear because NMIs do not call this */
 		clear_thread_local_flags(_TLF_NAPPING);
 		regs->nip = (unsigned long)power4_idle_nap_return;
 	}
@@ -164,7 +165,10 @@ static inline void interrupt_nmi_exit_prepare(struct pt_regs *regs, struct inter
 			radix_enabled() || (mfmsr() & MSR_DR))
 		nmi_exit();
 
-	nap_adjust_return(regs);
+	/*
+	 * nmi does not call nap_adjust_return because nmi should not create
+	 * new work to do (must use irq_work for that).
+	 */
 
 #ifdef CONFIG_PPC64
 	if (TRAP(regs) != 0x900 && TRAP(regs) != 0xf00 && TRAP(regs) != 0x260)
-- 
2.23.0


^ permalink raw reply related

* Re: [PATCH RFC v1 5/6] xen-swiotlb: convert variables to arrays
From: Christoph Hellwig @ 2021-02-07 15:56 UTC (permalink / raw)
  To: Dongli Zhang
  Cc: ulf.hansson, airlied, joonas.lahtinen, dri-devel, linux-kernel,
	bhelgaas, paulus, hpa, hch, m.szyprowski, sstabellini,
	adrian.hunter, x86, joe.jin, mingo, peterz, mingo, bskeggs,
	linux-pci, xen-devel, matthew.auld, thomas.lendacky, konrad.wilk,
	intel-gfx, jani.nikula, bp, rodrigo.vivi, nouveau,
	boris.ostrovsky, chris, jgross, tsbogend, robin.murphy, linux-mmc,
	linux-mips, iommu, tglx, bauerman, daniel, akpm, linuxppc-dev,
	rppt
In-Reply-To: <20210204084023.GA32328@lst.de>

On Thu, Feb 04, 2021 at 09:40:23AM +0100, Christoph Hellwig wrote:
> So one thing that has been on my mind for a while:  I'd really like
> to kill the separate dma ops in Xen swiotlb.  If we compare xen-swiotlb
> to swiotlb the main difference seems to be:
> 
>  - additional reasons to bounce I/O vs the plain DMA capable
>  - the possibility to do a hypercall on arm/arm64
>  - an extra translation layer before doing the phys_to_dma and vice
>    versa
>  - an special memory allocator
> 
> I wonder if inbetween a few jump labels or other no overhead enablement
> options and possibly better use of the dma_range_map we could kill
> off most of swiotlb-xen instead of maintaining all this code duplication?

So I looked at this a bit more.

For x86 with XENFEAT_auto_translated_physmap (how common is that?)
pfn_to_gfn is a nop, so plain phys_to_dma/dma_to_phys do work as-is.

xen_arch_need_swiotlb always returns true for x86, and
range_straddles_page_boundary should never be true for the
XENFEAT_auto_translated_physmap case.

So as far as I can tell the mapping fast path for the
XENFEAT_auto_translated_physmap can be trivially reused from swiotlb.

That leaves us with the next more complicated case, x86 or fully cache
coherent arm{,64} without XENFEAT_auto_translated_physmap.  In that case
we need to patch in a phys_to_dma/dma_to_phys that performs the MFN
lookup, which could be done using alternatives or jump labels.
I think if that is done right we should also be able to let that cover
the foreign pages in is_xen_swiotlb_buffer/is_swiotlb_buffer, but
in that worst case that would need another alternative / jump label.

For non-coherent arm{,64} we'd also need to use alternatives or jump
labels to for the cache maintainance ops, but that isn't a hard problem
either.



^ permalink raw reply

* [PATCH 5/8] xen-swiotlb: remove xen_io_tlb_start and xen_io_tlb_nslabs
From: Christoph Hellwig @ 2021-02-07 16:09 UTC (permalink / raw)
  To: Konrad Rzeszutek Wilk
  Cc: iommu, xen-devel, Claire Chang, linuxppc-dev, Dongli Zhang
In-Reply-To: <20210207160934.2955931-1-hch@lst.de>

The xen_io_tlb_start and xen_io_tlb_nslabs variables ar now only used in
xen_swiotlb_init, so replace them with local variables.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/xen/swiotlb-xen.c | 57 +++++++++++++++++----------------------
 1 file changed, 25 insertions(+), 32 deletions(-)

diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index 57f8d5fadc1fcd..6e0f2c5ecd1a2f 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -40,14 +40,7 @@
 
 #include <trace/events/swiotlb.h>
 #define MAX_DMA_BITS 32
-/*
- * Used to do a quick range check in swiotlb_tbl_unmap_single and
- * swiotlb_tbl_sync_single_*, to see if the memory was in fact allocated by this
- * API.
- */
 
-static char *xen_io_tlb_start;
-static unsigned long xen_io_tlb_nslabs;
 /*
  * Quick lookup value of the bus address of the IOTLB.
  */
@@ -169,75 +162,75 @@ int __ref xen_swiotlb_init(int verbose, bool early)
 	int rc = -ENOMEM;
 	enum xen_swiotlb_err m_ret = XEN_SWIOTLB_UNKNOWN;
 	unsigned int repeat = 3;
+	char *start;
+	unsigned long nslabs;
 
-	xen_io_tlb_nslabs = swiotlb_nr_tbl();
+	nslabs = swiotlb_nr_tbl();
 retry:
-	if (!xen_io_tlb_nslabs)
-		xen_io_tlb_nslabs = DEFAULT_NSLABS;
-	bytes = xen_io_tlb_nslabs << IO_TLB_SHIFT;
+	if (!nslabs)
+		nslabs = DEFAULT_NSLABS;
+	bytes = nslabs << IO_TLB_SHIFT;
 	order = get_order(bytes);
 
 	/*
 	 * IO TLB memory already allocated. Just use it.
 	 */
-	if (io_tlb_start != 0) {
-		xen_io_tlb_start = phys_to_virt(io_tlb_start);
+	if (io_tlb_start != 0)
 		goto end;
-	}
 
 	/*
 	 * Get IO TLB memory from any location.
 	 */
 	if (early) {
-		xen_io_tlb_start = memblock_alloc(PAGE_ALIGN(bytes),
+		start = memblock_alloc(PAGE_ALIGN(bytes),
 						  PAGE_SIZE);
-		if (!xen_io_tlb_start)
+		if (!start)
 			panic("%s: Failed to allocate %lu bytes align=0x%lx\n",
 			      __func__, PAGE_ALIGN(bytes), PAGE_SIZE);
 	} else {
 #define SLABS_PER_PAGE (1 << (PAGE_SHIFT - IO_TLB_SHIFT))
 #define IO_TLB_MIN_SLABS ((1<<20) >> IO_TLB_SHIFT)
 		while ((SLABS_PER_PAGE << order) > IO_TLB_MIN_SLABS) {
-			xen_io_tlb_start = (void *)xen_get_swiotlb_free_pages(order);
-			if (xen_io_tlb_start)
+			start = (void *)xen_get_swiotlb_free_pages(order);
+			if (start)
 				break;
 			order--;
 		}
 		if (order != get_order(bytes)) {
 			pr_warn("Warning: only able to allocate %ld MB for software IO TLB\n",
 				(PAGE_SIZE << order) >> 20);
-			xen_io_tlb_nslabs = SLABS_PER_PAGE << order;
-			bytes = xen_io_tlb_nslabs << IO_TLB_SHIFT;
+			nslabs = SLABS_PER_PAGE << order;
+			bytes = nslabs << IO_TLB_SHIFT;
 		}
 	}
-	if (!xen_io_tlb_start) {
+	if (!start) {
 		m_ret = XEN_SWIOTLB_ENOMEM;
 		goto error;
 	}
 	/*
 	 * And replace that memory with pages under 4GB.
 	 */
-	rc = xen_swiotlb_fixup(xen_io_tlb_start,
+	rc = xen_swiotlb_fixup(start,
 			       bytes,
-			       xen_io_tlb_nslabs);
+			       nslabs);
 	if (rc) {
 		if (early)
-			memblock_free(__pa(xen_io_tlb_start),
+			memblock_free(__pa(start),
 				      PAGE_ALIGN(bytes));
 		else {
-			free_pages((unsigned long)xen_io_tlb_start, order);
-			xen_io_tlb_start = NULL;
+			free_pages((unsigned long)start, order);
+			start = NULL;
 		}
 		m_ret = XEN_SWIOTLB_EFIXUP;
 		goto error;
 	}
 	if (early) {
-		if (swiotlb_init_with_tbl(xen_io_tlb_start, xen_io_tlb_nslabs,
+		if (swiotlb_init_with_tbl(start, nslabs,
 			 verbose))
 			panic("Cannot allocate SWIOTLB buffer");
 		rc = 0;
 	} else
-		rc = swiotlb_late_init_with_tbl(xen_io_tlb_start, xen_io_tlb_nslabs);
+		rc = swiotlb_late_init_with_tbl(start, nslabs);
 
 end:
 	if (!rc)
@@ -246,17 +239,17 @@ int __ref xen_swiotlb_init(int verbose, bool early)
 	return rc;
 error:
 	if (repeat--) {
-		xen_io_tlb_nslabs = max(1024UL, /* Min is 2MB */
-					(xen_io_tlb_nslabs >> 1));
+		nslabs = max(1024UL, /* Min is 2MB */
+					(nslabs >> 1));
 		pr_info("Lowering to %luMB\n",
-			(xen_io_tlb_nslabs << IO_TLB_SHIFT) >> 20);
+			(nslabs << IO_TLB_SHIFT) >> 20);
 		goto retry;
 	}
 	pr_err("%s (rc:%d)\n", xen_swiotlb_error(m_ret), rc);
 	if (early)
 		panic("%s (rc:%d)", xen_swiotlb_error(m_ret), rc);
 	else
-		free_pages((unsigned long)xen_io_tlb_start, order);
+		free_pages((unsigned long)start, order);
 	return rc;
 }
 
-- 
2.29.2


^ permalink raw reply related

* swiotlb cleanups
From: Christoph Hellwig @ 2021-02-07 16:09 UTC (permalink / raw)
  To: Konrad Rzeszutek Wilk
  Cc: iommu, xen-devel, Claire Chang, linuxppc-dev, Dongli Zhang

Hi Konrad,

this series contains a bunch of swiotlb cleanups, mostly to reduce the
amount of internals exposed to code outside of swiotlb.c, which should
helper to prepare for supporting multiple different bounce buffer pools.

^ permalink raw reply

* [PATCH 4/8] xen-swiotlb: remove xen_set_nslabs
From: Christoph Hellwig @ 2021-02-07 16:09 UTC (permalink / raw)
  To: Konrad Rzeszutek Wilk
  Cc: iommu, xen-devel, Claire Chang, linuxppc-dev, Dongli Zhang
In-Reply-To: <20210207160934.2955931-1-hch@lst.de>

The xen_set_nslabs function is a little weird, as it has just one
caller, that caller passes a global variable as the argument,
which is then overriden in the function and a derivative of it
returned.  Just add a cpp symbol for the default size using a readable
constant and open code the remaining three lines in the caller.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/xen/swiotlb-xen.c | 19 +++++++------------
 1 file changed, 7 insertions(+), 12 deletions(-)

diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index 4298f74a083985..57f8d5fadc1fcd 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -138,16 +138,6 @@ xen_swiotlb_fixup(void *buf, size_t size, unsigned long nslabs)
 	} while (i < nslabs);
 	return 0;
 }
-static unsigned long xen_set_nslabs(unsigned long nr_tbl)
-{
-	if (!nr_tbl) {
-		xen_io_tlb_nslabs = (64 * 1024 * 1024 >> IO_TLB_SHIFT);
-		xen_io_tlb_nslabs = ALIGN(xen_io_tlb_nslabs, IO_TLB_SEGSIZE);
-	} else
-		xen_io_tlb_nslabs = nr_tbl;
-
-	return xen_io_tlb_nslabs << IO_TLB_SHIFT;
-}
 
 enum xen_swiotlb_err {
 	XEN_SWIOTLB_UNKNOWN = 0,
@@ -170,6 +160,9 @@ static const char *xen_swiotlb_error(enum xen_swiotlb_err err)
 	}
 	return "";
 }
+
+#define DEFAULT_NSLABS		ALIGN(SZ_64M >> IO_TLB_SHIFT, IO_TLB_SEGSIZE)
+
 int __ref xen_swiotlb_init(int verbose, bool early)
 {
 	unsigned long bytes, order;
@@ -179,8 +172,10 @@ int __ref xen_swiotlb_init(int verbose, bool early)
 
 	xen_io_tlb_nslabs = swiotlb_nr_tbl();
 retry:
-	bytes = xen_set_nslabs(xen_io_tlb_nslabs);
-	order = get_order(xen_io_tlb_nslabs << IO_TLB_SHIFT);
+	if (!xen_io_tlb_nslabs)
+		xen_io_tlb_nslabs = DEFAULT_NSLABS;
+	bytes = xen_io_tlb_nslabs << IO_TLB_SHIFT;
+	order = get_order(bytes);
 
 	/*
 	 * IO TLB memory already allocated. Just use it.
-- 
2.29.2


^ permalink raw reply related

* [PATCH 1/8] powerpc/svm: stop using io_tlb_start
From: Christoph Hellwig @ 2021-02-07 16:09 UTC (permalink / raw)
  To: Konrad Rzeszutek Wilk
  Cc: iommu, xen-devel, Claire Chang, linuxppc-dev, Dongli Zhang
In-Reply-To: <20210207160934.2955931-1-hch@lst.de>

Use the local variable that is passed to swiotlb_init_with_tbl for
freeing the memory in the failure case to isolate the code a little
better from swiotlb internals.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 arch/powerpc/platforms/pseries/svm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/platforms/pseries/svm.c b/arch/powerpc/platforms/pseries/svm.c
index 7b739cc7a8a93e..b9968ac7cc0789 100644
--- a/arch/powerpc/platforms/pseries/svm.c
+++ b/arch/powerpc/platforms/pseries/svm.c
@@ -56,7 +56,7 @@ void __init svm_swiotlb_init(void)
 		return;
 
 	if (io_tlb_start)
-		memblock_free_early(io_tlb_start,
+		memblock_free_early(__pa(vstart),
 				    PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT));
 	panic("SVM: Cannot allocate SWIOTLB buffer");
 }
-- 
2.29.2


^ permalink raw reply related

* [PATCH 3/8] xen-swiotlb: use io_tlb_end in xen_swiotlb_dma_supported
From: Christoph Hellwig @ 2021-02-07 16:09 UTC (permalink / raw)
  To: Konrad Rzeszutek Wilk
  Cc: iommu, xen-devel, Claire Chang, linuxppc-dev, Dongli Zhang
In-Reply-To: <20210207160934.2955931-1-hch@lst.de>

Use the existing variable that holds the physical address for
xen_io_tlb_end to simplify xen_swiotlb_dma_supported a bit, and remove
the otherwise unused xen_io_tlb_end variable and the xen_virt_to_bus
helper.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/xen/swiotlb-xen.c | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index a4026822a889f7..4298f74a083985 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -46,7 +46,7 @@
  * API.
  */
 
-static char *xen_io_tlb_start, *xen_io_tlb_end;
+static char *xen_io_tlb_start;
 static unsigned long xen_io_tlb_nslabs;
 /*
  * Quick lookup value of the bus address of the IOTLB.
@@ -82,11 +82,6 @@ static inline phys_addr_t xen_dma_to_phys(struct device *dev,
 	return xen_bus_to_phys(dev, dma_to_phys(dev, dma_addr));
 }
 
-static inline dma_addr_t xen_virt_to_bus(struct device *dev, void *address)
-{
-	return xen_phys_to_dma(dev, virt_to_phys(address));
-}
-
 static inline int range_straddles_page_boundary(phys_addr_t p, size_t size)
 {
 	unsigned long next_bfn, xen_pfn = XEN_PFN_DOWN(p);
@@ -250,7 +245,6 @@ int __ref xen_swiotlb_init(int verbose, bool early)
 		rc = swiotlb_late_init_with_tbl(xen_io_tlb_start, xen_io_tlb_nslabs);
 
 end:
-	xen_io_tlb_end = xen_io_tlb_start + bytes;
 	if (!rc)
 		swiotlb_set_max_segment(PAGE_SIZE);
 
@@ -558,7 +552,7 @@ xen_swiotlb_sync_sg_for_device(struct device *dev, struct scatterlist *sgl,
 static int
 xen_swiotlb_dma_supported(struct device *hwdev, u64 mask)
 {
-	return xen_virt_to_bus(hwdev, xen_io_tlb_end - 1) <= mask;
+	return xen_phys_to_dma(hwdev, io_tlb_end - 1) <= mask;
 }
 
 const struct dma_map_ops xen_swiotlb_dma_ops = {
-- 
2.29.2


^ permalink raw reply related

* [PATCH 6/8] swiotlb: lift the double initialization protection from xen-swiotlb
From: Christoph Hellwig @ 2021-02-07 16:09 UTC (permalink / raw)
  To: Konrad Rzeszutek Wilk
  Cc: iommu, xen-devel, Claire Chang, linuxppc-dev, Dongli Zhang
In-Reply-To: <20210207160934.2955931-1-hch@lst.de>

Lift the double initialization protection from xen-swiotlb to the core
code to avoid exposing too many swiotlb internals.  Also upgrade the
check to a warning as it should not happen.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/xen/swiotlb-xen.c | 7 -------
 kernel/dma/swiotlb.c      | 8 ++++++++
 2 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index 6e0f2c5ecd1a2f..e6c8556e879ee6 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -172,12 +172,6 @@ int __ref xen_swiotlb_init(int verbose, bool early)
 	bytes = nslabs << IO_TLB_SHIFT;
 	order = get_order(bytes);
 
-	/*
-	 * IO TLB memory already allocated. Just use it.
-	 */
-	if (io_tlb_start != 0)
-		goto end;
-
 	/*
 	 * Get IO TLB memory from any location.
 	 */
@@ -232,7 +226,6 @@ int __ref xen_swiotlb_init(int verbose, bool early)
 	} else
 		rc = swiotlb_late_init_with_tbl(start, nslabs);
 
-end:
 	if (!rc)
 		swiotlb_set_max_segment(PAGE_SIZE);
 
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index adcc3c87e10078..a3737961ae5769 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -224,6 +224,10 @@ int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose)
 	unsigned long i, bytes;
 	size_t alloc_size;
 
+	/* protect against double initialization */
+	if (WARN_ON_ONCE(io_tlb_start))
+		return -ENOMEM;
+
 	bytes = nslabs << IO_TLB_SHIFT;
 
 	io_tlb_nslabs = nslabs;
@@ -355,6 +359,10 @@ swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
 {
 	unsigned long i, bytes;
 
+	/* protect against double initialization */
+	if (WARN_ON_ONCE(io_tlb_start))
+		return -ENOMEM;
+
 	bytes = nslabs << IO_TLB_SHIFT;
 
 	io_tlb_nslabs = nslabs;
-- 
2.29.2


^ permalink raw reply related

* [PATCH 2/8] xen-swiotlb: use is_swiotlb_buffer in is_xen_swiotlb_buffer
From: Christoph Hellwig @ 2021-02-07 16:09 UTC (permalink / raw)
  To: Konrad Rzeszutek Wilk
  Cc: iommu, xen-devel, Claire Chang, linuxppc-dev, Dongli Zhang
In-Reply-To: <20210207160934.2955931-1-hch@lst.de>

Use the is_swiotlb_buffer to check if a physical address is
a swiotlb buffer.  This works because xen-swiotlb does use the
same buffer as the main swiotlb code, and xen_io_tlb_{start,end}
are just the addresses for it that went through phys_to_virt.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/xen/swiotlb-xen.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index 2b385c1b4a99cb..a4026822a889f7 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -111,10 +111,8 @@ static int is_xen_swiotlb_buffer(struct device *dev, dma_addr_t dma_addr)
 	 * have the same virtual address as another address
 	 * in our domain. Therefore _only_ check address within our domain.
 	 */
-	if (pfn_valid(PFN_DOWN(paddr))) {
-		return paddr >= virt_to_phys(xen_io_tlb_start) &&
-		       paddr < virt_to_phys(xen_io_tlb_end);
-	}
+	if (pfn_valid(PFN_DOWN(paddr)))
+		return is_swiotlb_buffer(paddr);
 	return 0;
 }
 
-- 
2.29.2


^ permalink raw reply related

* [PATCH 7/8] xen-swiotlb: split xen_swiotlb_init
From: Christoph Hellwig @ 2021-02-07 16:09 UTC (permalink / raw)
  To: Konrad Rzeszutek Wilk
  Cc: iommu, xen-devel, Claire Chang, linuxppc-dev, Dongli Zhang
In-Reply-To: <20210207160934.2955931-1-hch@lst.de>

Split xen_swiotlb_init into a normal an an early case.  That makes both
much simpler and more readable, and also allows marking the early
code as __init and x86-only.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 arch/arm/xen/mm.c              |   2 +-
 arch/x86/xen/pci-swiotlb-xen.c |   4 +-
 drivers/xen/swiotlb-xen.c      | 124 +++++++++++++++++++--------------
 include/xen/swiotlb-xen.h      |   3 +-
 4 files changed, 75 insertions(+), 58 deletions(-)

diff --git a/arch/arm/xen/mm.c b/arch/arm/xen/mm.c
index 467fa225c3d0ed..aae950cd053fea 100644
--- a/arch/arm/xen/mm.c
+++ b/arch/arm/xen/mm.c
@@ -140,7 +140,7 @@ static int __init xen_mm_init(void)
 	struct gnttab_cache_flush cflush;
 	if (!xen_initial_domain())
 		return 0;
-	xen_swiotlb_init(1, false);
+	xen_swiotlb_init();
 
 	cflush.op = 0;
 	cflush.a.dev_bus_addr = 0;
diff --git a/arch/x86/xen/pci-swiotlb-xen.c b/arch/x86/xen/pci-swiotlb-xen.c
index 19ae3e4fe4e98e..54f9aa7e845739 100644
--- a/arch/x86/xen/pci-swiotlb-xen.c
+++ b/arch/x86/xen/pci-swiotlb-xen.c
@@ -59,7 +59,7 @@ int __init pci_xen_swiotlb_detect(void)
 void __init pci_xen_swiotlb_init(void)
 {
 	if (xen_swiotlb) {
-		xen_swiotlb_init(1, true /* early */);
+		xen_swiotlb_init_early();
 		dma_ops = &xen_swiotlb_dma_ops;
 
 #ifdef CONFIG_PCI
@@ -76,7 +76,7 @@ int pci_xen_swiotlb_init_late(void)
 	if (xen_swiotlb)
 		return 0;
 
-	rc = xen_swiotlb_init(1, false /* late */);
+	rc = xen_swiotlb_init();
 	if (rc)
 		return rc;
 
diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index e6c8556e879ee6..b2d9e77059bf5a 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -156,96 +156,112 @@ static const char *xen_swiotlb_error(enum xen_swiotlb_err err)
 
 #define DEFAULT_NSLABS		ALIGN(SZ_64M >> IO_TLB_SHIFT, IO_TLB_SEGSIZE)
 
-int __ref xen_swiotlb_init(int verbose, bool early)
+int __ref xen_swiotlb_init(void)
 {
-	unsigned long bytes, order;
-	int rc = -ENOMEM;
 	enum xen_swiotlb_err m_ret = XEN_SWIOTLB_UNKNOWN;
+	unsigned long nslabs, bytes, order;
 	unsigned int repeat = 3;
+	int rc = -ENOMEM;
 	char *start;
-	unsigned long nslabs;
 
 	nslabs = swiotlb_nr_tbl();
-retry:
 	if (!nslabs)
 		nslabs = DEFAULT_NSLABS;
+retry:
+	m_ret = XEN_SWIOTLB_ENOMEM;
 	bytes = nslabs << IO_TLB_SHIFT;
 	order = get_order(bytes);
 
 	/*
 	 * Get IO TLB memory from any location.
 	 */
-	if (early) {
-		start = memblock_alloc(PAGE_ALIGN(bytes),
-						  PAGE_SIZE);
-		if (!start)
-			panic("%s: Failed to allocate %lu bytes align=0x%lx\n",
-			      __func__, PAGE_ALIGN(bytes), PAGE_SIZE);
-	} else {
 #define SLABS_PER_PAGE (1 << (PAGE_SHIFT - IO_TLB_SHIFT))
 #define IO_TLB_MIN_SLABS ((1<<20) >> IO_TLB_SHIFT)
-		while ((SLABS_PER_PAGE << order) > IO_TLB_MIN_SLABS) {
-			start = (void *)xen_get_swiotlb_free_pages(order);
-			if (start)
-				break;
-			order--;
-		}
-		if (order != get_order(bytes)) {
-			pr_warn("Warning: only able to allocate %ld MB for software IO TLB\n",
-				(PAGE_SIZE << order) >> 20);
-			nslabs = SLABS_PER_PAGE << order;
-			bytes = nslabs << IO_TLB_SHIFT;
-		}
+	while ((SLABS_PER_PAGE << order) > IO_TLB_MIN_SLABS) {
+		start = (void *)xen_get_swiotlb_free_pages(order);
+		if (start)
+			break;
+		order--;
 	}
-	if (!start) {
-		m_ret = XEN_SWIOTLB_ENOMEM;
+	if (!start)
 		goto error;
+	if (order != get_order(bytes)) {
+		pr_warn("Warning: only able to allocate %ld MB for software IO TLB\n",
+			(PAGE_SIZE << order) >> 20);
+		nslabs = SLABS_PER_PAGE << order;
+		bytes = nslabs << IO_TLB_SHIFT;
 	}
+
 	/*
 	 * And replace that memory with pages under 4GB.
 	 */
-	rc = xen_swiotlb_fixup(start,
-			       bytes,
-			       nslabs);
+	rc = xen_swiotlb_fixup(start, bytes, nslabs);
 	if (rc) {
-		if (early)
-			memblock_free(__pa(start),
-				      PAGE_ALIGN(bytes));
-		else {
-			free_pages((unsigned long)start, order);
-			start = NULL;
-		}
+		free_pages((unsigned long)start, order);
 		m_ret = XEN_SWIOTLB_EFIXUP;
 		goto error;
 	}
-	if (early) {
-		if (swiotlb_init_with_tbl(start, nslabs,
-			 verbose))
-			panic("Cannot allocate SWIOTLB buffer");
-		rc = 0;
-	} else
-		rc = swiotlb_late_init_with_tbl(start, nslabs);
-
-	if (!rc)
-		swiotlb_set_max_segment(PAGE_SIZE);
-
-	return rc;
+	rc = swiotlb_late_init_with_tbl(start, nslabs);
+	if (rc)
+		return rc;
+	swiotlb_set_max_segment(PAGE_SIZE);
+	return 0;
 error:
 	if (repeat--) {
-		nslabs = max(1024UL, /* Min is 2MB */
-					(nslabs >> 1));
+		/* Min is 2MB */
+		nslabs = max(1024UL, (nslabs >> 1));
 		pr_info("Lowering to %luMB\n",
 			(nslabs << IO_TLB_SHIFT) >> 20);
 		goto retry;
 	}
 	pr_err("%s (rc:%d)\n", xen_swiotlb_error(m_ret), rc);
-	if (early)
-		panic("%s (rc:%d)", xen_swiotlb_error(m_ret), rc);
-	else
-		free_pages((unsigned long)start, order);
+	free_pages((unsigned long)start, order);
 	return rc;
 }
 
+#ifdef CONFIG_X86
+void __init xen_swiotlb_init_early(void)
+{
+	unsigned long nslabs, bytes;
+	unsigned int repeat = 3;
+	char *start;
+	int rc;
+
+	nslabs = swiotlb_nr_tbl();
+	if (!nslabs)
+		nslabs = DEFAULT_NSLABS;
+retry:
+	/*
+	 * Get IO TLB memory from any location.
+	 */
+	bytes = nslabs << IO_TLB_SHIFT;
+	start = memblock_alloc(PAGE_ALIGN(bytes), PAGE_SIZE);
+	if (!start)
+		panic("%s: Failed to allocate %lu bytes align=0x%lx\n",
+		      __func__, PAGE_ALIGN(bytes), PAGE_SIZE);
+
+	/*
+	 * And replace that memory with pages under 4GB.
+	 */
+	rc = xen_swiotlb_fixup(start, bytes, nslabs);
+	if (rc) {
+		memblock_free(__pa(start), PAGE_ALIGN(bytes));
+		if (repeat--) {
+			/* Min is 2MB */
+			nslabs = max(1024UL, (nslabs >> 1));
+			pr_info("Lowering to %luMB\n",
+				(nslabs << IO_TLB_SHIFT) >> 20);
+			goto retry;
+		}
+		panic("%s (rc:%d)", xen_swiotlb_error(XEN_SWIOTLB_EFIXUP), rc);
+	}
+
+	if (swiotlb_init_with_tbl(start, nslabs, false))
+		panic("Cannot allocate SWIOTLB buffer");
+	swiotlb_set_max_segment(PAGE_SIZE);
+}
+#endif /* CONFIG_X86 */
+
 static void *
 xen_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
 			   dma_addr_t *dma_handle, gfp_t flags,
diff --git a/include/xen/swiotlb-xen.h b/include/xen/swiotlb-xen.h
index d5eaf9d682b804..6206b1ec99168a 100644
--- a/include/xen/swiotlb-xen.h
+++ b/include/xen/swiotlb-xen.h
@@ -9,7 +9,8 @@ void xen_dma_sync_for_cpu(struct device *dev, dma_addr_t handle,
 void xen_dma_sync_for_device(struct device *dev, dma_addr_t handle,
 			     size_t size, enum dma_data_direction dir);
 
-extern int xen_swiotlb_init(int verbose, bool early);
+int xen_swiotlb_init(void);
+void __init xen_swiotlb_init_early(void);
 extern const struct dma_map_ops xen_swiotlb_dma_ops;
 
 #endif /* __LINUX_SWIOTLB_XEN_H */
-- 
2.29.2


^ permalink raw reply related

* [PATCH 8/8] xen-swiotlb: remove the unused size argument from xen_swiotlb_fixup
From: Christoph Hellwig @ 2021-02-07 16:09 UTC (permalink / raw)
  To: Konrad Rzeszutek Wilk
  Cc: iommu, xen-devel, Claire Chang, linuxppc-dev, Dongli Zhang
In-Reply-To: <20210207160934.2955931-1-hch@lst.de>

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/xen/swiotlb-xen.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index b2d9e77059bf5a..621a20c1143597 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -104,8 +104,7 @@ static int is_xen_swiotlb_buffer(struct device *dev, dma_addr_t dma_addr)
 	return 0;
 }
 
-static int
-xen_swiotlb_fixup(void *buf, size_t size, unsigned long nslabs)
+static int xen_swiotlb_fixup(void *buf, unsigned long nslabs)
 {
 	int i, rc;
 	int dma_bits;
@@ -195,7 +194,7 @@ int __ref xen_swiotlb_init(void)
 	/*
 	 * And replace that memory with pages under 4GB.
 	 */
-	rc = xen_swiotlb_fixup(start, bytes, nslabs);
+	rc = xen_swiotlb_fixup(start, nslabs);
 	if (rc) {
 		free_pages((unsigned long)start, order);
 		m_ret = XEN_SWIOTLB_EFIXUP;
@@ -243,7 +242,7 @@ void __init xen_swiotlb_init_early(void)
 	/*
 	 * And replace that memory with pages under 4GB.
 	 */
-	rc = xen_swiotlb_fixup(start, bytes, nslabs);
+	rc = xen_swiotlb_fixup(start, nslabs);
 	if (rc) {
 		memblock_free(__pa(start), PAGE_ALIGN(bytes));
 		if (repeat--) {
-- 
2.29.2


^ permalink raw reply related

* Re: [PATCH] mm/memtest: Add ARCH_USE_MEMTEST
From: Anshuman Khandual @ 2021-02-08  3:04 UTC (permalink / raw)
  To: Max Filippov
  Cc: Chris Zankel, Thomas Bogendoerfer,
	open list:TENSILICA XTENSA PORT (xtensa), linuxppc-dev, LKML,
	Russell King, linux-mips, Linux Memory Management List,
	Ingo Molnar, Paul Mackerras, Catalin Marinas, Thomas Gleixner,
	Will Deacon, linux-arm-kernel
In-Reply-To: <CAMo8BfLXaycXgy-F=TaWzpEZZJKEhbZecxwvBVd6jTo0RJ8atQ@mail.gmail.com>



On 2/5/21 1:05 PM, Max Filippov wrote:
> On Thu, Feb 4, 2021 at 8:10 PM Anshuman Khandual
> <anshuman.khandual@arm.com> wrote:
>>
>> early_memtest() does not get called from all architectures. Hence enabling
>> CONFIG_MEMTEST and providing a valid memtest=[1..N] kernel command line
>> option might not trigger the memory pattern tests as would be expected in
>> normal circumstances. This situation is misleading.
>>
>> The change here prevents the above mentioned problem after introducing a
>> new config option ARCH_USE_MEMTEST that should be subscribed on platforms
>> that call early_memtest(), in order to enable the config CONFIG_MEMTEST.
>> Conversely CONFIG_MEMTEST cannot be enabled on platforms where it would
>> not be tested anyway.
>>
>> Cc: Russell King <linux@armlinux.org.uk>
>> Cc: Catalin Marinas <catalin.marinas@arm.com>
>> Cc: Will Deacon <will@kernel.org>
>> Cc: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
>> Cc: Michael Ellerman <mpe@ellerman.id.au>
>> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
>> Cc: Paul Mackerras <paulus@samba.org>
>> Cc: Thomas Gleixner <tglx@linutronix.de>
>> Cc: Ingo Molnar <mingo@redhat.com>
>> Cc: Chris Zankel <chris@zankel.net>
>> Cc: Max Filippov <jcmvbkbc@gmail.com>
>> Cc: linux-arm-kernel@lists.infradead.org
>> Cc: linux-mips@vger.kernel.org
>> Cc: linuxppc-dev@lists.ozlabs.org
>> Cc: linux-xtensa@linux-xtensa.org
>> Cc: linux-mm@kvack.org
>> Cc: linux-kernel@vger.kernel.org
>> Signed-off-by: Anshuman Khandual <anshuman.khandual@arm.com>
>> ---
>> This patch applies on v5.11-rc6 and has been tested on arm64 platform. But
>> it has been just build tested on all other platforms.
>>
>>  arch/arm/Kconfig     | 1 +
>>  arch/arm64/Kconfig   | 1 +
>>  arch/mips/Kconfig    | 1 +
>>  arch/powerpc/Kconfig | 1 +
>>  arch/x86/Kconfig     | 1 +
>>  arch/xtensa/Kconfig  | 1 +
>>  lib/Kconfig.debug    | 9 ++++++++-
>>  7 files changed, 14 insertions(+), 1 deletion(-)
> 
> Anshuman, entries in arch/*/Konfig files are sorted in alphabetical order,
> please keep them that way.

Sure, will fix up and resend.

> 
> Reviewed-by: Max Filippov <jcmvbkbc@gmail.com>
> 

^ permalink raw reply

* Re: [PATCH] mm/memtest: Add ARCH_USE_MEMTEST
From: Anshuman Khandual @ 2021-02-08  3:17 UTC (permalink / raw)
  To: Vladimir Murzin, linux-mm
  Cc: Chris Zankel, Thomas Bogendoerfer, Catalin Marinas, Will Deacon,
	linux-xtensa, linux-kernel, Russell King, Max Filippov,
	Ingo Molnar, Paul Mackerras, Thomas Gleixner, linux-mips,
	linuxppc-dev, linux-arm-kernel
In-Reply-To: <a58f1a92-087d-cf90-ce2b-0c88b39a8116@arm.com>



On 2/5/21 2:50 PM, Vladimir Murzin wrote:
> Hi Anshuman,
> 
> On 2/5/21 4:10 AM, Anshuman Khandual wrote:
>> early_memtest() does not get called from all architectures. Hence enabling
>> CONFIG_MEMTEST and providing a valid memtest=[1..N] kernel command line
>> option might not trigger the memory pattern tests as would be expected in
>> normal circumstances. This situation is misleading.
> 
> Documentation already mentions which architectures support that:
> 
> memtest=        [KNL,X86,ARM,PPC] Enable memtest
> 
> yet I admit that not all reflected there

But there is nothing that prevents CONFIG_MEMTEST from being set on
other platforms that do not have an affect, which is not optimal.

> 
>>
>> The change here prevents the above mentioned problem after introducing a
>> new config option ARCH_USE_MEMTEST that should be subscribed on platforms
>> that call early_memtest(), in order to enable the config CONFIG_MEMTEST.
>> Conversely CONFIG_MEMTEST cannot be enabled on platforms where it would
>> not be tested anyway.
>>
> 
> Is that generic pattern? What about other cross arch parameters? Do they already
> use similar subscription or they rely on documentation?

Depending solely on the documentation should not be sufficient.

> 
> I'm not against the patch just want to check if things are consistent...
Not sure about other similar situations but those if present should
get fixed as well.

^ permalink raw reply

* [PATCH v3 1/2] powerpc/64s: Fix pte update for kernel memory on radix
From: Jordan Niethe @ 2021-02-08  3:29 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: npiggin, cmr, naveen.n.rao, Jordan Niethe

When adding a pte a ptesync is needed to order the update of the pte
with subsequent accesses otherwise a spurious fault may be raised.

radix__set_pte_at() does not do this for performance gains. For
non-kernel memory this is not an issue as any faults of this kind are
corrected by the page fault handler.  For kernel memory these faults are
not handled.  The current solution is that there is a ptesync in
flush_cache_vmap() which should be called when mapping from the vmalloc
region.

However, map_kernel_page() does not call flush_cache_vmap(). This is
troublesome in particular for code patching with Strict RWX on radix. In
do_patch_instruction() the page frame that contains the instruction to
be patched is mapped and then immediately patched. With no ordering or
synchronization between setting up the pte and writing to the page it is
possible for faults.

As the code patching is done using __put_user_asm_goto() the resulting
fault is obscured - but using a normal store instead it can be seen:

[  418.498768][  T757] BUG: Unable to handle kernel data access on write at 0xc008000008f24a3c
[  418.498790][  T757] Faulting instruction address: 0xc00000000008bd74
[  418.498805][  T757] Oops: Kernel access of bad area, sig: 11 [#1]
[  418.498828][  T757] LE PAGE_SIZE=64K MMU=Radix SMP NR_CPUS=2048 NUMA PowerNV
[  418.498843][  T757] Modules linked in: nop_module(PO+) [last unloaded: nop_module]
[  418.498872][  T757] CPU: 4 PID: 757 Comm: sh Tainted: P           O      5.10.0-rc5-01361-ge3c1b78c8440-dirty #43
[  418.498936][  T757] NIP:  c00000000008bd74 LR: c00000000008bd50 CTR: c000000000025810
[  418.498979][  T757] REGS: c000000016f634a0 TRAP: 0300   Tainted: P           O       (5.10.0-rc5-01361-ge3c1b78c8440-dirty)
[  418.499033][  T757] MSR:  9000000000009033 <SF,HV,EE,ME,IR,DR,RI,LE>  CR: 44002884  XER: 00000000
[  418.499084][  T757] CFAR: c00000000007c68c DAR: c008000008f24a3c DSISR: 42000000 IRQMASK: 1

This results in the kind of issue reported here:
https://lore.kernel.org/linuxppc-dev/15AC5B0E-A221-4B8C-9039-FA96B8EF7C88@lca.pw/

Chris Riedl suggested a reliable way to reproduce the issue:
$ mount -t debugfs none /sys/kernel/debug
$ (while true; do echo function > /sys/kernel/debug/tracing/current_tracer ; echo nop > /sys/kernel/debug/tracing/current_tracer ; done)&

Turning ftrace on and off does a large amount of code patching which in
usually less then 5min will crash giving a trace like:

[  146.668988][  T809] ftrace-powerpc: (____ptrval____): replaced (4b473b11) != old (60000000)
[  146.668995][  T809] ------------[ ftrace bug ]------------
[  146.669031][  T809] ftrace failed to modify
[  146.669039][  T809] [<c000000000bf8e5c>] napi_busy_loop+0xc/0x390
[  146.669045][  T809]  actual:   11:3b:47:4b
[  146.669070][  T809] Setting ftrace call site to call ftrace function
[  146.669075][  T809] ftrace record flags: 80000001
[  146.669081][  T809]  (1)
[  146.669081][  T809]  expected tramp: c00000000006c96c
[  146.669096][  T809] ------------[ cut here ]------------
[  146.669104][  T809] WARNING: CPU: 4 PID: 809 at kernel/trace/ftrace.c:2065 ftrace_bug+0x28c/0x2e8
[  146.669109][  T809] Modules linked in: nop_module(PO-) [last unloaded: nop_module]
[  146.669130][  T809] CPU: 4 PID: 809 Comm: sh Tainted: P           O      5.10.0-rc5-01360-gf878ccaf250a #1
[  146.669136][  T809] NIP:  c00000000024f334 LR: c00000000024f330 CTR: c0000000001a5af0
[  146.669142][  T809] REGS: c000000004c8b760 TRAP: 0700   Tainted: P           O       (5.10.0-rc5-01360-gf878ccaf250a)
[  146.669147][  T809] MSR:  900000000282b033 <SF,HV,VEC,VSX,EE,FP,ME,IR,DR,RI,LE>  CR: 28008848  XER: 20040000
[  146.669208][  T809] CFAR: c0000000001a9c98 IRQMASK: 0
[  146.669208][  T809] GPR00: c00000000024f330 c000000004c8b9f0 c000000002770600 0000000000000022
[  146.669208][  T809] GPR04: 00000000ffff7fff c000000004c8b6d0 0000000000000027 c0000007fe9bcdd8
[  146.669208][  T809] GPR08: 0000000000000023 ffffffffffffffd8 0000000000000027 c000000002613118
[  146.669208][  T809] GPR12: 0000000000008000 c0000007fffdca00 0000000000000000 0000000000000000
[  146.669208][  T809] GPR16: 0000000023ec37c5 0000000000000000 0000000000000000 0000000000000008
[  146.669208][  T809] GPR20: c000000004c8bc90 c0000000027a2d20 c000000004c8bcd0 c000000002612fe8
[  146.669208][  T809] GPR24: 0000000000000038 0000000000000030 0000000000000028 0000000000000020
[  146.669208][  T809] GPR28: c000000000ff1b68 c000000000bf8e5c c00000000312f700 c000000000fbb9b0
[  146.669384][  T809] NIP [c00000000024f334] ftrace_bug+0x28c/0x2e8
[  146.669391][  T809] LR [c00000000024f330] ftrace_bug+0x288/0x2e8
[  146.669396][  T809] Call Trace:
[  146.669403][  T809] [c000000004c8b9f0] [c00000000024f330] ftrace_bug+0x288/0x2e8 (unreliable)
[  146.669418][  T809] [c000000004c8ba80] [c000000000248778] ftrace_modify_all_code+0x168/0x210
[  146.669429][  T809] [c000000004c8bab0] [c00000000006c528] arch_ftrace_update_code+0x18/0x30
[  146.669440][  T809] [c000000004c8bad0] [c000000000248954] ftrace_run_update_code+0x44/0xc0
[  146.669451][  T809] [c000000004c8bb00] [c00000000024dc88] ftrace_startup+0xf8/0x1c0
[  146.669461][  T809] [c000000004c8bb40] [c00000000024dd9c] register_ftrace_function+0x4c/0xc0
[  146.669472][  T809] [c000000004c8bb70] [c00000000026e750] function_trace_init+0x80/0xb0
[  146.669484][  T809] [c000000004c8bba0] [c000000000266b84] tracing_set_tracer+0x2a4/0x4f0
[  146.669495][  T809] [c000000004c8bc70] [c000000000266ea4] tracing_set_trace_write+0xd4/0x130
[  146.669506][  T809] [c000000004c8bd20] [c000000000422790] vfs_write+0xf0/0x330
[  146.669518][  T809] [c000000004c8bd70] [c000000000422bb4] ksys_write+0x84/0x140
[  146.669529][  T809] [c000000004c8bdc0] [c00000000003499c] system_call_exception+0x14c/0x230
[  146.669540][  T809] [c000000004c8be20] [c00000000000d860] system_call_common+0xf0/0x27c
[  146.669549][  T809] Instruction dump:
[  146.669558][  T809] 48000014 3c62fe88 38631718 4bf5a941 60000000 7fc3f378 4bff877d 7c641b78
[  146.669598][  T809] 3c62fe88 38631730 4bf5a925 60000000 <0fe00000> 38210090 3d22fd90 39000001
[  146.669638][  T809] ---[ end trace 5ea7076ea28c0fbd ]---

To fix this when updating kernel memory ptes using ptesync.

Fixes: f1cb8f9beba8 ("powerpc/64s/radix: avoid ptesync after set_pte and ptep_set_access_flags")
Reviewed-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Jordan Niethe <jniethe5@gmail.com>
---
v2: Only ptesync is needed
v3: Fix Fixes tag
---
 arch/powerpc/include/asm/book3s/64/radix.h | 6 ++++--
 arch/powerpc/mm/book3s64/radix_pgtable.c   | 4 ++--
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h
index c7813dc628fc..59cab558e2f0 100644
--- a/arch/powerpc/include/asm/book3s/64/radix.h
+++ b/arch/powerpc/include/asm/book3s/64/radix.h
@@ -222,8 +222,10 @@ static inline void radix__set_pte_at(struct mm_struct *mm, unsigned long addr,
 	 * from ptesync, it should probably go into update_mmu_cache, rather
 	 * than set_pte_at (which is used to set ptes unrelated to faults).
 	 *
-	 * Spurious faults to vmalloc region are not tolerated, so there is
-	 * a ptesync in flush_cache_vmap.
+	 * Spurious faults from the kernel memory are not tolerated, so there
+	 * is a ptesync in flush_cache_vmap, and __map_kernel_page() follows
+	 * the pte update sequence from ISA Book III 6.10 Translation Table
+	 * Update Synchronization Requirements.
 	 */
 }
 
diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c
index 3adcf730f478..1d5eec847b88 100644
--- a/arch/powerpc/mm/book3s64/radix_pgtable.c
+++ b/arch/powerpc/mm/book3s64/radix_pgtable.c
@@ -108,7 +108,7 @@ static int early_map_kernel_page(unsigned long ea, unsigned long pa,
 
 set_the_pte:
 	set_pte_at(&init_mm, ea, ptep, pfn_pte(pfn, flags));
-	smp_wmb();
+	asm volatile("ptesync": : :"memory");
 	return 0;
 }
 
@@ -168,7 +168,7 @@ static int __map_kernel_page(unsigned long ea, unsigned long pa,
 
 set_the_pte:
 	set_pte_at(&init_mm, ea, ptep, pfn_pte(pfn, flags));
-	smp_wmb();
+	asm volatile("ptesync": : :"memory");
 	return 0;
 }
 
-- 
2.25.1


^ permalink raw reply related

* [PATCH v3 2/2] selftests/powerpc: Test for spurious kernel memory faults on radix
From: Jordan Niethe @ 2021-02-08  3:29 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: npiggin, cmr, naveen.n.rao, Jordan Niethe
In-Reply-To: <20210208032957.1232102-1-jniethe5@gmail.com>

Previously when mapping kernel memory on radix, no ptesync was included
which would periodically lead to unhandled spurious faults. Mapping
kernel memory is used when code patching with Strict RWX enabled.  As
suggested by Chris Riedl, turning ftrace on and off does a large amount
of code patching so is a convenient way to see this kind of fault.

Add a selftest to try and trigger this kind of a spurious fault. It
tests for 30 seconds which is usually long enough for the issue to show
up.

Signed-off-by: Jordan Niethe <jniethe5@gmail.com>
---
v3: New to series
---
 tools/testing/selftests/powerpc/mm/Makefile   |  1 +
 .../selftests/powerpc/mm/spurious_fault.sh    | 49 +++++++++++++++++++
 2 files changed, 50 insertions(+)
 create mode 100755 tools/testing/selftests/powerpc/mm/spurious_fault.sh

diff --git a/tools/testing/selftests/powerpc/mm/Makefile b/tools/testing/selftests/powerpc/mm/Makefile
index defe488d6bf1..56c2896bed53 100644
--- a/tools/testing/selftests/powerpc/mm/Makefile
+++ b/tools/testing/selftests/powerpc/mm/Makefile
@@ -5,6 +5,7 @@ noarg:
 TEST_GEN_PROGS := hugetlb_vs_thp_test subpage_prot prot_sao segv_errors wild_bctr \
 		  large_vm_fork_separation bad_accesses pkey_exec_prot \
 		  pkey_siginfo stack_expansion_signal stack_expansion_ldst
+TEST_PROGS := spurious_fault.sh
 
 TEST_GEN_PROGS_EXTENDED := tlbie_test
 TEST_GEN_FILES := tempfile
diff --git a/tools/testing/selftests/powerpc/mm/spurious_fault.sh b/tools/testing/selftests/powerpc/mm/spurious_fault.sh
new file mode 100755
index 000000000000..e454509659f6
--- /dev/null
+++ b/tools/testing/selftests/powerpc/mm/spurious_fault.sh
@@ -0,0 +1,49 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0-or-later
+
+TIMEOUT=30
+
+DEBUFS_DIR=`cat /proc/mounts | grep debugfs | awk '{print $2}'`
+if [ ! -e "$DEBUFS_DIR" ]
+then
+	echo "debugfs not found, skipping" 1>&2
+	exit 4
+fi
+
+if [ ! -e "$DEBUFS_DIR/tracing/current_tracer" ]
+then
+	echo "Tracing files not found, skipping" 1>&2
+	exit 4
+fi
+
+
+echo "Testing for spurious faults when mapping kernel memory..."
+
+if grep -q "FUNCTION TRACING IS CORRUPTED" "$DEBUFS_DIR/tracing/trace"
+then
+	echo "FAILED: Ftrace already dead. Probably due to a spurious fault" 1>&2
+	exit 1
+fi
+
+dmesg -C
+START_TIME=`date +%s`
+END_TIME=`expr $START_TIME + $TIMEOUT`
+while [ `date +%s` -lt $END_TIME ]
+do
+	echo function > $DEBUFS_DIR/tracing/current_tracer
+	echo nop > $DEBUFS_DIR/tracing/current_tracer
+	if dmesg | grep -q 'ftrace bug'
+	then
+		break
+	fi
+done
+
+echo nop > $DEBUFS_DIR/tracing/current_tracer
+if dmesg | grep -q 'ftrace bug'
+then
+	echo "FAILED: Mapping kernel memory causes spurious faults" 1>&2
+	exit 1
+else
+	echo "OK: Mapping kernel memory does not cause spurious faults"
+	exit 0
+fi
-- 
2.25.1


^ permalink raw reply related

* [PATCH] ASoC: imx-hdmi: no need to set .owner when using module_platform_driver
From: Tian Tao @ 2021-02-08  3:51 UTC (permalink / raw)
  To: timur, nicoleotsuka, Xiubo.Lee, perex, tiwai, shawnguo, s.hauer
  Cc: alsa-devel, linuxppc-dev, linux-arm-kernel

the module_platform_driver will call platform_driver_register.
and It will set the .owner to THIS_MODULE

Signed-off-by: Tian Tao <tiantao6@hisilicon.com>
---
 sound/soc/fsl/imx-hdmi.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/sound/soc/fsl/imx-hdmi.c b/sound/soc/fsl/imx-hdmi.c
index dbbb7618..cd0235a 100644
--- a/sound/soc/fsl/imx-hdmi.c
+++ b/sound/soc/fsl/imx-hdmi.c
@@ -223,7 +223,6 @@ MODULE_DEVICE_TABLE(of, imx_hdmi_dt_ids);
 static struct platform_driver imx_hdmi_driver = {
 	.driver = {
 		.name = "imx-hdmi",
-		.owner = THIS_MODULE,
 		.pm = &snd_soc_pm_ops,
 		.of_match_table = imx_hdmi_dt_ids,
 	},
-- 
2.7.4


^ permalink raw reply related

* Re: [PATCH v16 11/12] powerpc: Use OF alloc and free for FDT
From: Michael Ellerman @ 2021-02-08  4:12 UTC (permalink / raw)
  To: Rob Herring, Lakshmi Ramasubramanian
  Cc: Mark Rutland, Bhupesh Sharma, tao.li, Mimi Zohar, Paul Mackerras,
	vincenzo.frascino, Frank Rowand, Sasha Levin, Will Deacon,
	Masahiro Yamada, James Morris, AKASHI, Takahiro, linux-arm-kernel,
	Catalin Marinas, Serge E. Hallyn, devicetree, Pavel Tatashin,
	Prakhar Srivastava, Hsin-Yi Wang, Allison Randal,
	Christophe Leroy, Matthias Brugger, balajib, dmitry.kasatkin,
	linux-kernel@vger.kernel.org, James Morse, Greg Kroah-Hartman,
	Joe Perches, linux-integrity, linuxppc-dev, Thiago Jung Bauermann
In-Reply-To: <CAL_JsqK1Pb9nAeL84EP2U3MQgpBsm+E_0QXmzbigWXnS245WPQ@mail.gmail.com>

Rob Herring <robh@kernel.org> writes:
> On Thu, Feb 4, 2021 at 10:42 AM Lakshmi Ramasubramanian
> <nramas@linux.microsoft.com> wrote:
...
>> diff --git a/arch/powerpc/kexec/elf_64.c b/arch/powerpc/kexec/elf_64.c
>> index d0e459bb2f05..51d2d8eb6c1b 100644
>> --- a/arch/powerpc/kexec/elf_64.c
>> +++ b/arch/powerpc/kexec/elf_64.c
>> @@ -19,6 +19,7 @@
>>  #include <linux/kexec.h>
>>  #include <linux/libfdt.h>
>>  #include <linux/module.h>
>> +#include <linux/of.h>
>>  #include <linux/of_fdt.h>
>>  #include <linux/slab.h>
>>  #include <linux/types.h>
>> @@ -32,7 +33,7 @@ static void *elf64_load(struct kimage *image, char *kernel_buf,
>>         unsigned int fdt_size;
>>         unsigned long kernel_load_addr;
>>         unsigned long initrd_load_addr = 0, fdt_load_addr;
>> -       void *fdt;
>> +       void *fdt = NULL;
>>         const void *slave_code;
>>         struct elfhdr ehdr;
>>         char *modified_cmdline = NULL;
>> @@ -103,18 +104,12 @@ static void *elf64_load(struct kimage *image, char *kernel_buf,
>>         }
>>
>>         fdt_size = fdt_totalsize(initial_boot_params) * 2;
>> -       fdt = kmalloc(fdt_size, GFP_KERNEL);
>> +       fdt = of_alloc_and_init_fdt(fdt_size);
>>         if (!fdt) {
>>                 pr_err("Not enough memory for the device tree.\n");
>>                 ret = -ENOMEM;
>>                 goto out;
>>         }
>> -       ret = fdt_open_into(initial_boot_params, fdt, fdt_size);
>> -       if (ret < 0) {
>> -               pr_err("Error setting up the new device tree.\n");
>> -               ret = -EINVAL;
>> -               goto out;
>> -       }
>>
>>         ret = setup_new_fdt_ppc64(image, fdt, initrd_load_addr,
>
> The first thing this function does is call setup_new_fdt() which first
> calls of_kexec_setup_new_fdt(). (Note, I really don't understand the
> PPC code split. It looks like there's a 32-bit and 64-bit split, but
> 32-bit looks broken to me. Nothing ever calls setup_new_fdt() except
> setup_new_fdt_ppc64()).

I think that's because 32-bit doesn't support kexec_file_load().

cheers

^ permalink raw reply

* Re: [PATCH v5 03/10] powerpc/signal64: Move non-inline functions out of setup_sigcontext()
From: Daniel Axtens @ 2021-02-08  4:44 UTC (permalink / raw)
  To: Christopher M. Riedl, linuxppc-dev
In-Reply-To: <20210203184323.20792-4-cmr@codefail.de>

Hi Chris,

These two paragraphs are a little confusing and they seem slightly
repetitive. But I get the general idea. Two specific comments below:

> There are non-inline functions which get called in setup_sigcontext() to
> save register state to the thread struct. Move these functions into a
> separate prepare_setup_sigcontext() function so that
> setup_sigcontext() can be refactored later into an "unsafe" version
> which assumes an open uaccess window. Non-inline functions should be
> avoided when uaccess is open.

Why do we want to avoid non-inline functions? We came up with:

 - we want KUAP protection for as much of the kernel as possible: each
   extra bit of code run with the window open is another piece of attack
   surface.
   
 - non-inline functions default to traceable, which means we could end
   up ftracing while uaccess is enabled. That's a pretty big hole in the
   defences that KUAP provides.

I think we've also had problems with the window being opened or closed
unexpectedly by various bits of code? So the less code runs in uaccess
context the less likely that is to occur.
 
> The majority of setup_sigcontext() can be refactored to execute in an
> "unsafe" context (uaccess window is opened) except for some non-inline
> functions. Move these out into a separate prepare_setup_sigcontext()
> function which must be called first and before opening up a uaccess
> window. A follow-up commit converts setup_sigcontext() to be "unsafe".

This was a bit confusing until we realise that you're moving the _calls_
to the non-inline functions out, not the non-inline functions themselves.

> Signed-off-by: Christopher M. Riedl <cmr@codefail.de>
> ---
>  arch/powerpc/kernel/signal_64.c | 32 +++++++++++++++++++++-----------
>  1 file changed, 21 insertions(+), 11 deletions(-)
>
> diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
> index f9e4a1ac440f..b211a8ea4f6e 100644
> --- a/arch/powerpc/kernel/signal_64.c
> +++ b/arch/powerpc/kernel/signal_64.c
> @@ -79,6 +79,24 @@ static elf_vrreg_t __user *sigcontext_vmx_regs(struct sigcontext __user *sc)
>  }
>  #endif
>  
> +static void prepare_setup_sigcontext(struct task_struct *tsk, int ctx_has_vsx_region)

ctx_has_vsx_region should probably be a bool? Although setup_sigcontext
also has it as an int so I guess that's arguable, and maybe it's better
to stick with this for constency.

> +{
> +#ifdef CONFIG_ALTIVEC
> +	/* save altivec registers */
> +	if (tsk->thread.used_vr)
> +		flush_altivec_to_thread(tsk);
> +	if (cpu_has_feature(CPU_FTR_ALTIVEC))
> +		tsk->thread.vrsave = mfspr(SPRN_VRSAVE);
> +#endif /* CONFIG_ALTIVEC */
> +
> +	flush_fp_to_thread(tsk);
> +
> +#ifdef CONFIG_VSX
> +	if (tsk->thread.used_vsr && ctx_has_vsx_region)
> +		flush_vsx_to_thread(tsk);
> +#endif /* CONFIG_VSX */

Alternatively, given that this is the only use of ctx_has_vsx_region,
mpe suggested that perhaps we could drop it entirely and always
flush_vsx if used_vsr. The function is only ever called with either
`current` or wth ctx_has_vsx_region set to 1, so in either case I think
that's safe? I'm not sure if it would have performance implications.

Should we move this and the altivec ifdef to IS_ENABLED(CONFIG_VSX) etc?
I'm not sure if that runs into any problems with things like 'used_vsr'
only being defined if CONFIG_VSX is set, but I thought I'd ask.


> +}
> +
>  /*
>   * Set up the sigcontext for the signal frame.
>   */
> @@ -97,7 +115,6 @@ static long setup_sigcontext(struct sigcontext __user *sc,
>  	 */
>  #ifdef CONFIG_ALTIVEC
>  	elf_vrreg_t __user *v_regs = sigcontext_vmx_regs(sc);
> -	unsigned long vrsave;
>  #endif
>  	struct pt_regs *regs = tsk->thread.regs;
>  	unsigned long msr = regs->msr;
> @@ -112,7 +129,6 @@ static long setup_sigcontext(struct sigcontext __user *sc,
>  
>  	/* save altivec registers */
>  	if (tsk->thread.used_vr) {
> -		flush_altivec_to_thread(tsk);
>  		/* Copy 33 vec registers (vr0..31 and vscr) to the stack */
>  		err |= __copy_to_user(v_regs, &tsk->thread.vr_state,
>  				      33 * sizeof(vector128));
> @@ -124,17 +140,10 @@ static long setup_sigcontext(struct sigcontext __user *sc,
>  	/* We always copy to/from vrsave, it's 0 if we don't have or don't
>  	 * use altivec.
>  	 */
> -	vrsave = 0;
> -	if (cpu_has_feature(CPU_FTR_ALTIVEC)) {
> -		vrsave = mfspr(SPRN_VRSAVE);
> -		tsk->thread.vrsave = vrsave;
> -	}
> -
> -	err |= __put_user(vrsave, (u32 __user *)&v_regs[33]);
> +	err |= __put_user(tsk->thread.vrsave, (u32 __user *)&v_regs[33]);

Previously, if !cpu_has_feature(ALTIVEC), v_regs[33] had vrsave stored,
which was set to 0 explicitly. Now we store thread.vrsave instead of the
local vrsave. That should be safe - it is initalised to 0 elsewhere.

So you don't have to do anything here, this is just letting you know
that we checked it and thought about it.

>  #else /* CONFIG_ALTIVEC */
>  	err |= __put_user(0, &sc->v_regs);
>  #endif /* CONFIG_ALTIVEC */
> -	flush_fp_to_thread(tsk);
>  	/* copy fpr regs and fpscr */
>  	err |= copy_fpr_to_user(&sc->fp_regs, tsk);
>  
> @@ -150,7 +159,6 @@ static long setup_sigcontext(struct sigcontext __user *sc,
>  	 * VMX data.
>  	 */
>  	if (tsk->thread.used_vsr && ctx_has_vsx_region) {
> -		flush_vsx_to_thread(tsk);
>  		v_regs += ELF_NVRREG;
>  		err |= copy_vsx_to_user(v_regs, tsk);
>  		/* set MSR_VSX in the MSR value in the frame to
> @@ -655,6 +663,7 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx,
>  		ctx_has_vsx_region = 1;
>  
>  	if (old_ctx != NULL) {
> +		prepare_setup_sigcontext(current, ctx_has_vsx_region);
>  		if (!access_ok(old_ctx, ctx_size)
>  		    || setup_sigcontext(&old_ctx->uc_mcontext, current, 0, NULL, 0,
>  					ctx_has_vsx_region)

I had a think about whether there was a problem with bubbling
prepare_setup_sigcontext over the access_ok() test, but given that
prepare_setup_sigcontext(current ...) doesn't access any of old_ctx, I'm
satisfied that it's OK - no changes needed.


> @@ -842,6 +851,7 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
>  #endif
>  	{
>  		err |= __put_user(0, &frame->uc.uc_link);
> +		prepare_setup_sigcontext(tsk, 1);

Why do we call with ctx_has_vsx_region = 1 here?  It's not immediately
clear to me why this is correct, but mpe and Mikey seem pretty convinced
that it is.

>  		err |= setup_sigcontext(&frame->uc.uc_mcontext, tsk, ksig->sig,
>  					NULL, (unsigned long)ksig->ka.sa.sa_handler,
>  					1);


Finally, it's a bit hard to figure out where to put this, but we spent
some time making sure that the various things you moved into the
prepare_setup_sigcontext() function were called under the same
circumstances as they were before, and there were no concerns there.

Kind regards,
Daniel

^ permalink raw reply

* Re: [RFC Qemu PATCH v2 1/2] spapr: drc: Add support for async hcalls at the drc level
From: David Gibson @ 2021-02-08  6:21 UTC (permalink / raw)
  To: Shivaprasad G Bhat
  Cc: xiaoguangrong.eric, mst, aneesh.kumar, linux-nvdimm, qemu-devel,
	kvm-ppc, Greg Kurz, shivaprasadbhat, qemu-ppc, bharata, imammedo,
	linuxppc-dev
In-Reply-To: <3b47312a-217f-8df5-0bfd-1a653598abad@linux.ibm.com>

[-- Attachment #1: Type: text/plain, Size: 1890 bytes --]

On Tue, Jan 19, 2021 at 12:40:31PM +0530, Shivaprasad G Bhat wrote:
> Thanks for the comments!
> 
> 
> On 12/28/20 2:08 PM, David Gibson wrote:
> 
> > On Mon, Dec 21, 2020 at 01:08:53PM +0100, Greg Kurz wrote:
> ...
> > > The overall idea looks good but I think you should consider using
> > > a thread pool to implement it. See below.
> > I am not convinced, however.  Specifically, attaching this to the DRC
> > doesn't make sense to me.  We're adding exactly one DRC related async
> > hcall, and I can't really see much call for another one.  We could
> > have other async hcalls - indeed we already have one for HPT resizing
> > - but attaching this to DRCs doesn't help for those.
> 
> The semantics of the hcall made me think, if this is going to be
> re-usable for future if implemented at DRC level.

It would only be re-usable for operations that are actually connected
to DRCs.  It doesn't seem to me particularly likely that we'll ever
have more asynchronous hcalls that are also associated with DRCs.

> Other option
> is to move the async-hcall-state/list into the NVDIMMState structure
> in include/hw/mem/nvdimm.h and handle it with machine->nvdimms_state
> at a global level.

I'm ok with either of two options:

A) Implement this ad-hoc for this specific case, making whatever
simplifications you can based on this specific case.

B) Implement a general mechanism for async hcalls that is *not* tied
to DRCs.  Then use that for the existing H_RESIZE_HPT_PREPARE call as
well as this new one.

> Hope you are okay with using the pool based approach that Greg

Honestly a thread pool seems like it might be overkill for this
application.

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply

* [PATCH] powerpc/64s: syscall real mode entry use mtmsrd rather than rfid
From: Nicholas Piggin @ 2021-02-08  6:33 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Nicholas Piggin

Have the real mode system call entry handler branch to the kernel
0xc000... address and then use mtmsrd to enable the MMU, rather than use
SRRs and rfid.

Commit 8729c26e675c ("powerpc/64s/exception: Move real to virt switch
into the common handler") implemented this style of real mode entry for
other interrupt handlers, so this brings system calls into line with
them, which is the main motivcation for the change.

This tends to be slightly faster due to avoiding the mtsprs, and it also
does not clobber the SRR registers, which becomes important in a
subsequent change. The real mode entry points don't tend to be too
important for performance these days, but it is possible for a
hypervisor to run guests in AIL=0 mode for certian reasons.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/kernel/entry_64.S       | 6 ++++++
 arch/powerpc/kernel/exceptions-64s.S | 9 +++------
 2 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 33ddfeef4fe9..993ed95ed602 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -225,6 +225,12 @@ _ASM_NOKPROBE_SYMBOL(system_call_vectored_emulate)
 	b	system_call_vectored_common
 #endif
 
+	.balign IFETCH_ALIGN_BYTES
+	.globl system_call_common_real
+system_call_common_real:
+	ld	r10,PACAKMSR(r13)	/* get MSR value for kernel */
+	mtmsrd	r10
+
 	.balign IFETCH_ALIGN_BYTES
 	.globl system_call_common
 system_call_common:
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 5478ffa85603..dad35b59bcfb 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -1905,12 +1905,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE)
 	HMT_MEDIUM
 
 	.if ! \virt
-	__LOAD_HANDLER(r10, system_call_common)
-	mtspr	SPRN_SRR0,r10
-	ld	r10,PACAKMSR(r13)
-	mtspr	SPRN_SRR1,r10
-	RFI_TO_KERNEL
-	b	.	/* prevent speculative execution */
+	__LOAD_HANDLER(r10, system_call_common_real)
+	mtctr	r10
+	bctr
 	.else
 	li	r10,MSR_RI
 	mtmsrd 	r10,1			/* Set RI (EE=0) */
-- 
2.23.0


^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox