LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH -next 3/9] ASoC: fsl_easrc: Use devm_platform_get_and_ioremap_resource()
From: Yang Yingliang @ 2021-06-11  9:36 UTC (permalink / raw)
  To: linux-kernel, linuxppc-dev, alsa-devel; +Cc: broonie, timur
In-Reply-To: <20210611093626.579176-1-yangyingliang@huawei.com>

Use devm_platform_get_and_ioremap_resource() to simplify
code.

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
---
 sound/soc/fsl/fsl_easrc.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/sound/soc/fsl/fsl_easrc.c b/sound/soc/fsl/fsl_easrc.c
index b1765c7d3bcd..19c3c3b5939e 100644
--- a/sound/soc/fsl/fsl_easrc.c
+++ b/sound/soc/fsl/fsl_easrc.c
@@ -1887,8 +1887,7 @@ static int fsl_easrc_probe(struct platform_device *pdev)
 	easrc->private = easrc_priv;
 	np = dev->of_node;
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	regs = devm_ioremap_resource(dev, res);
+	regs = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
 	if (IS_ERR(regs))
 		return PTR_ERR(regs);
 
-- 
2.25.1


^ permalink raw reply related

* [PATCH -next 0/9] ASoC: fsl: Use devm_platform_get_and_ioremap_resource()
From: Yang Yingliang @ 2021-06-11  9:36 UTC (permalink / raw)
  To: linux-kernel, linuxppc-dev, alsa-devel; +Cc: broonie, timur

patch #1 ~ #8:
  Use devm_platform_get_and_ioremap_resource()

patch #9
  check return value of platform_get_resource_byname()

Yang Yingliang (9):
  ASoC: fsl_asrc: Use devm_platform_get_and_ioremap_resource()
  ASoC: fsl_aud2htx: Use devm_platform_get_and_ioremap_resource()
  ASoC: fsl_easrc: Use devm_platform_get_and_ioremap_resource()
  ASoC: fsl_esai: Use devm_platform_get_and_ioremap_resource()
  ASoC: fsl_micfil: Use devm_platform_get_and_ioremap_resource()
  ASoC: fsl_sai: Use devm_platform_get_and_ioremap_resource()
  ASoC: fsl_spdif: Use devm_platform_get_and_ioremap_resource()
  ASoC: fsl_ssi: Use devm_platform_get_and_ioremap_resource()
  ASoC: fsl_xcvr: check return value after calling
    platform_get_resource_byname()

 sound/soc/fsl/fsl_asrc.c    | 3 +--
 sound/soc/fsl/fsl_aud2htx.c | 3 +--
 sound/soc/fsl/fsl_easrc.c   | 3 +--
 sound/soc/fsl/fsl_esai.c    | 3 +--
 sound/soc/fsl/fsl_micfil.c  | 3 +--
 sound/soc/fsl/fsl_sai.c     | 3 +--
 sound/soc/fsl/fsl_spdif.c   | 3 +--
 sound/soc/fsl/fsl_ssi.c     | 3 +--
 sound/soc/fsl/fsl_xcvr.c    | 4 ++++
 9 files changed, 12 insertions(+), 16 deletions(-)

-- 
2.25.1


^ permalink raw reply

* [PATCH -next 6/9] ASoC: fsl_sai: Use devm_platform_get_and_ioremap_resource()
From: Yang Yingliang @ 2021-06-11  9:36 UTC (permalink / raw)
  To: linux-kernel, linuxppc-dev, alsa-devel; +Cc: broonie, timur
In-Reply-To: <20210611093626.579176-1-yangyingliang@huawei.com>

Use devm_platform_get_and_ioremap_resource() to simplify
code.

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
---
 sound/soc/fsl/fsl_sai.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/sound/soc/fsl/fsl_sai.c b/sound/soc/fsl/fsl_sai.c
index 407a45e48eee..223fcd15bfcc 100644
--- a/sound/soc/fsl/fsl_sai.c
+++ b/sound/soc/fsl/fsl_sai.c
@@ -1017,8 +1017,7 @@ static int fsl_sai_probe(struct platform_device *pdev)
 
 	sai->is_lsb_first = of_property_read_bool(np, "lsb-first");
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	base = devm_ioremap_resource(&pdev->dev, res);
+	base = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
 	if (IS_ERR(base))
 		return PTR_ERR(base);
 
-- 
2.25.1


^ permalink raw reply related

* [PATCH -next 2/9] ASoC: fsl_aud2htx: Use devm_platform_get_and_ioremap_resource()
From: Yang Yingliang @ 2021-06-11  9:36 UTC (permalink / raw)
  To: linux-kernel, linuxppc-dev, alsa-devel; +Cc: broonie, timur
In-Reply-To: <20210611093626.579176-1-yangyingliang@huawei.com>

Use devm_platform_get_and_ioremap_resource() to simplify
code.

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
---
 sound/soc/fsl/fsl_aud2htx.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/sound/soc/fsl/fsl_aud2htx.c b/sound/soc/fsl/fsl_aud2htx.c
index a328697511f7..99ab7f0241cf 100644
--- a/sound/soc/fsl/fsl_aud2htx.c
+++ b/sound/soc/fsl/fsl_aud2htx.c
@@ -196,8 +196,7 @@ static int fsl_aud2htx_probe(struct platform_device *pdev)
 
 	aud2htx->pdev = pdev;
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	regs = devm_ioremap_resource(&pdev->dev, res);
+	regs = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
 	if (IS_ERR(regs))
 		return PTR_ERR(regs);
 
-- 
2.25.1


^ permalink raw reply related

* [PATCH -next 1/9] ASoC: fsl_asrc: Use devm_platform_get_and_ioremap_resource()
From: Yang Yingliang @ 2021-06-11  9:36 UTC (permalink / raw)
  To: linux-kernel, linuxppc-dev, alsa-devel; +Cc: broonie, timur
In-Reply-To: <20210611093626.579176-1-yangyingliang@huawei.com>

Use devm_platform_get_and_ioremap_resource() to simplify
code.

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
---
 sound/soc/fsl/fsl_asrc.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/sound/soc/fsl/fsl_asrc.c b/sound/soc/fsl/fsl_asrc.c
index 0e1ad8efebd3..24b41881a68f 100644
--- a/sound/soc/fsl/fsl_asrc.c
+++ b/sound/soc/fsl/fsl_asrc.c
@@ -1035,8 +1035,7 @@ static int fsl_asrc_probe(struct platform_device *pdev)
 	asrc->private = asrc_priv;
 
 	/* Get the addresses and IRQ */
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	regs = devm_ioremap_resource(&pdev->dev, res);
+	regs = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
 	if (IS_ERR(regs))
 		return PTR_ERR(regs);
 
-- 
2.25.1


^ permalink raw reply related

* [PATCH -next 9/9] ASoC: fsl_xcvr: check return value after calling platform_get_resource_byname()
From: Yang Yingliang @ 2021-06-11  9:36 UTC (permalink / raw)
  To: linux-kernel, linuxppc-dev, alsa-devel; +Cc: broonie, timur
In-Reply-To: <20210611093626.579176-1-yangyingliang@huawei.com>

It will cause null-ptr-deref if platform_get_resource_byname() returns NULL,
we need check the return value.

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
---
 sound/soc/fsl/fsl_xcvr.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/sound/soc/fsl/fsl_xcvr.c b/sound/soc/fsl/fsl_xcvr.c
index df7c189d97dd..2e9061c5ed74 100644
--- a/sound/soc/fsl/fsl_xcvr.c
+++ b/sound/soc/fsl/fsl_xcvr.c
@@ -1202,6 +1202,10 @@ static int fsl_xcvr_probe(struct platform_device *pdev)
 
 	rx_res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "rxfifo");
 	tx_res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "txfifo");
+	if (!rx_res || !tx_res) {
+		dev_err(dev, "Invalid resource\n");
+		return -EINVAL;
+	}
 	xcvr->dma_prms_rx.chan_name = "rx";
 	xcvr->dma_prms_tx.chan_name = "tx";
 	xcvr->dma_prms_rx.addr = rx_res->start;
-- 
2.25.1


^ permalink raw reply related

* [PATCH -next 8/9] ASoC: fsl_ssi: Use devm_platform_get_and_ioremap_resource()
From: Yang Yingliang @ 2021-06-11  9:36 UTC (permalink / raw)
  To: linux-kernel, linuxppc-dev, alsa-devel; +Cc: broonie, timur
In-Reply-To: <20210611093626.579176-1-yangyingliang@huawei.com>

Use devm_platform_get_and_ioremap_resource() to simplify
code.

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
---
 sound/soc/fsl/fsl_ssi.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/sound/soc/fsl/fsl_ssi.c b/sound/soc/fsl/fsl_ssi.c
index 2b57b60431bb..ecbc1c365d5b 100644
--- a/sound/soc/fsl/fsl_ssi.c
+++ b/sound/soc/fsl/fsl_ssi.c
@@ -1503,8 +1503,7 @@ static int fsl_ssi_probe(struct platform_device *pdev)
 	}
 	ssi->cpu_dai_drv.name = dev_name(dev);
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	iomem = devm_ioremap_resource(dev, res);
+	iomem = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
 	if (IS_ERR(iomem))
 		return PTR_ERR(iomem);
 	ssi->ssi_phys = res->start;
-- 
2.25.1


^ permalink raw reply related

* [PATCH -next 5/9] ASoC: fsl_micfil: Use devm_platform_get_and_ioremap_resource()
From: Yang Yingliang @ 2021-06-11  9:36 UTC (permalink / raw)
  To: linux-kernel, linuxppc-dev, alsa-devel; +Cc: broonie, timur
In-Reply-To: <20210611093626.579176-1-yangyingliang@huawei.com>

Use devm_platform_get_and_ioremap_resource() to simplify
code.

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
---
 sound/soc/fsl/fsl_micfil.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/sound/soc/fsl/fsl_micfil.c b/sound/soc/fsl/fsl_micfil.c
index 3cf789ed6cbe..8c0c75ce9490 100644
--- a/sound/soc/fsl/fsl_micfil.c
+++ b/sound/soc/fsl/fsl_micfil.c
@@ -669,8 +669,7 @@ static int fsl_micfil_probe(struct platform_device *pdev)
 	}
 
 	/* init regmap */
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	regs = devm_ioremap_resource(&pdev->dev, res);
+	regs = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
 	if (IS_ERR(regs))
 		return PTR_ERR(regs);
 
-- 
2.25.1


^ permalink raw reply related

* Re: [PATCH v4 2/2] powerpc/64: Option to use ELF V2 ABI for big-endian kernels
From: Michal Suchánek @ 2021-06-11  9:58 UTC (permalink / raw)
  To: Nicholas Piggin; +Cc: linuxppc-dev, linux-kernel, Jessica Yu
In-Reply-To: <20210611093959.821525-3-npiggin@gmail.com>

On Fri, Jun 11, 2021 at 07:39:59PM +1000, Nicholas Piggin wrote:
> Provide an option to build big-endian kernels using the ELFv2 ABI. This
> works on GCC only so far, although it is rumored to work with clang
> that's not been tested yet. A new module version check ensures the
> module ELF ABI level matches the kernel build.
> 
> This can give big-endian kernels some useful advantages of the ELFv2 ABI
> (e.g., less stack usage, -mprofile-kernel, better compatibility with eBPF
> tools).
> 
> BE+ELFv2 is not officially supported by the GNU toolchain, but it works
> fine in testing and has been used by some userspace for some time (e.g.,
> Void Linux).
> 
> Tested-by: Michal Suchánek <msuchanek@suse.de>
> Reviewed-by: Segher Boessenkool <segher@kernel.crashing.org>
> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
> ---
>  arch/powerpc/Kconfig                | 22 ++++++++++++++++++++++
>  arch/powerpc/Makefile               | 18 ++++++++++++------
>  arch/powerpc/boot/Makefile          |  4 +++-
>  arch/powerpc/include/asm/module.h   | 24 ++++++++++++++++++++++++
>  arch/powerpc/kernel/vdso64/Makefile | 13 +++++++++++++
>  drivers/crypto/vmx/Makefile         |  8 ++++++--
>  drivers/crypto/vmx/ppc-xlate.pl     | 10 ++++++----
>  7 files changed, 86 insertions(+), 13 deletions(-)
> 
> diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
> index 088dd2afcfe4..093f973a28b9 100644
> --- a/arch/powerpc/Kconfig
> +++ b/arch/powerpc/Kconfig
> @@ -163,6 +163,7 @@ config PPC
>  	select ARCH_WEAK_RELEASE_ACQUIRE
>  	select BINFMT_ELF
>  	select BUILDTIME_TABLE_SORT
> +	select PPC64_BUILD_ELF_V2_ABI		if PPC64 && CPU_LITTLE_ENDIAN
>  	select CLONE_BACKWARDS
>  	select DCACHE_WORD_ACCESS		if PPC64 && CPU_LITTLE_ENDIAN
>  	select DMA_OPS_BYPASS			if PPC64
> @@ -561,6 +562,27 @@ config KEXEC_FILE
>  config ARCH_HAS_KEXEC_PURGATORY
>  	def_bool KEXEC_FILE
>  
> +config PPC64_BUILD_ELF_V2_ABI
> +	bool
> +
> +config PPC64_BUILD_BIG_ENDIAN_ELF_V2_ABI
> +	bool "Build big-endian kernel using ELF ABI V2 (EXPERIMENTAL)"
> +	depends on PPC64 && CPU_BIG_ENDIAN && EXPERT
> +	depends on CC_IS_GCC && LD_VERSION >= 22400
> +	default n
> +	select PPC64_BUILD_ELF_V2_ABI
> +	help
> +	  This builds the kernel image using the "Power Architecture 64-Bit ELF
> +	  V2 ABI Specification", which has a reduced stack overhead and faster
> +	  function calls. This internal kernel ABI option does not affect
> +          userspace compatibility.
> +
> +	  The V2 ABI is standard for 64-bit little-endian, but for big-endian
> +	  it is less well tested by kernel and toolchain. However some distros
> +	  build userspace this way, and it can produce a functioning kernel.
> +
> +	  This requires GCC and binutils 2.24 or newer.
> +
>  config RELOCATABLE
>  	bool "Build a relocatable kernel"
>  	depends on PPC64 || (FLATMEM && (44x || FSL_BOOKE))
> diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
> index 3212d076ac6a..b90b5cb799aa 100644
> --- a/arch/powerpc/Makefile
> +++ b/arch/powerpc/Makefile
> @@ -91,10 +91,14 @@ endif
>  
>  ifdef CONFIG_PPC64
>  ifndef CONFIG_CC_IS_CLANG
> -cflags-$(CONFIG_CPU_BIG_ENDIAN)		+= $(call cc-option,-mabi=elfv1)
> -cflags-$(CONFIG_CPU_BIG_ENDIAN)		+= $(call cc-option,-mcall-aixdesc)
> -aflags-$(CONFIG_CPU_BIG_ENDIAN)		+= $(call cc-option,-mabi=elfv1)
> -aflags-$(CONFIG_CPU_LITTLE_ENDIAN)	+= -mabi=elfv2
> +ifdef CONFIG_PPC64_BUILD_ELF_V2_ABI
> +cflags-y				+= $(call cc-option,-mabi=elfv2)
> +aflags-y				+= $(call cc-option,-mabi=elfv2)
> +else
> +cflags-y				+= $(call cc-option,-mabi=elfv1)
> +cflags-y				+= $(call cc-option,-mcall-aixdesc)
> +aflags-y				+= $(call cc-option,-mabi=elfv1)
> +endif
>  endif
>  endif
>  
> @@ -142,15 +146,17 @@ endif
>  
>  CFLAGS-$(CONFIG_PPC64)	:= $(call cc-option,-mtraceback=no)
>  ifndef CONFIG_CC_IS_CLANG
> -ifdef CONFIG_CPU_LITTLE_ENDIAN
> -CFLAGS-$(CONFIG_PPC64)	+= $(call cc-option,-mabi=elfv2,$(call cc-option,-mcall-aixdesc))
> +ifdef CONFIG_PPC64_BUILD_ELF_V2_ABI
> +CFLAGS-$(CONFIG_PPC64)	+= $(call cc-option,-mabi=elfv2)
>  AFLAGS-$(CONFIG_PPC64)	+= $(call cc-option,-mabi=elfv2)
>  else
> +# Keep these in synch with arch/powerpc/kernel/vdso64/Makefile
>  CFLAGS-$(CONFIG_PPC64)	+= $(call cc-option,-mabi=elfv1)
>  CFLAGS-$(CONFIG_PPC64)	+= $(call cc-option,-mcall-aixdesc)
>  AFLAGS-$(CONFIG_PPC64)	+= $(call cc-option,-mabi=elfv1)
>  endif
>  endif
> +
>  CFLAGS-$(CONFIG_PPC64)	+= $(call cc-option,-mcmodel=medium,$(call cc-option,-mminimal-toc))
>  CFLAGS-$(CONFIG_PPC64)	+= $(call cc-option,-mno-pointers-to-nested-functions)
>  
> diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile
> index 2b8da923ceca..be84a72f8258 100644
> --- a/arch/powerpc/boot/Makefile
> +++ b/arch/powerpc/boot/Makefile
> @@ -40,6 +40,9 @@ BOOTCFLAGS    := -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \
>  
>  ifdef CONFIG_PPC64_BOOT_WRAPPER
>  BOOTCFLAGS	+= -m64
> +ifdef CONFIG_PPC64_BUILD_ELF_V2_ABI
> +BOOTCFLAGS	+= $(call cc-option,-mabi=elfv2)
> +endif
>  else
>  BOOTCFLAGS	+= -m32
>  endif
> @@ -50,7 +53,6 @@ ifdef CONFIG_CPU_BIG_ENDIAN
>  BOOTCFLAGS	+= -mbig-endian
>  else
>  BOOTCFLAGS	+= -mlittle-endian
> -BOOTCFLAGS	+= $(call cc-option,-mabi=elfv2)
>  endif
>  
>  BOOTAFLAGS	:= -D__ASSEMBLY__ $(BOOTCFLAGS) -nostdinc
> diff --git a/arch/powerpc/include/asm/module.h b/arch/powerpc/include/asm/module.h
> index 857d9ff24295..043e11068ff4 100644
> --- a/arch/powerpc/include/asm/module.h
> +++ b/arch/powerpc/include/asm/module.h
> @@ -52,6 +52,30 @@ struct mod_arch_specific {
>  	unsigned int num_bugs;
>  };
>  
> +/*
> + * Check kernel module ELF header architecture specific compatibility.
> + */
> +static inline bool elf_check_module_arch(Elf_Ehdr *hdr)
> +{
> +	if (!elf_check_arch(hdr))
> +		return false;
> +
> +	if (IS_ENABLED(CONFIG_PPC64)) {
> +		unsigned long abi_level = hdr->e_flags & 0x3;
> +
> +		if (IS_ENABLED(CONFIG_PPC64_BUILD_ELF_V2_ABI)) {
> +			if (abi_level != 2)
> +				return false;
> +		} else {
> +			if (abi_level >= 2)
> +				return false;
> +		}
> +	}
> +
> +	return true;
> +}
> +#define elf_check_module_arch elf_check_module_arch
> +
>  /*
>   * Select ELF headers.
>   * Make empty section for module_frob_arch_sections to expand.
Shouldn't this part go to the second patch?

Thanks

Michal
> diff --git a/arch/powerpc/kernel/vdso64/Makefile b/arch/powerpc/kernel/vdso64/Makefile
> index 2813e3f98db6..d783c07e558f 100644
> --- a/arch/powerpc/kernel/vdso64/Makefile
> +++ b/arch/powerpc/kernel/vdso64/Makefile
> @@ -25,6 +25,19 @@ KCOV_INSTRUMENT := n
>  UBSAN_SANITIZE := n
>  KASAN_SANITIZE := n
>  
> +# Always build vdso64 with ELFv1 ABI for BE kernels
> +ifdef CONFIG_PPC64_BUILD_ELF_V2_ABI
> +ifdef CONFIG_CPU_BIG_ENDIAN
> +KBUILD_CFLAGS := $(filter-out -mabi=elfv2,$(KBUILD_CFLAGS))
> +KBUILD_AFLAGS := $(filter-out -mabi=elfv2,$(KBUILD_AFLAGS))
> +
> +# These are derived from arch/powerpc/Makefile
> +KBUILD_CFLAGS += $(call cc-option,-mabi=elfv1)
> +KBUILD_CFLAGS += $(call cc-option,-mcall-aixdesc)
> +KBUILD_AFLAGS += $(call cc-option,-mabi=elfv1)
> +endif
> +endif
> +
>  ccflags-y := -shared -fno-common -fno-builtin -nostdlib \
>  	-Wl,-soname=linux-vdso64.so.1 -Wl,--hash-style=both
>  asflags-y := -D__VDSO64__ -s
> diff --git a/drivers/crypto/vmx/Makefile b/drivers/crypto/vmx/Makefile
> index 709670d2b553..d9ccf9fc3483 100644
> --- a/drivers/crypto/vmx/Makefile
> +++ b/drivers/crypto/vmx/Makefile
> @@ -5,18 +5,22 @@ vmx-crypto-objs := vmx.o aesp8-ppc.o ghashp8-ppc.o aes.o aes_cbc.o aes_ctr.o aes
>  ifeq ($(CONFIG_CPU_LITTLE_ENDIAN),y)
>  override flavour := linux-ppc64le
>  else
> +ifdef CONFIG_PPC64_BUILD_ELF_V2_ABI
> +override flavour := linux-ppc64-elfv2
> +else
>  override flavour := linux-ppc64
>  endif
> +endif
>  
>  quiet_cmd_perl = PERL $@
>        cmd_perl = $(PERL) $(<) $(flavour) > $(@)
>  
>  targets += aesp8-ppc.S ghashp8-ppc.S
>  
> -$(obj)/aesp8-ppc.S: $(src)/aesp8-ppc.pl FORCE
> +$(obj)/aesp8-ppc.S: $(src)/aesp8-ppc.pl $(src)/ppc-xlate.pl FORCE
>  	$(call if_changed,perl)
>    
> -$(obj)/ghashp8-ppc.S: $(src)/ghashp8-ppc.pl FORCE
> +$(obj)/ghashp8-ppc.S: $(src)/ghashp8-ppc.pl $(src)/ppc-xlate.pl FORCE
>  	$(call if_changed,perl)
>  
>  clean-files := aesp8-ppc.S ghashp8-ppc.S
> diff --git a/drivers/crypto/vmx/ppc-xlate.pl b/drivers/crypto/vmx/ppc-xlate.pl
> index 36db2ef09e5b..b583898c11ae 100644
> --- a/drivers/crypto/vmx/ppc-xlate.pl
> +++ b/drivers/crypto/vmx/ppc-xlate.pl
> @@ -9,6 +9,8 @@ open STDOUT,">$output" || die "can't open $output: $!";
>  
>  my %GLOBALS;
>  my $dotinlocallabels=($flavour=~/linux/)?1:0;
> +my $elfv2abi=(($flavour =~ /linux-ppc64le/) or ($flavour =~ /linux-ppc64-elfv2/))?1:0;
> +my $dotfunctions=($elfv2abi=~1)?0:1;
>  
>  ################################################################
>  # directives which need special treatment on different platforms
> @@ -40,7 +42,7 @@ my $globl = sub {
>  };
>  my $text = sub {
>      my $ret = ($flavour =~ /aix/) ? ".csect\t.text[PR],7" : ".text";
> -    $ret = ".abiversion	2\n".$ret	if ($flavour =~ /linux.*64le/);
> +    $ret = ".abiversion	2\n".$ret	if ($elfv2abi);
>      $ret;
>  };
>  my $machine = sub {
> @@ -56,8 +58,8 @@ my $size = sub {
>      if ($flavour =~ /linux/)
>      {	shift;
>  	my $name = shift; $name =~ s|^[\.\_]||;
> -	my $ret  = ".size	$name,.-".($flavour=~/64$/?".":"").$name;
> -	$ret .= "\n.size	.$name,.-.$name" if ($flavour=~/64$/);
> +	my $ret  = ".size	$name,.-".($dotfunctions?".":"").$name;
> +	$ret .= "\n.size	.$name,.-.$name" if ($dotfunctions);
>  	$ret;
>      }
>      else
> @@ -142,7 +144,7 @@ my $vmr = sub {
>  
>  # Some ABIs specify vrsave, special-purpose register #256, as reserved
>  # for system use.
> -my $no_vrsave = ($flavour =~ /linux-ppc64le/);
> +my $no_vrsave = ($elfv2abi);
>  my $mtspr = sub {
>      my ($f,$idx,$ra) = @_;
>      if ($idx == 256 && $no_vrsave) {
> -- 
> 2.23.0
> 

^ permalink raw reply

* Re: [PATCH v4 2/2] powerpc/64: Option to use ELF V2 ABI for big-endian kernels
From: Michal Suchánek @ 2021-06-11 10:20 UTC (permalink / raw)
  To: Nicholas Piggin; +Cc: linuxppc-dev, linux-kernel, Jessica Yu
In-Reply-To: <20210611095819.GA8544@kitsune.suse.cz>

On Fri, Jun 11, 2021 at 11:58:19AM +0200, Michal Suchánek wrote:
> On Fri, Jun 11, 2021 at 07:39:59PM +1000, Nicholas Piggin wrote:
> > Provide an option to build big-endian kernels using the ELFv2 ABI. This
> > works on GCC only so far, although it is rumored to work with clang
> > that's not been tested yet. A new module version check ensures the
> > module ELF ABI level matches the kernel build.
> > 
> > This can give big-endian kernels some useful advantages of the ELFv2 ABI
> > (e.g., less stack usage, -mprofile-kernel, better compatibility with eBPF
> > tools).
> > 
> > BE+ELFv2 is not officially supported by the GNU toolchain, but it works
> > fine in testing and has been used by some userspace for some time (e.g.,
> > Void Linux).
> > 
> > Tested-by: Michal Suchánek <msuchanek@suse.de>
> > Reviewed-by: Segher Boessenkool <segher@kernel.crashing.org>
> > Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
> > ---
> >  arch/powerpc/Kconfig                | 22 ++++++++++++++++++++++
> >  arch/powerpc/Makefile               | 18 ++++++++++++------
> >  arch/powerpc/boot/Makefile          |  4 +++-
> >  arch/powerpc/include/asm/module.h   | 24 ++++++++++++++++++++++++
> >  arch/powerpc/kernel/vdso64/Makefile | 13 +++++++++++++
> >  drivers/crypto/vmx/Makefile         |  8 ++++++--
> >  drivers/crypto/vmx/ppc-xlate.pl     | 10 ++++++----
> >  7 files changed, 86 insertions(+), 13 deletions(-)
> > 
> > diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
> > index 088dd2afcfe4..093f973a28b9 100644
> > --- a/arch/powerpc/Kconfig
> > +++ b/arch/powerpc/Kconfig
> > @@ -163,6 +163,7 @@ config PPC
> >  	select ARCH_WEAK_RELEASE_ACQUIRE
> >  	select BINFMT_ELF
> >  	select BUILDTIME_TABLE_SORT
> > +	select PPC64_BUILD_ELF_V2_ABI		if PPC64 && CPU_LITTLE_ENDIAN
> >  	select CLONE_BACKWARDS
> >  	select DCACHE_WORD_ACCESS		if PPC64 && CPU_LITTLE_ENDIAN
> >  	select DMA_OPS_BYPASS			if PPC64
> > @@ -561,6 +562,27 @@ config KEXEC_FILE
> >  config ARCH_HAS_KEXEC_PURGATORY
> >  	def_bool KEXEC_FILE
> >  
> > +config PPC64_BUILD_ELF_V2_ABI
> > +	bool
> > +
> > +config PPC64_BUILD_BIG_ENDIAN_ELF_V2_ABI
> > +	bool "Build big-endian kernel using ELF ABI V2 (EXPERIMENTAL)"
> > +	depends on PPC64 && CPU_BIG_ENDIAN && EXPERT
> > +	depends on CC_IS_GCC && LD_VERSION >= 22400
> > +	default n
> > +	select PPC64_BUILD_ELF_V2_ABI
> > +	help
> > +	  This builds the kernel image using the "Power Architecture 64-Bit ELF
> > +	  V2 ABI Specification", which has a reduced stack overhead and faster
> > +	  function calls. This internal kernel ABI option does not affect
> > +          userspace compatibility.
> > +
> > +	  The V2 ABI is standard for 64-bit little-endian, but for big-endian
> > +	  it is less well tested by kernel and toolchain. However some distros
> > +	  build userspace this way, and it can produce a functioning kernel.
> > +
> > +	  This requires GCC and binutils 2.24 or newer.
> > +
> >  config RELOCATABLE
> >  	bool "Build a relocatable kernel"
> >  	depends on PPC64 || (FLATMEM && (44x || FSL_BOOKE))
> > diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
> > index 3212d076ac6a..b90b5cb799aa 100644
> > --- a/arch/powerpc/Makefile
> > +++ b/arch/powerpc/Makefile
> > @@ -91,10 +91,14 @@ endif
> >  
> >  ifdef CONFIG_PPC64
> >  ifndef CONFIG_CC_IS_CLANG
> > -cflags-$(CONFIG_CPU_BIG_ENDIAN)		+= $(call cc-option,-mabi=elfv1)
> > -cflags-$(CONFIG_CPU_BIG_ENDIAN)		+= $(call cc-option,-mcall-aixdesc)
> > -aflags-$(CONFIG_CPU_BIG_ENDIAN)		+= $(call cc-option,-mabi=elfv1)
> > -aflags-$(CONFIG_CPU_LITTLE_ENDIAN)	+= -mabi=elfv2
> > +ifdef CONFIG_PPC64_BUILD_ELF_V2_ABI
> > +cflags-y				+= $(call cc-option,-mabi=elfv2)
> > +aflags-y				+= $(call cc-option,-mabi=elfv2)
> > +else
> > +cflags-y				+= $(call cc-option,-mabi=elfv1)
> > +cflags-y				+= $(call cc-option,-mcall-aixdesc)
> > +aflags-y				+= $(call cc-option,-mabi=elfv1)
> > +endif
> >  endif
> >  endif
> >  
> > @@ -142,15 +146,17 @@ endif
> >  
> >  CFLAGS-$(CONFIG_PPC64)	:= $(call cc-option,-mtraceback=no)
> >  ifndef CONFIG_CC_IS_CLANG
> > -ifdef CONFIG_CPU_LITTLE_ENDIAN
> > -CFLAGS-$(CONFIG_PPC64)	+= $(call cc-option,-mabi=elfv2,$(call cc-option,-mcall-aixdesc))
> > +ifdef CONFIG_PPC64_BUILD_ELF_V2_ABI
> > +CFLAGS-$(CONFIG_PPC64)	+= $(call cc-option,-mabi=elfv2)
> >  AFLAGS-$(CONFIG_PPC64)	+= $(call cc-option,-mabi=elfv2)
> >  else
> > +# Keep these in synch with arch/powerpc/kernel/vdso64/Makefile
> >  CFLAGS-$(CONFIG_PPC64)	+= $(call cc-option,-mabi=elfv1)
> >  CFLAGS-$(CONFIG_PPC64)	+= $(call cc-option,-mcall-aixdesc)
> >  AFLAGS-$(CONFIG_PPC64)	+= $(call cc-option,-mabi=elfv1)
> >  endif
> >  endif
> > +
> >  CFLAGS-$(CONFIG_PPC64)	+= $(call cc-option,-mcmodel=medium,$(call cc-option,-mminimal-toc))
> >  CFLAGS-$(CONFIG_PPC64)	+= $(call cc-option,-mno-pointers-to-nested-functions)
> >  
> > diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile
> > index 2b8da923ceca..be84a72f8258 100644
> > --- a/arch/powerpc/boot/Makefile
> > +++ b/arch/powerpc/boot/Makefile
> > @@ -40,6 +40,9 @@ BOOTCFLAGS    := -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \
> >  
> >  ifdef CONFIG_PPC64_BOOT_WRAPPER
> >  BOOTCFLAGS	+= -m64
> > +ifdef CONFIG_PPC64_BUILD_ELF_V2_ABI
> > +BOOTCFLAGS	+= $(call cc-option,-mabi=elfv2)
> > +endif
> >  else
> >  BOOTCFLAGS	+= -m32
> >  endif
> > @@ -50,7 +53,6 @@ ifdef CONFIG_CPU_BIG_ENDIAN
> >  BOOTCFLAGS	+= -mbig-endian
> >  else
> >  BOOTCFLAGS	+= -mlittle-endian
> > -BOOTCFLAGS	+= $(call cc-option,-mabi=elfv2)
> >  endif
> >  
> >  BOOTAFLAGS	:= -D__ASSEMBLY__ $(BOOTCFLAGS) -nostdinc
> > diff --git a/arch/powerpc/include/asm/module.h b/arch/powerpc/include/asm/module.h
> > index 857d9ff24295..043e11068ff4 100644
> > --- a/arch/powerpc/include/asm/module.h
> > +++ b/arch/powerpc/include/asm/module.h
> > @@ -52,6 +52,30 @@ struct mod_arch_specific {
> >  	unsigned int num_bugs;
> >  };
> >  
> > +/*
> > + * Check kernel module ELF header architecture specific compatibility.
> > + */
> > +static inline bool elf_check_module_arch(Elf_Ehdr *hdr)
> > +{
> > +	if (!elf_check_arch(hdr))
> > +		return false;
> > +
> > +	if (IS_ENABLED(CONFIG_PPC64)) {
> > +		unsigned long abi_level = hdr->e_flags & 0x3;
> > +
> > +		if (IS_ENABLED(CONFIG_PPC64_BUILD_ELF_V2_ABI)) {
> > +			if (abi_level != 2)
> > +				return false;
> > +		} else {
> > +			if (abi_level >= 2)
> > +				return false;
> > +		}
> > +	}
> > +
> > +	return true;
> > +}
> > +#define elf_check_module_arch elf_check_module_arch
> > +
> >  /*
> >   * Select ELF headers.
> >   * Make empty section for module_frob_arch_sections to expand.
> Shouldn't this part go to the second patch?
Nevermind, this is the second patch now.
> 
> Thanks
> 
> Michal
> > diff --git a/arch/powerpc/kernel/vdso64/Makefile b/arch/powerpc/kernel/vdso64/Makefile
> > index 2813e3f98db6..d783c07e558f 100644
> > --- a/arch/powerpc/kernel/vdso64/Makefile
> > +++ b/arch/powerpc/kernel/vdso64/Makefile
> > @@ -25,6 +25,19 @@ KCOV_INSTRUMENT := n
> >  UBSAN_SANITIZE := n
> >  KASAN_SANITIZE := n
> >  
> > +# Always build vdso64 with ELFv1 ABI for BE kernels
> > +ifdef CONFIG_PPC64_BUILD_ELF_V2_ABI
> > +ifdef CONFIG_CPU_BIG_ENDIAN
> > +KBUILD_CFLAGS := $(filter-out -mabi=elfv2,$(KBUILD_CFLAGS))
> > +KBUILD_AFLAGS := $(filter-out -mabi=elfv2,$(KBUILD_AFLAGS))
> > +
> > +# These are derived from arch/powerpc/Makefile
> > +KBUILD_CFLAGS += $(call cc-option,-mabi=elfv1)
> > +KBUILD_CFLAGS += $(call cc-option,-mcall-aixdesc)
> > +KBUILD_AFLAGS += $(call cc-option,-mabi=elfv1)
> > +endif
> > +endif
> > +
> >  ccflags-y := -shared -fno-common -fno-builtin -nostdlib \
> >  	-Wl,-soname=linux-vdso64.so.1 -Wl,--hash-style=both
> >  asflags-y := -D__VDSO64__ -s
> > diff --git a/drivers/crypto/vmx/Makefile b/drivers/crypto/vmx/Makefile
> > index 709670d2b553..d9ccf9fc3483 100644
> > --- a/drivers/crypto/vmx/Makefile
> > +++ b/drivers/crypto/vmx/Makefile
> > @@ -5,18 +5,22 @@ vmx-crypto-objs := vmx.o aesp8-ppc.o ghashp8-ppc.o aes.o aes_cbc.o aes_ctr.o aes
> >  ifeq ($(CONFIG_CPU_LITTLE_ENDIAN),y)
> >  override flavour := linux-ppc64le
> >  else
> > +ifdef CONFIG_PPC64_BUILD_ELF_V2_ABI
> > +override flavour := linux-ppc64-elfv2
> > +else
> >  override flavour := linux-ppc64
> >  endif
> > +endif
> >  
> >  quiet_cmd_perl = PERL $@
> >        cmd_perl = $(PERL) $(<) $(flavour) > $(@)
> >  
> >  targets += aesp8-ppc.S ghashp8-ppc.S
> >  
> > -$(obj)/aesp8-ppc.S: $(src)/aesp8-ppc.pl FORCE
> > +$(obj)/aesp8-ppc.S: $(src)/aesp8-ppc.pl $(src)/ppc-xlate.pl FORCE
> >  	$(call if_changed,perl)
> >    
> > -$(obj)/ghashp8-ppc.S: $(src)/ghashp8-ppc.pl FORCE
> > +$(obj)/ghashp8-ppc.S: $(src)/ghashp8-ppc.pl $(src)/ppc-xlate.pl FORCE
> >  	$(call if_changed,perl)
> >  
> >  clean-files := aesp8-ppc.S ghashp8-ppc.S
> > diff --git a/drivers/crypto/vmx/ppc-xlate.pl b/drivers/crypto/vmx/ppc-xlate.pl
> > index 36db2ef09e5b..b583898c11ae 100644
> > --- a/drivers/crypto/vmx/ppc-xlate.pl
> > +++ b/drivers/crypto/vmx/ppc-xlate.pl
> > @@ -9,6 +9,8 @@ open STDOUT,">$output" || die "can't open $output: $!";
> >  
> >  my %GLOBALS;
> >  my $dotinlocallabels=($flavour=~/linux/)?1:0;
> > +my $elfv2abi=(($flavour =~ /linux-ppc64le/) or ($flavour =~ /linux-ppc64-elfv2/))?1:0;
> > +my $dotfunctions=($elfv2abi=~1)?0:1;
> >  
> >  ################################################################
> >  # directives which need special treatment on different platforms
> > @@ -40,7 +42,7 @@ my $globl = sub {
> >  };
> >  my $text = sub {
> >      my $ret = ($flavour =~ /aix/) ? ".csect\t.text[PR],7" : ".text";
> > -    $ret = ".abiversion	2\n".$ret	if ($flavour =~ /linux.*64le/);
> > +    $ret = ".abiversion	2\n".$ret	if ($elfv2abi);
> >      $ret;
> >  };
> >  my $machine = sub {
> > @@ -56,8 +58,8 @@ my $size = sub {
> >      if ($flavour =~ /linux/)
> >      {	shift;
> >  	my $name = shift; $name =~ s|^[\.\_]||;
> > -	my $ret  = ".size	$name,.-".($flavour=~/64$/?".":"").$name;
> > -	$ret .= "\n.size	.$name,.-.$name" if ($flavour=~/64$/);
> > +	my $ret  = ".size	$name,.-".($dotfunctions?".":"").$name;
> > +	$ret .= "\n.size	.$name,.-.$name" if ($dotfunctions);
> >  	$ret;
> >      }
> >      else
> > @@ -142,7 +144,7 @@ my $vmr = sub {
> >  
> >  # Some ABIs specify vrsave, special-purpose register #256, as reserved
> >  # for system use.
> > -my $no_vrsave = ($flavour =~ /linux-ppc64le/);
> > +my $no_vrsave = ($elfv2abi);
> >  my $mtspr = sub {
> >      my ($f,$idx,$ra) = @_;
> >      if ($idx == 256 && $no_vrsave) {
> > -- 
> > 2.23.0
> > 

^ permalink raw reply

* [PATCH] powerpc/build: vdso linker warning for orphan sections
From: Nicholas Piggin @ 2021-06-11 11:10 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Nicholas Piggin

Add --orphan-handling=warn for vdsos, and adjust vdso linker scripts to
deal with orphan sections.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/kernel/vdso32/Makefile     | 2 +-
 arch/powerpc/kernel/vdso32/vdso32.lds.S | 3 ++-
 arch/powerpc/kernel/vdso64/Makefile     | 2 +-
 arch/powerpc/kernel/vdso64/vdso64.lds.S | 3 ++-
 4 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/kernel/vdso32/Makefile b/arch/powerpc/kernel/vdso32/Makefile
index 7d9a6fee0e3d..403033013848 100644
--- a/arch/powerpc/kernel/vdso32/Makefile
+++ b/arch/powerpc/kernel/vdso32/Makefile
@@ -66,7 +66,7 @@ include/generated/vdso32-offsets.h: $(obj)/vdso32.so.dbg FORCE
 
 # actual build commands
 quiet_cmd_vdso32ld_and_check = VDSO32L $@
-      cmd_vdso32ld_and_check = $(VDSOCC) $(c_flags) $(CC32FLAGS) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^) ; $(cmd_vdso_check)
+      cmd_vdso32ld_and_check = $(VDSOCC) $(c_flags) $(CC32FLAGS) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^) $(if $(CONFIG_LD_ORPHAN_WARN), -Wl$(comma)--orphan-handling=warn) ; $(cmd_vdso_check)
 quiet_cmd_vdso32as = VDSO32A $@
       cmd_vdso32as = $(VDSOCC) $(a_flags) $(CC32FLAGS) -c -o $@ $<
 quiet_cmd_vdso32cc = VDSO32C $@
diff --git a/arch/powerpc/kernel/vdso32/vdso32.lds.S b/arch/powerpc/kernel/vdso32/vdso32.lds.S
index 58e0099f70f4..b42e8759e67a 100644
--- a/arch/powerpc/kernel/vdso32/vdso32.lds.S
+++ b/arch/powerpc/kernel/vdso32/vdso32.lds.S
@@ -85,9 +85,10 @@ SECTIONS
 
 	/DISCARD/	: {
 		*(.note.GNU-stack)
+		*(.branch_lt)
 		*(.data .data.* .gnu.linkonce.d.* .sdata*)
 		*(.bss .sbss .dynbss .dynsbss)
-		*(.got1)
+		*(.got1 .glink .iplt .rela*)
 	}
 }
 
diff --git a/arch/powerpc/kernel/vdso64/Makefile b/arch/powerpc/kernel/vdso64/Makefile
index d783c07e558f..980dfd631a08 100644
--- a/arch/powerpc/kernel/vdso64/Makefile
+++ b/arch/powerpc/kernel/vdso64/Makefile
@@ -59,4 +59,4 @@ include/generated/vdso64-offsets.h: $(obj)/vdso64.so.dbg FORCE
 
 # actual build commands
 quiet_cmd_vdso64ld_and_check = VDSO64L $@
-      cmd_vdso64ld_and_check = $(CC) $(c_flags) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^); $(cmd_vdso_check)
+      cmd_vdso64ld_and_check = $(CC) $(c_flags) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^) $(if $(CONFIG_LD_ORPHAN_WARN), -Wl$(comma)--orphan-handling=warn) ; $(cmd_vdso_check)
diff --git a/arch/powerpc/kernel/vdso64/vdso64.lds.S b/arch/powerpc/kernel/vdso64/vdso64.lds.S
index 0288cad428b0..3750b3b15b51 100644
--- a/arch/powerpc/kernel/vdso64/vdso64.lds.S
+++ b/arch/powerpc/kernel/vdso64/vdso64.lds.S
@@ -33,7 +33,7 @@ SECTIONS
 	. = ALIGN(16);
 	.text		: {
 		*(.text .stub .text.* .gnu.linkonce.t.* __ftr_alt_*)
-		*(.sfpr .glink)
+		*(.sfpr)
 	}						:text
 	PROVIDE(__etext = .);
 	PROVIDE(_etext = .);
@@ -87,6 +87,7 @@ SECTIONS
 		*(.data .data.* .gnu.linkonce.d.* .sdata*)
 		*(.bss .sbss .dynbss .dynsbss)
 		*(.opd)
+		*(.glink .iplt .plt .rela*)
 	}
 }
 
-- 
2.23.0


^ permalink raw reply related

* [PATCH] powerpc/boot: add zImage.lds to targets
From: Nicholas Piggin @ 2021-06-11 11:11 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Nicholas Piggin

This prevents spurious rebuilds of the lds and then wrappers.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/boot/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile
index be84a72f8258..405acd2df160 100644
--- a/arch/powerpc/boot/Makefile
+++ b/arch/powerpc/boot/Makefile
@@ -229,7 +229,7 @@ $(obj)/wrapper.a: $(obj-wlib) FORCE
 
 hostprogs	:= addnote hack-coff mktree
 
-targets		+= $(patsubst $(obj)/%,%,$(obj-boot) wrapper.a)
+targets		+= $(patsubst $(obj)/%,%,$(obj-boot) wrapper.a) zImage.lds
 extra-y		:= $(obj)/wrapper.a $(obj-plat) $(obj)/empty.o \
 		   $(obj)/zImage.lds $(obj)/zImage.coff.lds $(obj)/zImage.ps3.lds
 
-- 
2.23.0


^ permalink raw reply related

* Re: [PATCH] fs: btrfs: Disable BTRFS on platforms having 256K pages
From: David Sterba @ 2021-06-11 12:34 UTC (permalink / raw)
  To: Christophe Leroy
  Cc: linux-hexagon, Josef Bacik, linux-kernel, Chris Mason,
	David Sterba, linuxppc-dev, linux-btrfs
In-Reply-To: <a16c31f3caf448dda5d9315e056585b6fafc22c5.1623302442.git.christophe.leroy@csgroup.eu>

On Thu, Jun 10, 2021 at 05:23:02AM +0000, Christophe Leroy wrote:
> With a config having PAGE_SIZE set to 256K, BTRFS build fails
> with the following message
> 
>  include/linux/compiler_types.h:326:38: error: call to '__compiletime_assert_791' declared with attribute error: BUILD_BUG_ON failed: (BTRFS_MAX_COMPRESSED % PAGE_SIZE) != 0
> 
> BTRFS_MAX_COMPRESSED being 128K, BTRFS cannot support platforms with
> 256K pages at the time being.
> 
> There are two platforms that can select 256K pages:
>  - hexagon
>  - powerpc
> 
> Disable BTRFS when 256K page size is selected.
> 
> Reported-by: kernel test robot <lkp@intel.com>
> Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>

With updated changelog added to misc-next, thanks.

^ permalink raw reply

* [RFC 0/2] CPU-Idle latency selftest framework
From: Pratik R. Sampat @ 2021-06-11 12:41 UTC (permalink / raw)
  To: mpe, shuah, linux-kernel, linuxppc-dev, linux-kselftest, psampat,
	pratik.r.sampat

A kernel module + userspace driver to estimate the wakeup latency
caused by going into stop states. The motivation behind this program is
to find significant deviations behind advertised latency and residency
values.

The patchset measures latencies for two kinds of events. IPIs and Timers
As this is a software-only mechanism, there will additional latencies of
the kernel-firmware-hardware interactions. To account for that, the
program also measures a baseline latency on a 100 percent loaded CPU
and the latencies achieved must be in view relative to that.

To achieve this, we introduce a kernel module and expose its control
knobs through the debugfs interface that the selftests can engage with.

The kernel module provides the following interfaces within
/sys/kernel/debug/latency_test/ for,

IPI test:
    ipi_cpu_dest = Destination CPU for the IPI
    ipi_cpu_src = Origin of the IPI
    ipi_latency_ns = Measured latency time in ns
Timeout test:
    timeout_cpu_src = CPU on which the timer to be queued
    timeout_expected_ns = Timer duration
    timeout_diff_ns = Difference of actual duration vs expected timer


Sample output on a POWER9 system is as follows:
# --IPI Latency Test---
# Baseline Average IPI latency(ns): 3114
# Observed Average IPI latency(ns) - Snooze: 3265
# Observed Average IPI latency(ns) - Stop0_lite: 3507
# Observed Average IPI latency(ns) - Stop0: 3739
# Observed Average IPI latency(ns) - Stop2: 3807
# Observed Average IPI latency(ns) - Stop4: 17070
# Observed Average IPI latency(ns) - Stop5: 1038174
# 
# --Timeout Latency Test--
# Baseline Average timeout diff(ns): 1420
# Observed Average timeout diff(ns) - Snooze: 1640
# Observed Average timeout diff(ns) - Stop0_lite: 1764
# Observed Average timeout diff(ns) - Stop0: 1715
# Observed Average timeout diff(ns) - Stop2: 1845
# Observed Average timeout diff(ns) - Stop4: 16581
# Observed Average timeout diff(ns) - Stop5: 939977

Pratik R. Sampat (2):
  powerpc/cpuidle: Extract IPI based and timer based wakeup latency from
    idle states
  powerpc/selftest: Add support for cpuidle latency measurement

 arch/powerpc/kernel/Makefile                  |   1 +
 arch/powerpc/kernel/test-cpuidle_latency.c    | 157 +++++++
 lib/Kconfig.debug                             |  10 +
 tools/testing/selftests/powerpc/Makefile      |   1 +
 .../powerpc/cpuidle_latency/.gitignore        |   2 +
 .../powerpc/cpuidle_latency/Makefile          |   6 +
 .../cpuidle_latency/cpuidle_latency.sh        | 419 ++++++++++++++++++
 .../powerpc/cpuidle_latency/settings          |   1 +
 8 files changed, 597 insertions(+)
 create mode 100644 arch/powerpc/kernel/test-cpuidle_latency.c
 create mode 100644 tools/testing/selftests/powerpc/cpuidle_latency/.gitignore
 create mode 100644 tools/testing/selftests/powerpc/cpuidle_latency/Makefile
 create mode 100755 tools/testing/selftests/powerpc/cpuidle_latency/cpuidle_latency.sh
 create mode 100644 tools/testing/selftests/powerpc/cpuidle_latency/settings

-- 
2.17.1


^ permalink raw reply

* [RFC 2/2] powerpc/selftest: Add support for cpuidle latency measurement
From: Pratik R. Sampat @ 2021-06-11 12:41 UTC (permalink / raw)
  To: mpe, shuah, linux-kernel, linuxppc-dev, linux-kselftest, psampat,
	pratik.r.sampat
In-Reply-To: <20210611124154.56427-1-psampat@linux.ibm.com>

The cpuidle latency selftest provides support to systematically extract,
analyse and present IPI and timer based wakeup latencies for each CPU
and each idle state available on the system.

The selftest leverages test-cpuidle_latency module's debugfs interface
to interact and extract latency information from the kernel.

The selftest inserts the module if already not inserted, disables all
the idle states and enables them one by one testing the following:
1. Keeping source CPU constant, iterate through all the CPUS measuring
  IPI latency for baseline (CPU is busy with cat /dev/random > /dev/null
  workload) and then when the CPU is allowed to be at rest
2. Iterating through all the CPUs, sending expected timer durations to
  be equivalent to the residency of the deepest idle state enabled
  and extracting the difference in time between the time of wakeup and
  the expected timer duration

To run this test specifically:
$ sudo make -C tools/testing/selftests \
  TARGETS="powerpc/cpuidle_latency" run_tests

There are a few optional arguments too that the script can take
        [-h <help>]
        [-m <location of the module>]
        [-o <location of the output>]
        [-v <verbose> (run on all cpus)]
Default Output location in:
tools/testing/selftests/powerpc/cpuidle_latency/cpuidle_latency.log

To run the test without re-compiling:
$ cd tools/testing/selftest/powerpc/cpuidle_latency/
$ sudo ./cpuidle_latency.sh

Signed-off-by: Pratik R. Sampat <psampat@linux.ibm.com>
---
 tools/testing/selftests/powerpc/Makefile      |   1 +
 .../powerpc/cpuidle_latency/.gitignore        |   2 +
 .../powerpc/cpuidle_latency/Makefile          |   6 +
 .../cpuidle_latency/cpuidle_latency.sh        | 419 ++++++++++++++++++
 .../powerpc/cpuidle_latency/settings          |   1 +
 5 files changed, 429 insertions(+)
 create mode 100644 tools/testing/selftests/powerpc/cpuidle_latency/.gitignore
 create mode 100644 tools/testing/selftests/powerpc/cpuidle_latency/Makefile
 create mode 100755 tools/testing/selftests/powerpc/cpuidle_latency/cpuidle_latency.sh
 create mode 100644 tools/testing/selftests/powerpc/cpuidle_latency/settings

diff --git a/tools/testing/selftests/powerpc/Makefile b/tools/testing/selftests/powerpc/Makefile
index 0830e63818c1..71ce6fff867d 100644
--- a/tools/testing/selftests/powerpc/Makefile
+++ b/tools/testing/selftests/powerpc/Makefile
@@ -17,6 +17,7 @@ SUB_DIRS = alignment		\
 	   benchmarks		\
 	   cache_shape		\
 	   copyloops		\
+	   cpuidle_latency	\
 	   dscr			\
 	   mm			\
 	   nx-gzip		\
diff --git a/tools/testing/selftests/powerpc/cpuidle_latency/.gitignore b/tools/testing/selftests/powerpc/cpuidle_latency/.gitignore
new file mode 100644
index 000000000000..987f8852dc59
--- /dev/null
+++ b/tools/testing/selftests/powerpc/cpuidle_latency/.gitignore
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+cpuidle_latency.log
diff --git a/tools/testing/selftests/powerpc/cpuidle_latency/Makefile b/tools/testing/selftests/powerpc/cpuidle_latency/Makefile
new file mode 100644
index 000000000000..04492b6d2582
--- /dev/null
+++ b/tools/testing/selftests/powerpc/cpuidle_latency/Makefile
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0
+all:
+
+TEST_PROGS := cpuidle_latency.sh
+
+include ../../lib.mk
diff --git a/tools/testing/selftests/powerpc/cpuidle_latency/cpuidle_latency.sh b/tools/testing/selftests/powerpc/cpuidle_latency/cpuidle_latency.sh
new file mode 100755
index 000000000000..6b55167de488
--- /dev/null
+++ b/tools/testing/selftests/powerpc/cpuidle_latency/cpuidle_latency.sh
@@ -0,0 +1,419 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# CPU-Idle latency selftest provides support to systematically extract,
+# analyse and present IPI and timer based wakeup latencies for each CPU
+# and each idle state available on the system by leveraging the
+# test-cpuidle_latency module
+#
+# Author: Pratik R. Sampat <psampat@linux.ibm.com>
+
+LOG=cpuidle_latency.log
+MODULE=/lib/modules/$(uname -r)/kernel/arch/powerpc/kernel/test-cpuidle_latency.ko
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+VERBOSE=0
+
+DISABLE=1
+ENABLE=0
+
+helpme()
+{
+	printf "Usage: $0 [-h] [-todg args]
+	[-h <help>]
+	[-m <location of the module>]
+	[-o <location of the output>]
+	[-v <verbose>]
+	\n"
+	exit 2
+}
+
+parse_arguments()
+{
+	while getopts ht:m:o:vt:it: arg
+	do
+		case $arg in
+			h) # --help
+				helpme
+				;;
+			m) # --mod-file
+				MODULE=$OPTARG
+				;;
+			o) # output log files
+				LOG=$OPTARG
+				;;
+			v) # Verbose mode - all threads of the CPU
+				VERBOSE=1
+				;;
+			\?)
+				helpme
+				;;
+		esac
+	done
+}
+
+ins_mod()
+{
+	debugfs_file=/sys/kernel/debug/latency_test/ipi_latency_ns
+	# Check if the module is already loaded
+	if [ -f "$debugfs_file" ]; then
+		printf "Module already loaded\n\n"
+		return 0
+	fi
+	# Try to load the module
+	if [ ! -f "$MODULE" ]; then
+		printf "$MODULE module does not exist. Exitting\n"
+		exit $ksft_skip
+	fi
+	printf "Inserting $MODULE module\n\n"
+	insmod $MODULE
+	if [ $? != 0 ]; then
+		printf "Insmod $MODULE failed\n"
+		exit $ksft_skip
+	fi
+}
+
+compute_average()
+{
+	arr=("$@")
+	sum=0
+	size=${#arr[@]}
+	if [ $size == 0 ]; then
+		avg=0
+		return 1
+	fi
+	for i in "${arr[@]}"
+	do
+		sum=$((sum + i))
+	done
+	avg=$((sum/size))
+}
+
+cpu_is_online()
+{
+	cpu=$1
+	if [ ! -f "/sys/devices/system/cpu/cpu$cpu/online" ]; then
+		echo 1
+		return
+	fi
+	status=$(cat /sys/devices/system/cpu/cpu$cpu/online)
+	echo $status
+}
+
+# Perform operation on each CPU for the given state
+# $1 - Operation: enable (0) / disable (1)
+# $2 - State to enable
+op_state()
+{
+	for ((cpu=0; cpu<NUM_CPUS; cpu++))
+	do
+		local cpu_status=$(cpu_is_online $cpu)
+		if [ $cpu_status == 0 ]; then
+			continue
+		fi
+		echo $1 > /sys/devices/system/cpu/cpu$cpu/cpuidle/state$2/disable
+	done
+}
+
+cpuidle_enable_state()
+{
+	state=$1
+	op_state $ENABLE $state
+}
+
+cpuidle_disable_state()
+{
+	state=$1
+	op_state $DISABLE $state
+}
+
+# Enable/Disable all stop states for all CPUs
+# $1 - Operation: enable (0) / disable (1)
+op_cpuidle()
+{
+	for ((state=0; state<NUM_STATES; state++))
+	do
+		op_state $1 $state
+	done
+}
+
+extract_state_information()
+{
+	for ((state=0; state<NUM_STATES; state++))
+	do
+		state_name=$(cat /sys/devices/system/cpu/cpu0/cpuidle/state$state/name)
+		state_name_arr+=($state_name)
+	done
+}
+
+# Extract latency in microseconds and convert to nanoseconds
+extract_latency()
+{
+	for ((state=0; state<NUM_STATES; state++))
+	do
+		latency=$(($(cat /sys/devices/system/cpu/cpu0/cpuidle/state$state/latency) * 1000))
+		latency_arr+=($latency)
+	done
+}
+
+# Simple linear search in an array
+# $1 - Element to search for
+# $2 - Array
+element_in()
+{
+	local item="$1"
+	shift
+	for element in "$@";
+	do
+		if [ "$element" == "$item" ]; then
+			return 0
+		fi
+	done
+	return 1
+}
+
+# Parse and return a cpuset with ","(individual) and "-" (range) of CPUs
+# $1 - cpuset string
+parse_cpuset()
+{
+	echo $1 | awk '/-/{for (i=$1; i<=$2; i++)printf "%s%s",i,ORS;next} 1' RS=, FS=-
+}
+
+extract_core_information()
+{
+	declare -a thread_arr
+	for ((cpu=0; cpu<NUM_CPUS; cpu++))
+	do
+		local cpu_status=$(cpu_is_online $cpu)
+		if [ $cpu_status == 0 ]; then
+			continue
+		fi
+
+		siblings=$(cat /sys/devices/system/cpu/cpu$cpu/topology/thread_siblings_list)
+		sib_arr=()
+		for c in $(parse_cpuset $siblings)
+		do
+			sib_arr+=($c)
+		done
+
+		if [ $VERBOSE == 1 ]; then
+			core_arr+=($cpu)
+			for thread in "${sib_arr[@]}"
+			do
+				if [ $cpu == 0 ]; then
+					first_core_arr+=($thread)
+				fi
+			done
+			continue
+		fi
+
+		element_in "${sib_arr[0]}" "${thread_arr[@]}"
+		if [ $? == 0 ]; then
+			continue
+		fi
+		core_arr+=(${sib_arr[0]})
+
+		for thread in "${sib_arr[@]}"
+		do
+			thread_arr+=($thread)
+			if [ $cpu == 0 ]; then
+				first_core_arr+=($thread)
+			fi
+		done
+	done
+}
+
+# Run the IPI test
+# $1 run for baseline - busy cpu or regular environment
+# $2 destination cpu
+ipi_test_once()
+{
+	dest_cpu=$2
+	if [ "$1" = "baseline" ]; then
+		# Keep the CPU busy
+		taskset -c $dest_cpu cat /dev/random > /dev/null &
+		task_pid=$!
+		# Wait for the workload to achieve 100% CPU usage
+		sleep 1
+	fi
+	taskset 0x1 echo $dest_cpu > /sys/kernel/debug/latency_test/ipi_cpu_dest
+	ipi_latency=$(cat /sys/kernel/debug/latency_test/ipi_latency_ns)
+	src_cpu=$(cat /sys/kernel/debug/latency_test/ipi_cpu_src)
+	if [ "$1" = "baseline" ]; then
+		kill $task_pid
+		wait $task_pid 2>/dev/null
+	fi
+}
+
+# Incrementally Enable idle states one by one and compute the latency
+run_ipi_tests()
+{
+	extract_latency
+	# Disable idle states for CPUs
+	op_cpuidle $DISABLE
+
+	declare -a avg_arr
+	echo -e "--IPI Latency Test---" | tee -a $LOG
+
+	echo -e "--Baseline IPI Latency measurement: CPU Busy--" >> $LOG
+	printf "%s %10s %12s\n" "SRC_CPU" "DEST_CPU" "IPI_Latency(ns)" >> $LOG
+	for cpu in "${core_arr[@]}"
+	do
+		local cpu_status=$(cpu_is_online $cpu)
+		if [ $cpu_status == 0 ]; then
+			continue
+		fi
+		ipi_test_once "baseline" $cpu
+		printf "%-3s %10s %12s\n" $src_cpu $cpu $ipi_latency >> $LOG
+		# Skip computing latency average from the source CPU to avoid bias
+		element_in "$cpu" "${first_core_arr[@]}"
+		if [ $? == 0 ]; then
+			continue
+		fi
+		avg_arr+=($ipi_latency)
+	done
+	compute_average "${avg_arr[@]}"
+	echo -e "Baseline Avg IPI latency(ns): $avg" | tee -a $LOG
+
+	for ((state=0; state<NUM_STATES; state++))
+	do
+		unset avg_arr
+		echo -e "---Enabling state: ${state_name_arr[$state]}---" >> $LOG
+		cpuidle_enable_state $state
+		printf "%s %10s %12s\n" "SRC_CPU" "DEST_CPU" "IPI_Latency(ns)" >> $LOG
+		for cpu in "${core_arr[@]}"
+		do
+			local cpu_status=$(cpu_is_online $cpu)
+			if [ $cpu_status == 0 ]; then
+				continue
+			fi
+			# Running IPI test and logging results
+			sleep 1
+			ipi_test_once "test" $cpu
+			printf "%-3s %10s %12s\n" $src_cpu $cpu $ipi_latency >> $LOG
+			# Skip computing latency average from the source CPU to avoid bias
+			element_in "$cpu" "${first_core_arr[@]}"
+			if [ $? == 0 ]; then
+				continue
+			fi
+			avg_arr+=($ipi_latency)
+		done
+		compute_average "${avg_arr[@]}"
+		echo -e "Expected IPI latency(ns): ${latency_arr[$state]}" >> $LOG
+		echo -e "Observed Avg IPI latency(ns) - State ${state_name_arr[$state]}: $avg" | tee -a $LOG
+		cpuidle_disable_state $state
+	done
+}
+
+# Extract the residency in microseconds and convert to nanoseconds.
+# Add 200 ns so that the timer stays for a little longer than the residency
+extract_residency()
+{
+	for ((state=0; state<NUM_STATES; state++))
+	do
+		residency=$(($(cat /sys/devices/system/cpu/cpu0/cpuidle/state$state/residency) * 1000 + 200))
+		residency_arr+=($residency)
+	done
+}
+
+# Run the Timeout test
+# $1 run for baseline - busy cpu or regular environment
+# $2 destination cpu
+# $3 timeout
+timeout_test_once()
+{
+	dest_cpu=$2
+	if [ "$1" = "baseline" ]; then
+		# Keep the CPU busy
+		taskset -c $dest_cpu cat /dev/random > /dev/null &
+		task_pid=$!
+		# Wait for the workload to achieve 100% CPU usage
+		sleep 1
+	fi
+	taskset -c $dest_cpu echo $3 > /sys/kernel/debug/latency_test/timeout_expected_ns
+	# Wait for the result to populate
+	sleep 0.1
+	timeout_diff=$(cat /sys/kernel/debug/latency_test/timeout_diff_ns)
+	src_cpu=$(cat /sys/kernel/debug/latency_test/timeout_cpu_src)
+	if [ "$1" = "baseline" ]; then
+		kill $task_pid
+		wait $task_pid 2>/dev/null
+	fi
+}
+
+run_timeout_tests()
+{
+	extract_residency
+	# Disable idle states for all CPUs
+	op_cpuidle $DISABLE
+
+	declare -a avg_arr
+	echo -e "\n--Timeout Latency Test--" | tee -a $LOG
+
+	echo -e "--Baseline Timeout Latency measurement: CPU Busy--" >> $LOG
+	printf "%s %10s %10s\n" "Wakeup_src" "Baseline_delay(ns)">> $LOG
+	for cpu in "${core_arr[@]}"
+	do
+		local cpu_status=$(cpu_is_online $cpu)
+		if [ $cpu_status == 0 ]; then
+			continue
+		fi
+		timeout_test_once "baseline" $cpu 1000000
+		printf "%-3s %13s\n" $src_cpu $timeout_diff >> $LOG
+		avg_arr+=($timeout_diff)
+	done
+	compute_average "${avg_arr[@]}"
+	echo -e "Baseline Avg timeout diff(ns): $avg" | tee -a $LOG
+
+	for ((state=0; state<NUM_STATES; state++))
+	do
+		unset avg_arr
+		echo -e "---Enabling state: ${state_name_arr[$state]}---" >> $LOG
+		cpuidle_enable_state $state
+		printf "%s %10s %10s\n" "Wakeup_src" "Baseline_delay(ns)" "Delay(ns)" >> $LOG
+		for cpu in "${core_arr[@]}"
+		do
+			local cpu_status=$(cpu_is_online $cpu)
+			if [ $cpu_status == 0 ]; then
+				continue
+			fi
+			timeout_test_once "test" $cpu 1000000
+			printf "%-3s %13s %18s\n" $src_cpu $baseline_timeout_diff $timeout_diff >> $LOG
+			avg_arr+=($timeout_diff)
+		done
+		compute_average "${avg_arr[@]}"
+		echo -e "Expected timeout(ns): ${residency_arr[$state]}" >> $LOG
+		echo -e "Observed Avg timeout diff(ns) - State ${state_name_arr[$state]}: $avg" | tee -a $LOG
+		cpuidle_disable_state $state
+	done
+}
+
+declare -a residency_arr
+declare -a latency_arr
+declare -a core_arr
+declare -a first_core_arr
+declare -a state_name_arr
+
+parse_arguments $@
+
+rm -f $LOG
+touch $LOG
+NUM_CPUS=$(nproc --all)
+NUM_STATES=$(ls -1 /sys/devices/system/cpu/cpu0/cpuidle/ | wc -l)
+
+extract_core_information
+extract_state_information
+
+ins_mod $MODULE
+
+run_ipi_tests
+run_timeout_tests
+
+# Enable all idle states for all CPUs
+op_cpuidle $ENABLE
+printf "Removing $MODULE module\n"
+printf "Full Output logged at: $LOG\n"
+rmmod $MODULE
+
diff --git a/tools/testing/selftests/powerpc/cpuidle_latency/settings b/tools/testing/selftests/powerpc/cpuidle_latency/settings
new file mode 100644
index 000000000000..e7b9417537fb
--- /dev/null
+++ b/tools/testing/selftests/powerpc/cpuidle_latency/settings
@@ -0,0 +1 @@
+timeout=0
-- 
2.17.1


^ permalink raw reply related

* [RFC 1/2] powerpc/cpuidle: Extract IPI based and timer based wakeup latency from idle states
From: Pratik R. Sampat @ 2021-06-11 12:41 UTC (permalink / raw)
  To: mpe, shuah, linux-kernel, linuxppc-dev, linux-kselftest, psampat,
	pratik.r.sampat
In-Reply-To: <20210611124154.56427-1-psampat@linux.ibm.com>

Introduce a mechanism to fire directed IPIs from a specified source CPU
to a specified target CPU and measure the difference in time incurred on
wakeup.

Also, introduce a mechanism to queue a HR timer on a specified CPU and
subsequently measure the time taken to wakeup the CPU.

Finally define a simple debugfs interface to control the knobs to fire
the IPI and Timer events on specified CPU and view their incurred idle
wakeup latencies.

Signed-off-by: Pratik R. Sampat <psampat@linux.ibm.com>
---
 arch/powerpc/kernel/Makefile               |   1 +
 arch/powerpc/kernel/test-cpuidle_latency.c | 157 +++++++++++++++++++++
 lib/Kconfig.debug                          |  10 ++
 3 files changed, 168 insertions(+)
 create mode 100644 arch/powerpc/kernel/test-cpuidle_latency.c

diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index f66b63e81c3b..56e36e797dd4 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -56,6 +56,7 @@ obj-$(CONFIG_PPC_WATCHDOG)	+= watchdog.o
 obj-$(CONFIG_HAVE_HW_BREAKPOINT)	+= hw_breakpoint.o
 obj-$(CONFIG_PPC_DAWR)		+= dawr.o
 obj-$(CONFIG_PPC_BOOK3S_64)	+= cpu_setup_ppc970.o cpu_setup_pa6t.o
+obj-$(CONFIG_IDLE_LATENCY_SELFTEST)	  += test-cpuidle_latency.o
 obj-$(CONFIG_PPC_BOOK3S_64)	+= cpu_setup_power.o
 obj-$(CONFIG_PPC_BOOK3S_64)	+= mce.o mce_power.o
 obj-$(CONFIG_PPC_BOOK3E_64)	+= exceptions-64e.o idle_book3e.o
diff --git a/arch/powerpc/kernel/test-cpuidle_latency.c b/arch/powerpc/kernel/test-cpuidle_latency.c
new file mode 100644
index 000000000000..f138011ac225
--- /dev/null
+++ b/arch/powerpc/kernel/test-cpuidle_latency.c
@@ -0,0 +1,157 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Module-based API test facility for cpuidle latency using IPIs and timers
+ */
+
+#include <linux/debugfs.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+
+/*
+ * IPI based wakeup latencies
+ * Measure time taken for a CPU to wakeup on a IPI sent from another CPU
+ * The latency measured also includes the latency of sending the IPI
+ */
+struct latency {
+	unsigned int src_cpu;
+	unsigned int dest_cpu;
+	ktime_t time_start;
+	ktime_t time_end;
+	u64 latency_ns;
+} ipi_wakeup;
+
+static void measure_latency(void *info)
+{
+	struct latency *v;
+	ktime_t time_diff;
+
+	v = (struct latency *)info;
+	v->time_end = ktime_get();
+	time_diff = ktime_sub(v->time_end, v->time_start);
+	v->latency_ns = ktime_to_ns(time_diff);
+}
+
+void run_smp_call_function_test(unsigned int cpu)
+{
+	ipi_wakeup.src_cpu = smp_processor_id();
+	ipi_wakeup.dest_cpu = cpu;
+	ipi_wakeup.time_start = ktime_get();
+	smp_call_function_single(cpu, measure_latency, &ipi_wakeup, 1);
+}
+
+/*
+ * Timer based wakeup latencies
+ * Measure time taken for a CPU to wakeup on a timer being armed and fired
+ */
+struct timer_data {
+	unsigned int src_cpu;
+	u64 timeout;
+	ktime_t time_start;
+	ktime_t time_end;
+	struct hrtimer timer;
+	u64 timeout_diff_ns;
+} timer_wakeup;
+
+static enum hrtimer_restart timer_called(struct hrtimer *hrtimer)
+{
+	struct timer_data *w;
+	ktime_t time_diff;
+
+	w = container_of(hrtimer, struct timer_data, timer);
+	w->time_end = ktime_get();
+
+	time_diff = ktime_sub(w->time_end, w->time_start);
+	time_diff = ktime_sub(time_diff, ns_to_ktime(w->timeout));
+	w->timeout_diff_ns = ktime_to_ns(time_diff);
+	return HRTIMER_NORESTART;
+}
+
+static void run_timer_test(unsigned int ns)
+{
+	hrtimer_init(&timer_wakeup.timer, CLOCK_MONOTONIC,
+		     HRTIMER_MODE_REL);
+	timer_wakeup.timer.function = timer_called;
+	timer_wakeup.src_cpu = smp_processor_id();
+	timer_wakeup.timeout = ns;
+	timer_wakeup.time_start = ktime_get();
+
+	hrtimer_start(&timer_wakeup.timer, ns_to_ktime(ns),
+		      HRTIMER_MODE_REL_PINNED);
+}
+
+static struct dentry *dir;
+
+static int cpu_read_op(void *data, u64 *dest_cpu)
+{
+	*dest_cpu = ipi_wakeup.dest_cpu;
+	return 0;
+}
+
+static int cpu_write_op(void *data, u64 value)
+{
+	run_smp_call_function_test(value);
+	return 0;
+}
+DEFINE_SIMPLE_ATTRIBUTE(ipi_ops, cpu_read_op, cpu_write_op, "%llu\n");
+
+static int timeout_read_op(void *data, u64 *timeout)
+{
+	*timeout = timer_wakeup.timeout;
+	return 0;
+}
+
+static int timeout_write_op(void *data, u64 value)
+{
+	run_timer_test(value);
+	return 0;
+}
+DEFINE_SIMPLE_ATTRIBUTE(timeout_ops, timeout_read_op, timeout_write_op, "%llu\n");
+
+static int __init latency_init(void)
+{
+	struct dentry *temp;
+
+	dir = debugfs_create_dir("latency_test", 0);
+	if (!dir) {
+		pr_alert("latency_test: failed to create /sys/kernel/debug/latency_test\n");
+		return -1;
+	}
+	temp = debugfs_create_file("ipi_cpu_dest",
+				   0666,
+				   dir,
+				   NULL,
+				   &ipi_ops);
+	if (!temp) {
+		pr_alert("latency_test: failed to create /sys/kernel/debug/ipi_cpu_dest\n");
+		return -1;
+	}
+	debugfs_create_u64("ipi_latency_ns", 0444, dir, &ipi_wakeup.latency_ns);
+	debugfs_create_u32("ipi_cpu_src", 0444, dir, &ipi_wakeup.src_cpu);
+
+	temp = debugfs_create_file("timeout_expected_ns",
+				   0666,
+				   dir,
+				   NULL,
+				   &timeout_ops);
+	if (!temp) {
+		pr_alert("latency_test: failed to create /sys/kernel/debug/timeout_expected_ns\n");
+		return -1;
+	}
+	debugfs_create_u64("timeout_diff_ns", 0444, dir, &timer_wakeup.timeout_diff_ns);
+	debugfs_create_u32("timeout_cpu_src", 0444, dir, &timer_wakeup.src_cpu);
+	pr_info("Latency Test module loaded\n");
+	return 0;
+}
+
+static void __exit latency_cleanup(void)
+{
+	pr_info("Cleaning up Latency Test module.\n");
+	debugfs_remove_recursive(dir);
+}
+
+module_init(latency_init);
+module_exit(latency_cleanup);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("IBM Corporation");
+MODULE_DESCRIPTION("Measuring idle latency for IPIs and Timers");
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 417c3d3e521b..70264687e0c2 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1513,6 +1513,16 @@ config DEBUG_KOBJECT
 	  If you say Y here, some extra kobject debugging messages will be sent
 	  to the syslog.
 
+config IDLE_LATENCY_SELFTEST
+	tristate "Cpuidle latency selftests"
+	depends on CPU_IDLE
+	help
+	  This option provides a kernel module that runs tests using the IPI and
+	  timers to measure latency.
+
+	  Say M if you want these self tests to build as a module.
+	  Say N if you are unsure.
+
 config DEBUG_KOBJECT_RELEASE
 	bool "kobject release debugging"
 	depends on DEBUG_OBJECTS_TIMERS
-- 
2.17.1


^ permalink raw reply related

* Re: [PATCH] btrfs: Disable BTRFS on platforms having 256K pages
From: Chris Mason @ 2021-06-11 12:58 UTC (permalink / raw)
  To: dsterba@suse.cz
  Cc: linux-hexagon@vger.kernel.org, Josef Bacik,
	linux-kernel@vger.kernel.org, David Sterba,
	linuxppc-dev@lists.ozlabs.org, linux-btrfs
In-Reply-To: <20210610162046.GB28158@suse.cz>


> On Jun 10, 2021, at 12:20 PM, David Sterba <dsterba@suse.cz> wrote:
> 
> On Thu, Jun 10, 2021 at 04:50:09PM +0200, Christophe Leroy wrote:
>> 
>> 
>> Le 10/06/2021 à 15:54, Chris Mason a écrit :
>>> 
>>>> On Jun 10, 2021, at 1:23 AM, Christophe Leroy <christophe.leroy@csgroup.eu> wrote:
>>>> 
>>>> With a config having PAGE_SIZE set to 256K, BTRFS build fails
>>>> with the following message
>>>> 
>>>> include/linux/compiler_types.h:326:38: error: call to '__compiletime_assert_791' declared with attribute error: BUILD_BUG_ON failed: (BTRFS_MAX_COMPRESSED % PAGE_SIZE) != 0
>>>> 
>>>> BTRFS_MAX_COMPRESSED being 128K, BTRFS cannot support platforms with
>>>> 256K pages at the time being.
>>>> 
>>>> There are two platforms that can select 256K pages:
>>>> - hexagon
>>>> - powerpc
>>>> 
>>>> Disable BTRFS when 256K page size is selected.
>>>> 
>>> 
>>> We’ll have other subpage blocksize concerns with 256K pages, but this BTRFS_MAX_COMPRESSED #define is arbitrary.  It’s just trying to have an upper bound on the amount of memory we’ll need to uncompress a single page’s worth of random reads.
>>> 
>>> We could change it to max(PAGE_SIZE, 128K) or just bump to 256K.
>>> 
>> 
>> But if 256K is problematic in other ways, is it worth bumping BTRFS_MAX_COMPRESSED to 256K ?
>> 
>> David, in below mail, said that 256K support would require deaper changes. So disabling BTRFS 
>> support seems the easiest solution for the time being, at least for Stable (I forgot the Fixes: tag 
>> and the CC: to stable).
>> 
>> On powerpc, 256k pages is a corner case, it requires customised binutils, so I don't think disabling 
>> BTRFS is a issue there. For hexagon I don't know.
> 
> That it blew up due to the max compressed size is a coincidence. We
> could have explicit BUILD_BUG_ONs for page size or other constraints
> derived from the page size like INLINE_EXTENT_BUFFER_PAGES.
> 

Right, the constraint is bigger and more complex than BTRFS_MAX_COMPRESSED.

> And there's no such thing like "just bump BTRFS_MAX_COMPRESSED to 256K".
> The constant is part of on-disk format for lzo and otherwise changing it
> would impact performance so this would need proper evaluation.

Sorry, how is it baked into LZO?  It definitely will have performance implications, I agree there.

-chris


^ permalink raw reply

* Re: [PATCH] btrfs: Disable BTRFS on platforms having 256K pages
From: David Sterba @ 2021-06-11 13:21 UTC (permalink / raw)
  To: Chris Mason
  Cc: linux-hexagon@vger.kernel.org, linux-kernel@vger.kernel.org,
	dsterba@suse.cz, David Sterba, Josef Bacik,
	linuxppc-dev@lists.ozlabs.org, linux-btrfs
In-Reply-To: <6769ED4C-15A8-4CFF-BF2B-26A5328257A0@fb.com>

On Fri, Jun 11, 2021 at 12:58:58PM +0000, Chris Mason wrote:
> > On Jun 10, 2021, at 12:20 PM, David Sterba <dsterba@suse.cz> wrote:
> > On Thu, Jun 10, 2021 at 04:50:09PM +0200, Christophe Leroy wrote:
> >> Le 10/06/2021 à 15:54, Chris Mason a écrit :
> >>>> On Jun 10, 2021, at 1:23 AM, Christophe Leroy <christophe.leroy@csgroup.eu> wrote:
> > And there's no such thing like "just bump BTRFS_MAX_COMPRESSED to 256K".
> > The constant is part of on-disk format for lzo and otherwise changing it
> > would impact performance so this would need proper evaluation.
> 
> Sorry, how is it baked into LZO?  It definitely will have performance implications, I agree there.

lzo_decompress_bio:

309         /*
310          * Compressed data header check.
311          *
312          * The real compressed size can't exceed the maximum extent length, and
313          * all pages should be used (whole unused page with just the segment
314          * header is not possible).  If this happens it means the compressed
315          * extent is corrupted.
316          */
317         if (tot_len > min_t(size_t, BTRFS_MAX_COMPRESSED, srclen) ||
318             tot_len < srclen - PAGE_SIZE) {
319                 ret = -EUCLEAN;
320                 goto done;
321         }

^ permalink raw reply

* Re: [PATCH 0/1] PPC32: fix ptrace() access to FPU registers
From: Radu Rendec @ 2021-06-11 14:37 UTC (permalink / raw)
  To: Christophe Leroy, Daniel Axtens, Andreas Schwab
  Cc: linuxppc-dev, Paul Mackerras, Oleg Nesterov
In-Reply-To: <39d6eb22-2142-d869-7649-0242cecf5532@csgroup.eu>

On Fri, 2021-06-11 at 08:02 +0200, Christophe Leroy wrote:
>Le 19/06/2019 à 14:57, Radu Rendec a écrit :
>> On Wed, 2019-06-19 at 10:36 +1000, Daniel Axtens wrote:
>>> Andreas Schwab <
>>> schwab@linux-m68k.org
>>>> writes:
>>>
>>>> On Jun 18 2019, Radu Rendec <
>>>> radu.rendec@gmail.com
>>>>> wrote:
>>>>
>>>>> Since you already have a working setup, it would be nice if you could
>>>>> add a printk to arch_ptrace() to print the address and confirm what I
>>>>> believe happens (by reading the gdb source code).
>>>>
>>>> A ppc32 ptrace syscall goes through compat_arch_ptrace.
>>
>> Right. I completely overlooked that part.
>>
>>> Ah right, and that (in ptrace32.c) contains code that will work:
>>>
>>>
>>> 			/*
>>> 			 * the user space code considers the floating point
>>> 			 * to be an array of unsigned int (32 bits) - the
>>> 			 * index passed in is based on this assumption.
>>> 			 */
>>> 			tmp = ((unsigned int *)child->thread.fp_state.fpr)
>>> 				[FPRINDEX(index)];
>>>
>>> FPRINDEX is defined above to deal with the various manipulations you
>>> need to do.
>>
>> Correct. Basically it does the same that I did in my patch: it divides
>> the index again by 2 (it's already divided by 4 in compat_arch_ptrace()
>> so it ends up divided by 8), then takes the least significant bit and
>> adds it to the index. I take bit 2 of the original address, which is the
>> same thing (because in FPRHALF() the address is already divided by 4).
>>
>> So we have this in ptrace32.c:
>>
>> #define FPRNUMBER(i) (((i) - PT_FPR0) >> 1)
>> #define FPRHALF(i) (((i) - PT_FPR0) & 1)
>> #define FPRINDEX(i) TS_FPRWIDTH * FPRNUMBER(i) * 2 + FPRHALF(i)
>>
>> index = (unsigned long) addr >> 2;
>> (unsigned int *)child->thread.fp_state.fpr)[FPRINDEX(index)]
>>
>>
>> And we have this in my patch:
>>
>> fpidx = (addr - PT_FPR0 * sizeof(long)) / 8;
>> (void *)&child->thread.TS_FPR(fpidx) + (addr & 4)
>>
>>> Radu: I think we want to copy that working code back into ptrace.c.
>>
>> I'm not sure that would work. There's a subtle difference: the code in
>> ptrace32.c is always compiled on a 64-bit kernel and the user space
>> calling it is always 32-bit; on the other hand, the code in ptrace.c can
>> be compiled on either a 64-bit kernel or a 32-bit kernel and the user
>> space calling it always has the same "bitness" as the kernel.
>>
>> One difference is the size of the CPU registers. On 64-bit they are 8
>> byte long and user space knows that and generates 8-byte aligned
>> addresses. So you have to divide the address by 8 to calculate the CPU
>> register index correctly, which compat_arch_ptrace() currently doesn't.
>>
>> Another difference is that on 64-bit `long` is 8 bytes, so user space
>> can read a whole FPU register in a single ptrace call.
>>
>> Now that we are all aware of compat_arch_ptrace() (which handles the
>> special case of a 32-bit process running on a 64-bit kernel) I would say
>> the patch is correct and does the right thing for both 32-bit and 64-bit
>> kernels and processes.
>>
>>> The challenge will be unpicking the awful mess of ifdefs in ptrace.c
>>> and making it somewhat more comprehensible.
>>
>> I'm not sure what ifdefs you're thinking about. The only that are used
>> inside arch_ptrace() are PT_FPR0, PT_FPSCR and TS_FPR, which seem to be
>> correct.
>>
>> But perhaps it would be useful to change my patch and add a comment just
>> before arch_ptrace() that explains how the math is done and that the
>> code must work on both 32-bit and 64-bit, the user space address
>> assumptions, etc.
>>
>> By the way, I'm not sure the code in compat_arch_ptrace() handles
>> PT_FPSCR correctly. It might (just because fpscr is right next to fpr[]
>> in memory - and that's a hack), but I can't figure out if it accesses
>> the right half.
>>
>
>Does the issue still exists ? If yes, the patch has to be rebased.

Hard to say. I'm still using 4.9 (stable) on the systems that I created
the patch for. I tried to rebase, and the patch no longer applies. It
looks like there have been some changes around that area, notably your
commit e009fa433542, so it could actually be fixed now.

It's been exactly two years since I sent the patch and I don't remember
all the details. I will have to go back and look. Also, running a recent
kernel on my PPC32 systems is not an option because there are a bunch of
custom patches that would have to be ported. I will try in a VM and get
back to you, hopefully early next week.

Best regards,
Radu


^ permalink raw reply

* [PATCH v9 00/14] Restricted DMA
From: Claire Chang @ 2021-06-11 15:26 UTC (permalink / raw)
  To: Rob Herring, mpe, Joerg Roedel, Will Deacon, Frank Rowand,
	Konrad Rzeszutek Wilk, boris.ostrovsky, jgross, Christoph Hellwig,
	Marek Szyprowski
  Cc: heikki.krogerus, thomas.hellstrom, peterz, joonas.lahtinen,
	dri-devel, chris, grant.likely, paulus, mingo, jxgao, sstabellini,
	Saravana Kannan, xypron.glpk, Rafael J . Wysocki,
	Bartosz Golaszewski, bskeggs, linux-pci, xen-devel,
	Thierry Reding, intel-gfx, matthew.auld, linux-devicetree, daniel,
	airlied, maarten.lankhorst, linuxppc-dev, jani.nikula,
	Nicolas Boichat, rodrigo.vivi, bhelgaas, tientzu, Dan Williams,
	Andy Shevchenko, Greg KH, Randy Dunlap, lkml, tfiga,
	list@263.net:IOMMU DRIVERS, Jim Quinlan, Robin Murphy, bauerman

This series implements mitigations for lack of DMA access control on
systems without an IOMMU, which could result in the DMA accessing the
system memory at unexpected times and/or unexpected addresses, possibly
leading to data leakage or corruption.

For example, we plan to use the PCI-e bus for Wi-Fi and that PCI-e bus is
not behind an IOMMU. As PCI-e, by design, gives the device full access to
system memory, a vulnerability in the Wi-Fi firmware could easily escalate
to a full system exploit (remote wifi exploits: [1a], [1b] that shows a
full chain of exploits; [2], [3]).

To mitigate the security concerns, we introduce restricted DMA. Restricted
DMA utilizes the existing swiotlb to bounce streaming DMA in and out of a
specially allocated region and does memory allocation from the same region.
The feature on its own provides a basic level of protection against the DMA
overwriting buffer contents at unexpected times. However, to protect
against general data leakage and system memory corruption, the system needs
to provide a way to restrict the DMA to a predefined memory region (this is
usually done at firmware level, e.g. MPU in ATF on some ARM platforms [4]).

[1a] https://googleprojectzero.blogspot.com/2017/04/over-air-exploiting-broadcoms-wi-fi_4.html
[1b] https://googleprojectzero.blogspot.com/2017/04/over-air-exploiting-broadcoms-wi-fi_11.html
[2] https://blade.tencent.com/en/advisories/qualpwn/
[3] https://www.bleepingcomputer.com/news/security/vulnerabilities-found-in-highly-popular-firmware-for-wifi-chips/
[4] https://github.com/ARM-software/arm-trusted-firmware/blob/master/plat/mediatek/mt8183/drivers/emi_mpu/emi_mpu.c#L132

v9:
Address the comments in v7 to
  - set swiotlb active pool to dev->dma_io_tlb_mem
  - get rid of get_io_tlb_mem
  - dig out the device struct for is_swiotlb_active
  - move debugfs_create_dir out of swiotlb_create_debugfs
  - do set_memory_decrypted conditionally in swiotlb_init_io_tlb_mem
  - use IS_ENABLED in kernel/dma/direct.c
  - fix redefinition of 'of_dma_set_restricted_buffer'

v8:
- Fix reserved-memory.txt and add the reg property in example.
- Fix sizeof for of_property_count_elems_of_size in
  drivers/of/address.c#of_dma_set_restricted_buffer.
- Apply Will's suggestion to try the OF node having DMA configuration in
  drivers/of/address.c#of_dma_set_restricted_buffer.
- Fix typo in the comment of drivers/of/address.c#of_dma_set_restricted_buffer.
- Add error message for PageHighMem in
  kernel/dma/swiotlb.c#rmem_swiotlb_device_init and move it to
  rmem_swiotlb_setup.
- Fix the message string in rmem_swiotlb_setup.
https://lore.kernel.org/patchwork/cover/1437112/

v7:
Fix debugfs, PageHighMem and comment style in rmem_swiotlb_device_init
https://lore.kernel.org/patchwork/cover/1431031/

v6:
Address the comments in v5
https://lore.kernel.org/patchwork/cover/1423201/

v5:
Rebase on latest linux-next
https://lore.kernel.org/patchwork/cover/1416899/

v4:
- Fix spinlock bad magic
- Use rmem->name for debugfs entry
- Address the comments in v3
https://lore.kernel.org/patchwork/cover/1378113/

v3:
Using only one reserved memory region for both streaming DMA and memory
allocation.
https://lore.kernel.org/patchwork/cover/1360992/

v2:
Building on top of swiotlb.
https://lore.kernel.org/patchwork/cover/1280705/

v1:
Using dma_map_ops.
https://lore.kernel.org/patchwork/cover/1271660/


Claire Chang (14):
  swiotlb: Refactor swiotlb init functions
  swiotlb: Refactor swiotlb_create_debugfs
  swiotlb: Set dev->dma_io_tlb_mem to the swiotlb pool used
  swiotlb: Add restricted DMA pool initialization
  swiotlb: Update is_swiotlb_buffer to add a struct device argument
  swiotlb: Update is_swiotlb_active to add a struct device argument
  swiotlb: Bounce data from/to restricted DMA pool if available
  swiotlb: Move alloc_size to find_slots
  swiotlb: Refactor swiotlb_tbl_unmap_single
  dma-direct: Add a new wrapper __dma_direct_free_pages()
  swiotlb: Add restricted DMA alloc/free support.
  dma-direct: Allocate memory from restricted DMA pool if available
  dt-bindings: of: Add restricted DMA pool
  of: Add plumbing for restricted DMA pool

 .../reserved-memory/reserved-memory.txt       |  36 ++-
 drivers/gpu/drm/i915/gem/i915_gem_internal.c  |   2 +-
 drivers/gpu/drm/nouveau/nouveau_ttm.c         |   2 +-
 drivers/iommu/dma-iommu.c                     |  12 +-
 drivers/of/address.c                          |  33 +++
 drivers/of/device.c                           |   6 +
 drivers/of/of_private.h                       |   6 +
 drivers/pci/xen-pcifront.c                    |   2 +-
 drivers/xen/swiotlb-xen.c                     |   2 +-
 include/linux/device.h                        |   4 +
 include/linux/swiotlb.h                       |  45 +++-
 kernel/dma/Kconfig                            |  14 +
 kernel/dma/direct.c                           |  62 +++--
 kernel/dma/direct.h                           |   9 +-
 kernel/dma/swiotlb.c                          | 242 +++++++++++++-----
 15 files changed, 376 insertions(+), 101 deletions(-)

-- 
2.32.0.272.g935e593368-goog


^ permalink raw reply

* [PATCH v9 01/14] swiotlb: Refactor swiotlb init functions
From: Claire Chang @ 2021-06-11 15:26 UTC (permalink / raw)
  To: Rob Herring, mpe, Joerg Roedel, Will Deacon, Frank Rowand,
	Konrad Rzeszutek Wilk, boris.ostrovsky, jgross, Christoph Hellwig,
	Marek Szyprowski
  Cc: heikki.krogerus, thomas.hellstrom, peterz, joonas.lahtinen,
	dri-devel, chris, grant.likely, paulus, mingo, jxgao, sstabellini,
	Saravana Kannan, xypron.glpk, Rafael J . Wysocki,
	Bartosz Golaszewski, bskeggs, linux-pci, xen-devel,
	Thierry Reding, intel-gfx, matthew.auld, linux-devicetree, daniel,
	airlied, maarten.lankhorst, linuxppc-dev, jani.nikula,
	Nicolas Boichat, rodrigo.vivi, bhelgaas, tientzu, Dan Williams,
	Andy Shevchenko, Greg KH, Randy Dunlap, lkml, tfiga,
	list@263.net:IOMMU DRIVERS, Jim Quinlan, Robin Murphy, bauerman
In-Reply-To: <20210611152659.2142983-1-tientzu@chromium.org>

Add a new function, swiotlb_init_io_tlb_mem, for the io_tlb_mem struct
initialization to make the code reusable.

Signed-off-by: Claire Chang <tientzu@chromium.org>
---
 kernel/dma/swiotlb.c | 53 ++++++++++++++++++++++----------------------
 1 file changed, 27 insertions(+), 26 deletions(-)

diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index 8ca7d505d61c..1a1208c81e85 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -168,9 +168,32 @@ void __init swiotlb_update_mem_attributes(void)
 	memset(vaddr, 0, bytes);
 }
 
-int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose)
+static void swiotlb_init_io_tlb_mem(struct io_tlb_mem *mem, phys_addr_t start,
+				    unsigned long nslabs, bool late_alloc,
+				    bool memory_decrypted)
 {
+	void *vaddr = phys_to_virt(start);
 	unsigned long bytes = nslabs << IO_TLB_SHIFT, i;
+
+	mem->nslabs = nslabs;
+	mem->start = start;
+	mem->end = mem->start + bytes;
+	mem->index = 0;
+	mem->late_alloc = late_alloc;
+	spin_lock_init(&mem->lock);
+	for (i = 0; i < mem->nslabs; i++) {
+		mem->slots[i].list = IO_TLB_SEGSIZE - io_tlb_offset(i);
+		mem->slots[i].orig_addr = INVALID_PHYS_ADDR;
+		mem->slots[i].alloc_size = 0;
+	}
+
+	if (memory_decrypted)
+		set_memory_decrypted((unsigned long)vaddr, bytes >> PAGE_SHIFT);
+	memset(vaddr, 0, bytes);
+}
+
+int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose)
+{
 	struct io_tlb_mem *mem;
 	size_t alloc_size;
 
@@ -186,16 +209,8 @@ int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose)
 	if (!mem)
 		panic("%s: Failed to allocate %zu bytes align=0x%lx\n",
 		      __func__, alloc_size, PAGE_SIZE);
-	mem->nslabs = nslabs;
-	mem->start = __pa(tlb);
-	mem->end = mem->start + bytes;
-	mem->index = 0;
-	spin_lock_init(&mem->lock);
-	for (i = 0; i < mem->nslabs; i++) {
-		mem->slots[i].list = IO_TLB_SEGSIZE - io_tlb_offset(i);
-		mem->slots[i].orig_addr = INVALID_PHYS_ADDR;
-		mem->slots[i].alloc_size = 0;
-	}
+
+	swiotlb_init_io_tlb_mem(mem, __pa(tlb), nslabs, false, false);
 
 	io_tlb_default_mem = mem;
 	if (verbose)
@@ -282,7 +297,6 @@ swiotlb_late_init_with_default_size(size_t default_size)
 int
 swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
 {
-	unsigned long bytes = nslabs << IO_TLB_SHIFT, i;
 	struct io_tlb_mem *mem;
 
 	if (swiotlb_force == SWIOTLB_NO_FORCE)
@@ -297,20 +311,7 @@ swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
 	if (!mem)
 		return -ENOMEM;
 
-	mem->nslabs = nslabs;
-	mem->start = virt_to_phys(tlb);
-	mem->end = mem->start + bytes;
-	mem->index = 0;
-	mem->late_alloc = 1;
-	spin_lock_init(&mem->lock);
-	for (i = 0; i < mem->nslabs; i++) {
-		mem->slots[i].list = IO_TLB_SEGSIZE - io_tlb_offset(i);
-		mem->slots[i].orig_addr = INVALID_PHYS_ADDR;
-		mem->slots[i].alloc_size = 0;
-	}
-
-	set_memory_decrypted((unsigned long)tlb, bytes >> PAGE_SHIFT);
-	memset(tlb, 0, bytes);
+	swiotlb_init_io_tlb_mem(mem, virt_to_phys(tlb), nslabs, true, true);
 
 	io_tlb_default_mem = mem;
 	swiotlb_print_info();
-- 
2.32.0.272.g935e593368-goog


^ permalink raw reply related

* [PATCH v9 02/14] swiotlb: Refactor swiotlb_create_debugfs
From: Claire Chang @ 2021-06-11 15:26 UTC (permalink / raw)
  To: Rob Herring, mpe, Joerg Roedel, Will Deacon, Frank Rowand,
	Konrad Rzeszutek Wilk, boris.ostrovsky, jgross, Christoph Hellwig,
	Marek Szyprowski
  Cc: heikki.krogerus, thomas.hellstrom, peterz, joonas.lahtinen,
	dri-devel, chris, grant.likely, paulus, mingo, jxgao, sstabellini,
	Saravana Kannan, xypron.glpk, Rafael J . Wysocki,
	Bartosz Golaszewski, bskeggs, linux-pci, xen-devel,
	Thierry Reding, intel-gfx, matthew.auld, linux-devicetree, daniel,
	airlied, maarten.lankhorst, linuxppc-dev, jani.nikula,
	Nicolas Boichat, rodrigo.vivi, bhelgaas, tientzu, Dan Williams,
	Andy Shevchenko, Greg KH, Randy Dunlap, lkml, tfiga,
	list@263.net:IOMMU DRIVERS, Jim Quinlan, Robin Murphy, bauerman
In-Reply-To: <20210611152659.2142983-1-tientzu@chromium.org>

Split the debugfs creation to make the code reusable for supporting
different bounce buffer pools, e.g. restricted DMA pool.

Signed-off-by: Claire Chang <tientzu@chromium.org>
---
 kernel/dma/swiotlb.c | 23 ++++++++++++++++-------
 1 file changed, 16 insertions(+), 7 deletions(-)

diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index 1a1208c81e85..8a3e2b3b246d 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -64,6 +64,9 @@
 enum swiotlb_force swiotlb_force;
 
 struct io_tlb_mem *io_tlb_default_mem;
+#ifdef CONFIG_DEBUG_FS
+static struct dentry *debugfs_dir;
+#endif
 
 /*
  * Max segment that we can provide which (if pages are contingous) will
@@ -664,18 +667,24 @@ EXPORT_SYMBOL_GPL(is_swiotlb_active);
 
 #ifdef CONFIG_DEBUG_FS
 
-static int __init swiotlb_create_debugfs(void)
+static void swiotlb_create_debugfs_files(struct io_tlb_mem *mem)
 {
-	struct io_tlb_mem *mem = io_tlb_default_mem;
-
-	if (!mem)
-		return 0;
-	mem->debugfs = debugfs_create_dir("swiotlb", NULL);
 	debugfs_create_ulong("io_tlb_nslabs", 0400, mem->debugfs, &mem->nslabs);
 	debugfs_create_ulong("io_tlb_used", 0400, mem->debugfs, &mem->used);
+}
+
+static int __init swiotlb_create_default_debugfs(void)
+{
+	struct io_tlb_mem *mem = io_tlb_default_mem;
+
+	debugfs_dir = debugfs_create_dir("swiotlb", NULL);
+	if (mem) {
+		mem->debugfs = debugfs_dir;
+		swiotlb_create_debugfs_files(mem);
+	}
 	return 0;
 }
 
-late_initcall(swiotlb_create_debugfs);
+late_initcall(swiotlb_create_default_debugfs);
 
 #endif
-- 
2.32.0.272.g935e593368-goog


^ permalink raw reply related

* [PATCH v9 03/14] swiotlb: Set dev->dma_io_tlb_mem to the swiotlb pool used
From: Claire Chang @ 2021-06-11 15:26 UTC (permalink / raw)
  To: Rob Herring, mpe, Joerg Roedel, Will Deacon, Frank Rowand,
	Konrad Rzeszutek Wilk, boris.ostrovsky, jgross, Christoph Hellwig,
	Marek Szyprowski
  Cc: heikki.krogerus, thomas.hellstrom, peterz, joonas.lahtinen,
	dri-devel, chris, grant.likely, paulus, mingo, jxgao, sstabellini,
	Saravana Kannan, xypron.glpk, Rafael J . Wysocki,
	Bartosz Golaszewski, bskeggs, linux-pci, xen-devel,
	Thierry Reding, intel-gfx, matthew.auld, linux-devicetree, daniel,
	airlied, maarten.lankhorst, linuxppc-dev, jani.nikula,
	Nicolas Boichat, rodrigo.vivi, bhelgaas, tientzu, Dan Williams,
	Andy Shevchenko, Greg KH, Randy Dunlap, lkml, tfiga,
	list@263.net:IOMMU DRIVERS, Jim Quinlan, Robin Murphy, bauerman
In-Reply-To: <20210611152659.2142983-1-tientzu@chromium.org>

Always have the pointer to the swiotlb pool used in struct device. This
could help simplify the code for other pools.

Signed-off-by: Claire Chang <tientzu@chromium.org>
---
 drivers/of/device.c     | 3 +++
 include/linux/device.h  | 4 ++++
 include/linux/swiotlb.h | 8 ++++++++
 kernel/dma/swiotlb.c    | 8 ++++----
 4 files changed, 19 insertions(+), 4 deletions(-)

diff --git a/drivers/of/device.c b/drivers/of/device.c
index c5a9473a5fb1..1defdf15ba95 100644
--- a/drivers/of/device.c
+++ b/drivers/of/device.c
@@ -165,6 +165,9 @@ int of_dma_configure_id(struct device *dev, struct device_node *np,
 
 	arch_setup_dma_ops(dev, dma_start, size, iommu, coherent);
 
+	if (IS_ENABLED(CONFIG_SWIOTLB))
+		swiotlb_set_io_tlb_default_mem(dev);
+
 	return 0;
 }
 EXPORT_SYMBOL_GPL(of_dma_configure_id);
diff --git a/include/linux/device.h b/include/linux/device.h
index 4443e12238a0..2e9a378c9100 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -432,6 +432,7 @@ struct dev_links_info {
  * @dma_pools:	Dma pools (if dma'ble device).
  * @dma_mem:	Internal for coherent mem override.
  * @cma_area:	Contiguous memory area for dma allocations
+ * @dma_io_tlb_mem: Pointer to the swiotlb pool used.  Not for driver use.
  * @archdata:	For arch-specific additions.
  * @of_node:	Associated device tree node.
  * @fwnode:	Associated device node supplied by platform firmware.
@@ -540,6 +541,9 @@ struct device {
 #ifdef CONFIG_DMA_CMA
 	struct cma *cma_area;		/* contiguous memory area for dma
 					   allocations */
+#endif
+#ifdef CONFIG_SWIOTLB
+	struct io_tlb_mem *dma_io_tlb_mem;
 #endif
 	/* arch specific additions */
 	struct dev_archdata	archdata;
diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index 216854a5e513..008125ccd509 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -108,6 +108,11 @@ static inline bool is_swiotlb_buffer(phys_addr_t paddr)
 	return mem && paddr >= mem->start && paddr < mem->end;
 }
 
+static inline void swiotlb_set_io_tlb_default_mem(struct device *dev)
+{
+	dev->dma_io_tlb_mem = io_tlb_default_mem;
+}
+
 void __init swiotlb_exit(void);
 unsigned int swiotlb_max_segment(void);
 size_t swiotlb_max_mapping_size(struct device *dev);
@@ -119,6 +124,9 @@ static inline bool is_swiotlb_buffer(phys_addr_t paddr)
 {
 	return false;
 }
+static inline void swiotlb_set_io_tlb_default_mem(struct device *dev)
+{
+}
 static inline void swiotlb_exit(void)
 {
 }
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index 8a3e2b3b246d..29b950ab1351 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -344,7 +344,7 @@ void __init swiotlb_exit(void)
 static void swiotlb_bounce(struct device *dev, phys_addr_t tlb_addr, size_t size,
 			   enum dma_data_direction dir)
 {
-	struct io_tlb_mem *mem = io_tlb_default_mem;
+	struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
 	int index = (tlb_addr - mem->start) >> IO_TLB_SHIFT;
 	phys_addr_t orig_addr = mem->slots[index].orig_addr;
 	size_t alloc_size = mem->slots[index].alloc_size;
@@ -426,7 +426,7 @@ static unsigned int wrap_index(struct io_tlb_mem *mem, unsigned int index)
 static int find_slots(struct device *dev, phys_addr_t orig_addr,
 		size_t alloc_size)
 {
-	struct io_tlb_mem *mem = io_tlb_default_mem;
+	struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
 	unsigned long boundary_mask = dma_get_seg_boundary(dev);
 	dma_addr_t tbl_dma_addr =
 		phys_to_dma_unencrypted(dev, mem->start) & boundary_mask;
@@ -503,7 +503,7 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
 		size_t mapping_size, size_t alloc_size,
 		enum dma_data_direction dir, unsigned long attrs)
 {
-	struct io_tlb_mem *mem = io_tlb_default_mem;
+	struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
 	unsigned int offset = swiotlb_align_offset(dev, orig_addr);
 	unsigned int i;
 	int index;
@@ -554,7 +554,7 @@ void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr,
 			      size_t mapping_size, enum dma_data_direction dir,
 			      unsigned long attrs)
 {
-	struct io_tlb_mem *mem = io_tlb_default_mem;
+	struct io_tlb_mem *mem = hwdev->dma_io_tlb_mem;
 	unsigned long flags;
 	unsigned int offset = swiotlb_align_offset(hwdev, tlb_addr);
 	int index = (tlb_addr - offset - mem->start) >> IO_TLB_SHIFT;
-- 
2.32.0.272.g935e593368-goog


^ permalink raw reply related

* [PATCH v9 04/14] swiotlb: Add restricted DMA pool initialization
From: Claire Chang @ 2021-06-11 15:26 UTC (permalink / raw)
  To: Rob Herring, mpe, Joerg Roedel, Will Deacon, Frank Rowand,
	Konrad Rzeszutek Wilk, boris.ostrovsky, jgross, Christoph Hellwig,
	Marek Szyprowski
  Cc: heikki.krogerus, thomas.hellstrom, peterz, joonas.lahtinen,
	dri-devel, chris, grant.likely, paulus, mingo, jxgao, sstabellini,
	Saravana Kannan, xypron.glpk, Rafael J . Wysocki,
	Bartosz Golaszewski, bskeggs, linux-pci, xen-devel,
	Thierry Reding, intel-gfx, matthew.auld, linux-devicetree, daniel,
	airlied, maarten.lankhorst, linuxppc-dev, jani.nikula,
	Nicolas Boichat, rodrigo.vivi, bhelgaas, tientzu, Dan Williams,
	Andy Shevchenko, Greg KH, Randy Dunlap, lkml, tfiga,
	list@263.net:IOMMU DRIVERS, Jim Quinlan, Robin Murphy, bauerman
In-Reply-To: <20210611152659.2142983-1-tientzu@chromium.org>

Add the initialization function to create restricted DMA pools from
matching reserved-memory nodes.

Signed-off-by: Claire Chang <tientzu@chromium.org>
---
 include/linux/swiotlb.h |  3 +-
 kernel/dma/Kconfig      | 14 ++++++++
 kernel/dma/swiotlb.c    | 75 +++++++++++++++++++++++++++++++++++++++++
 3 files changed, 91 insertions(+), 1 deletion(-)

diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index 008125ccd509..ec0c01796c8a 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -72,7 +72,8 @@ extern enum swiotlb_force swiotlb_force;
  *		range check to see if the memory was in fact allocated by this
  *		API.
  * @nslabs:	The number of IO TLB blocks (in groups of 64) between @start and
- *		@end. This is command line adjustable via setup_io_tlb_npages.
+ *		@end. For default swiotlb, this is command line adjustable via
+ *		setup_io_tlb_npages.
  * @used:	The number of used IO TLB block.
  * @list:	The free list describing the number of free entries available
  *		from each index.
diff --git a/kernel/dma/Kconfig b/kernel/dma/Kconfig
index 77b405508743..3e961dc39634 100644
--- a/kernel/dma/Kconfig
+++ b/kernel/dma/Kconfig
@@ -80,6 +80,20 @@ config SWIOTLB
 	bool
 	select NEED_DMA_MAP_STATE
 
+config DMA_RESTRICTED_POOL
+	bool "DMA Restricted Pool"
+	depends on OF && OF_RESERVED_MEM
+	select SWIOTLB
+	help
+	  This enables support for restricted DMA pools which provide a level of
+	  DMA memory protection on systems with limited hardware protection
+	  capabilities, such as those lacking an IOMMU.
+
+	  For more information see
+	  <Documentation/devicetree/bindings/reserved-memory/reserved-memory.txt>
+	  and <kernel/dma/swiotlb.c>.
+	  If unsure, say "n".
+
 #
 # Should be selected if we can mmap non-coherent mappings to userspace.
 # The only thing that is really required is a way to set an uncached bit
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index 29b950ab1351..c4a071d6a63f 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -39,6 +39,13 @@
 #ifdef CONFIG_DEBUG_FS
 #include <linux/debugfs.h>
 #endif
+#ifdef CONFIG_DMA_RESTRICTED_POOL
+#include <linux/io.h>
+#include <linux/of.h>
+#include <linux/of_fdt.h>
+#include <linux/of_reserved_mem.h>
+#include <linux/slab.h>
+#endif
 
 #include <asm/io.h>
 #include <asm/dma.h>
@@ -688,3 +695,71 @@ static int __init swiotlb_create_default_debugfs(void)
 late_initcall(swiotlb_create_default_debugfs);
 
 #endif
+
+#ifdef CONFIG_DMA_RESTRICTED_POOL
+static int rmem_swiotlb_device_init(struct reserved_mem *rmem,
+				    struct device *dev)
+{
+	struct io_tlb_mem *mem = rmem->priv;
+	unsigned long nslabs = rmem->size >> IO_TLB_SHIFT;
+
+	/*
+	 * Since multiple devices can share the same pool, the private data,
+	 * io_tlb_mem struct, will be initialized by the first device attached
+	 * to it.
+	 */
+	if (!mem) {
+		mem = kzalloc(struct_size(mem, slots, nslabs), GFP_KERNEL);
+		if (!mem)
+			return -ENOMEM;
+
+		swiotlb_init_io_tlb_mem(mem, rmem->base, nslabs, false, true);
+
+		rmem->priv = mem;
+
+		if (IS_ENABLED(CONFIG_DEBUG_FS)) {
+			mem->debugfs =
+				debugfs_create_dir(rmem->name, debugfs_dir);
+			swiotlb_create_debugfs_files(mem);
+		}
+	}
+
+	dev->dma_io_tlb_mem = mem;
+
+	return 0;
+}
+
+static void rmem_swiotlb_device_release(struct reserved_mem *rmem,
+					struct device *dev)
+{
+	dev->dma_io_tlb_mem = io_tlb_default_mem;
+}
+
+static const struct reserved_mem_ops rmem_swiotlb_ops = {
+	.device_init = rmem_swiotlb_device_init,
+	.device_release = rmem_swiotlb_device_release,
+};
+
+static int __init rmem_swiotlb_setup(struct reserved_mem *rmem)
+{
+	unsigned long node = rmem->fdt_node;
+
+	if (of_get_flat_dt_prop(node, "reusable", NULL) ||
+	    of_get_flat_dt_prop(node, "linux,cma-default", NULL) ||
+	    of_get_flat_dt_prop(node, "linux,dma-default", NULL) ||
+	    of_get_flat_dt_prop(node, "no-map", NULL))
+		return -EINVAL;
+
+	if (PageHighMem(pfn_to_page(PHYS_PFN(rmem->base)))) {
+		pr_err("Restricted DMA pool must be accessible within the linear mapping.");
+		return -EINVAL;
+	}
+
+	rmem->ops = &rmem_swiotlb_ops;
+	pr_info("Reserved memory: created restricted DMA pool at %pa, size %ld MiB\n",
+		&rmem->base, (unsigned long)rmem->size / SZ_1M);
+	return 0;
+}
+
+RESERVEDMEM_OF_DECLARE(dma, "restricted-dma-pool", rmem_swiotlb_setup);
+#endif /* CONFIG_DMA_RESTRICTED_POOL */
-- 
2.32.0.272.g935e593368-goog


^ permalink raw reply related

* [PATCH v9 05/14] swiotlb: Update is_swiotlb_buffer to add a struct device argument
From: Claire Chang @ 2021-06-11 15:26 UTC (permalink / raw)
  To: Rob Herring, mpe, Joerg Roedel, Will Deacon, Frank Rowand,
	Konrad Rzeszutek Wilk, boris.ostrovsky, jgross, Christoph Hellwig,
	Marek Szyprowski
  Cc: heikki.krogerus, thomas.hellstrom, peterz, joonas.lahtinen,
	dri-devel, chris, grant.likely, paulus, mingo, jxgao, sstabellini,
	Saravana Kannan, xypron.glpk, Rafael J . Wysocki,
	Bartosz Golaszewski, bskeggs, linux-pci, xen-devel,
	Thierry Reding, intel-gfx, matthew.auld, linux-devicetree, daniel,
	airlied, maarten.lankhorst, linuxppc-dev, jani.nikula,
	Nicolas Boichat, rodrigo.vivi, bhelgaas, tientzu, Dan Williams,
	Andy Shevchenko, Greg KH, Randy Dunlap, lkml, tfiga,
	list@263.net:IOMMU DRIVERS, Jim Quinlan, Robin Murphy, bauerman
In-Reply-To: <20210611152659.2142983-1-tientzu@chromium.org>

Update is_swiotlb_buffer to add a struct device argument. This will be
useful later to allow for restricted DMA pool.

Signed-off-by: Claire Chang <tientzu@chromium.org>
---
 drivers/iommu/dma-iommu.c | 12 ++++++------
 drivers/xen/swiotlb-xen.c |  2 +-
 include/linux/swiotlb.h   |  7 ++++---
 kernel/dma/direct.c       |  6 +++---
 kernel/dma/direct.h       |  6 +++---
 5 files changed, 17 insertions(+), 16 deletions(-)

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 5d96fcc45fec..1a6a08908245 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -506,7 +506,7 @@ static void __iommu_dma_unmap_swiotlb(struct device *dev, dma_addr_t dma_addr,
 
 	__iommu_dma_unmap(dev, dma_addr, size);
 
-	if (unlikely(is_swiotlb_buffer(phys)))
+	if (unlikely(is_swiotlb_buffer(dev, phys)))
 		swiotlb_tbl_unmap_single(dev, phys, size, dir, attrs);
 }
 
@@ -577,7 +577,7 @@ static dma_addr_t __iommu_dma_map_swiotlb(struct device *dev, phys_addr_t phys,
 	}
 
 	iova = __iommu_dma_map(dev, phys, aligned_size, prot, dma_mask);
-	if (iova == DMA_MAPPING_ERROR && is_swiotlb_buffer(phys))
+	if (iova == DMA_MAPPING_ERROR && is_swiotlb_buffer(dev, phys))
 		swiotlb_tbl_unmap_single(dev, phys, org_size, dir, attrs);
 	return iova;
 }
@@ -783,7 +783,7 @@ static void iommu_dma_sync_single_for_cpu(struct device *dev,
 	if (!dev_is_dma_coherent(dev))
 		arch_sync_dma_for_cpu(phys, size, dir);
 
-	if (is_swiotlb_buffer(phys))
+	if (is_swiotlb_buffer(dev, phys))
 		swiotlb_sync_single_for_cpu(dev, phys, size, dir);
 }
 
@@ -796,7 +796,7 @@ static void iommu_dma_sync_single_for_device(struct device *dev,
 		return;
 
 	phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dma_handle);
-	if (is_swiotlb_buffer(phys))
+	if (is_swiotlb_buffer(dev, phys))
 		swiotlb_sync_single_for_device(dev, phys, size, dir);
 
 	if (!dev_is_dma_coherent(dev))
@@ -817,7 +817,7 @@ static void iommu_dma_sync_sg_for_cpu(struct device *dev,
 		if (!dev_is_dma_coherent(dev))
 			arch_sync_dma_for_cpu(sg_phys(sg), sg->length, dir);
 
-		if (is_swiotlb_buffer(sg_phys(sg)))
+		if (is_swiotlb_buffer(dev, sg_phys(sg)))
 			swiotlb_sync_single_for_cpu(dev, sg_phys(sg),
 						    sg->length, dir);
 	}
@@ -834,7 +834,7 @@ static void iommu_dma_sync_sg_for_device(struct device *dev,
 		return;
 
 	for_each_sg(sgl, sg, nelems, i) {
-		if (is_swiotlb_buffer(sg_phys(sg)))
+		if (is_swiotlb_buffer(dev, sg_phys(sg)))
 			swiotlb_sync_single_for_device(dev, sg_phys(sg),
 						       sg->length, dir);
 
diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index 24d11861ac7d..0c4fb34f11ab 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -100,7 +100,7 @@ static int is_xen_swiotlb_buffer(struct device *dev, dma_addr_t dma_addr)
 	 * in our domain. Therefore _only_ check address within our domain.
 	 */
 	if (pfn_valid(PFN_DOWN(paddr)))
-		return is_swiotlb_buffer(paddr);
+		return is_swiotlb_buffer(dev, paddr);
 	return 0;
 }
 
diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index ec0c01796c8a..921b469c6ad2 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -2,6 +2,7 @@
 #ifndef __LINUX_SWIOTLB_H
 #define __LINUX_SWIOTLB_H
 
+#include <linux/device.h>
 #include <linux/dma-direction.h>
 #include <linux/init.h>
 #include <linux/types.h>
@@ -102,9 +103,9 @@ struct io_tlb_mem {
 };
 extern struct io_tlb_mem *io_tlb_default_mem;
 
-static inline bool is_swiotlb_buffer(phys_addr_t paddr)
+static inline bool is_swiotlb_buffer(struct device *dev, phys_addr_t paddr)
 {
-	struct io_tlb_mem *mem = io_tlb_default_mem;
+	struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
 
 	return mem && paddr >= mem->start && paddr < mem->end;
 }
@@ -121,7 +122,7 @@ bool is_swiotlb_active(void);
 void __init swiotlb_adjust_size(unsigned long size);
 #else
 #define swiotlb_force SWIOTLB_NO_FORCE
-static inline bool is_swiotlb_buffer(phys_addr_t paddr)
+static inline bool is_swiotlb_buffer(struct device *dev, phys_addr_t paddr)
 {
 	return false;
 }
diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
index f737e3347059..84c9feb5474a 100644
--- a/kernel/dma/direct.c
+++ b/kernel/dma/direct.c
@@ -343,7 +343,7 @@ void dma_direct_sync_sg_for_device(struct device *dev,
 	for_each_sg(sgl, sg, nents, i) {
 		phys_addr_t paddr = dma_to_phys(dev, sg_dma_address(sg));
 
-		if (unlikely(is_swiotlb_buffer(paddr)))
+		if (unlikely(is_swiotlb_buffer(dev, paddr)))
 			swiotlb_sync_single_for_device(dev, paddr, sg->length,
 						       dir);
 
@@ -369,7 +369,7 @@ void dma_direct_sync_sg_for_cpu(struct device *dev,
 		if (!dev_is_dma_coherent(dev))
 			arch_sync_dma_for_cpu(paddr, sg->length, dir);
 
-		if (unlikely(is_swiotlb_buffer(paddr)))
+		if (unlikely(is_swiotlb_buffer(dev, paddr)))
 			swiotlb_sync_single_for_cpu(dev, paddr, sg->length,
 						    dir);
 
@@ -504,7 +504,7 @@ size_t dma_direct_max_mapping_size(struct device *dev)
 bool dma_direct_need_sync(struct device *dev, dma_addr_t dma_addr)
 {
 	return !dev_is_dma_coherent(dev) ||
-		is_swiotlb_buffer(dma_to_phys(dev, dma_addr));
+	       is_swiotlb_buffer(dev, dma_to_phys(dev, dma_addr));
 }
 
 /**
diff --git a/kernel/dma/direct.h b/kernel/dma/direct.h
index 50afc05b6f1d..13e9e7158d94 100644
--- a/kernel/dma/direct.h
+++ b/kernel/dma/direct.h
@@ -56,7 +56,7 @@ static inline void dma_direct_sync_single_for_device(struct device *dev,
 {
 	phys_addr_t paddr = dma_to_phys(dev, addr);
 
-	if (unlikely(is_swiotlb_buffer(paddr)))
+	if (unlikely(is_swiotlb_buffer(dev, paddr)))
 		swiotlb_sync_single_for_device(dev, paddr, size, dir);
 
 	if (!dev_is_dma_coherent(dev))
@@ -73,7 +73,7 @@ static inline void dma_direct_sync_single_for_cpu(struct device *dev,
 		arch_sync_dma_for_cpu_all();
 	}
 
-	if (unlikely(is_swiotlb_buffer(paddr)))
+	if (unlikely(is_swiotlb_buffer(dev, paddr)))
 		swiotlb_sync_single_for_cpu(dev, paddr, size, dir);
 
 	if (dir == DMA_FROM_DEVICE)
@@ -113,7 +113,7 @@ static inline void dma_direct_unmap_page(struct device *dev, dma_addr_t addr,
 	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
 		dma_direct_sync_single_for_cpu(dev, addr, size, dir);
 
-	if (unlikely(is_swiotlb_buffer(phys)))
+	if (unlikely(is_swiotlb_buffer(dev, phys)))
 		swiotlb_tbl_unmap_single(dev, phys, size, dir, attrs);
 }
 #endif /* _KERNEL_DMA_DIRECT_H */
-- 
2.32.0.272.g935e593368-goog


^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox