LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v19 02/13] arm64: Use ELF fields defined in 'struct kimage'
From: Lakshmi Ramasubramanian @ 2021-02-21 17:49 UTC (permalink / raw)
  To: zohar, bauerman, robh, takahiro.akashi, gregkh, will, joe,
	catalin.marinas, mpe, sfr
  Cc: mark.rutland, tao.li, paulus, vincenzo.frascino, frowand.list,
	sashal, masahiroy, jmorris, allison, serge, devicetree,
	pasha.tatashin, prsriva, hsinyi, linux-arm-kernel,
	christophe.leroy, mbrugger, balajib, dmitry.kasatkin,
	linux-kernel, james.morse, linux-integrity, linuxppc-dev
In-Reply-To: <20210221174930.27324-1-nramas@linux.microsoft.com>

ELF related fields elf_headers, elf_headers_sz, and elf_headers_mem
have been moved from 'struct kimage_arch' to 'struct kimage' as
elf_headers, elf_headers_sz, and elf_load_addr respectively.

Use the ELF fields defined in 'struct kimage'.

Signed-off-by: Lakshmi Ramasubramanian <nramas@linux.microsoft.com>
Suggested-by: Rob Herring <robh@kernel.org>
Fixes: 33488dc4d61f ("of: Add a common kexec FDT setup function")
Reported-by: kernel test robot <lkp@intel.com>
---
 arch/arm64/include/asm/kexec.h         |  4 ----
 arch/arm64/kernel/machine_kexec_file.c | 18 +++++++++---------
 2 files changed, 9 insertions(+), 13 deletions(-)

diff --git a/arch/arm64/include/asm/kexec.h b/arch/arm64/include/asm/kexec.h
index d24b527e8c00..12a561a54128 100644
--- a/arch/arm64/include/asm/kexec.h
+++ b/arch/arm64/include/asm/kexec.h
@@ -96,10 +96,6 @@ static inline void crash_post_resume(void) {}
 struct kimage_arch {
 	void *dtb;
 	unsigned long dtb_mem;
-	/* Core ELF header buffer */
-	void *elf_headers;
-	unsigned long elf_headers_mem;
-	unsigned long elf_headers_sz;
 };
 
 extern const struct kexec_file_ops kexec_image_ops;
diff --git a/arch/arm64/kernel/machine_kexec_file.c b/arch/arm64/kernel/machine_kexec_file.c
index 03210f644790..5553de3d401a 100644
--- a/arch/arm64/kernel/machine_kexec_file.c
+++ b/arch/arm64/kernel/machine_kexec_file.c
@@ -43,9 +43,9 @@ int arch_kimage_file_post_load_cleanup(struct kimage *image)
 	vfree(image->arch.dtb);
 	image->arch.dtb = NULL;
 
-	vfree(image->arch.elf_headers);
-	image->arch.elf_headers = NULL;
-	image->arch.elf_headers_sz = 0;
+	vfree(image->elf_headers);
+	image->elf_headers = NULL;
+	image->elf_headers_sz = 0;
 
 	return kexec_image_post_load_cleanup_default(image);
 }
@@ -73,8 +73,8 @@ static int setup_dtb(struct kimage *image,
 		/* add linux,elfcorehdr */
 		ret = fdt_appendprop_addrrange(dtb, 0, off,
 				FDT_PROP_KEXEC_ELFHDR,
-				image->arch.elf_headers_mem,
-				image->arch.elf_headers_sz);
+				image->elf_load_addr,
+				image->elf_headers_sz);
 		if (ret)
 			return (ret == -FDT_ERR_NOSPACE ? -ENOMEM : -EINVAL);
 
@@ -282,12 +282,12 @@ int load_other_segments(struct kimage *image,
 			vfree(headers);
 			goto out_err;
 		}
-		image->arch.elf_headers = headers;
-		image->arch.elf_headers_mem = kbuf.mem;
-		image->arch.elf_headers_sz = headers_sz;
+		image->elf_headers = headers;
+		image->elf_load_addr = kbuf.mem;
+		image->elf_headers_sz = headers_sz;
 
 		pr_debug("Loaded elf core header at 0x%lx bufsz=0x%lx memsz=0x%lx\n",
-			 image->arch.elf_headers_mem, kbuf.bufsz, kbuf.memsz);
+			 image->elf_load_addr, kbuf.bufsz, kbuf.memsz);
 	}
 
 	/* load initrd */
-- 
2.30.0


^ permalink raw reply related

* [PATCH v19 03/13] powerpc: Use ELF fields defined in 'struct kimage'
From: Lakshmi Ramasubramanian @ 2021-02-21 17:49 UTC (permalink / raw)
  To: zohar, bauerman, robh, takahiro.akashi, gregkh, will, joe,
	catalin.marinas, mpe, sfr
  Cc: mark.rutland, tao.li, paulus, vincenzo.frascino, frowand.list,
	sashal, masahiroy, jmorris, allison, serge, devicetree,
	pasha.tatashin, prsriva, hsinyi, linux-arm-kernel,
	christophe.leroy, mbrugger, balajib, dmitry.kasatkin,
	linux-kernel, james.morse, linux-integrity, linuxppc-dev
In-Reply-To: <20210221174930.27324-1-nramas@linux.microsoft.com>

ELF related fields elf_headers, elf_headers_sz, and elfcorehdr_addr
have been moved from 'struct kimage_arch' to 'struct kimage' as
elf_headers, elf_headers_sz, and elf_load_addr respectively.

Use the ELF fields defined in 'struct kimage'.

Signed-off-by: Lakshmi Ramasubramanian <nramas@linux.microsoft.com>
Suggested-by: Rob Herring <robh@kernel.org>
Fixes: 33488dc4d61f ("of: Add a common kexec FDT setup function")
Reported-by: kernel test robot <lkp@intel.com>
---
 arch/powerpc/include/asm/kexec.h  |  4 ----
 arch/powerpc/kexec/file_load.c    |  6 +++---
 arch/powerpc/kexec/file_load_64.c | 14 +++++++-------
 3 files changed, 10 insertions(+), 14 deletions(-)

diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h
index 55d6ede30c19..69c2a8aa142a 100644
--- a/arch/powerpc/include/asm/kexec.h
+++ b/arch/powerpc/include/asm/kexec.h
@@ -108,10 +108,6 @@ struct kimage_arch {
 	unsigned long backup_start;
 	void *backup_buf;
 
-	unsigned long elfcorehdr_addr;
-	unsigned long elf_headers_sz;
-	void *elf_headers;
-
 #ifdef CONFIG_IMA_KEXEC
 	phys_addr_t ima_buffer_addr;
 	size_t ima_buffer_size;
diff --git a/arch/powerpc/kexec/file_load.c b/arch/powerpc/kexec/file_load.c
index 9a232bc36c8f..19d2c5f49daf 100644
--- a/arch/powerpc/kexec/file_load.c
+++ b/arch/powerpc/kexec/file_load.c
@@ -45,7 +45,7 @@ char *setup_kdump_cmdline(struct kimage *image, char *cmdline,
 		return NULL;
 
 	elfcorehdr_strlen = sprintf(cmdline_ptr, "elfcorehdr=0x%lx ",
-				    image->arch.elfcorehdr_addr);
+				    image->elf_load_addr);
 
 	if (elfcorehdr_strlen + cmdline_len > COMMAND_LINE_SIZE) {
 		pr_err("Appending elfcorehdr=<addr> exceeds cmdline size\n");
@@ -263,8 +263,8 @@ int setup_new_fdt(const struct kimage *image, void *fdt,
 		 * Avoid elfcorehdr from being stomped on in kdump kernel by
 		 * setting up memory reserve map.
 		 */
-		ret = fdt_add_mem_rsv(fdt, image->arch.elfcorehdr_addr,
-				      image->arch.elf_headers_sz);
+		ret = fdt_add_mem_rsv(fdt, image->elf_load_addr,
+				      image->elf_headers_sz);
 		if (ret) {
 			pr_err("Error reserving elfcorehdr memory: %s\n",
 			       fdt_strerror(ret));
diff --git a/arch/powerpc/kexec/file_load_64.c b/arch/powerpc/kexec/file_load_64.c
index c69bcf9b547a..4350f225bb67 100644
--- a/arch/powerpc/kexec/file_load_64.c
+++ b/arch/powerpc/kexec/file_load_64.c
@@ -815,9 +815,9 @@ static int load_elfcorehdr_segment(struct kimage *image, struct kexec_buf *kbuf)
 		goto out;
 	}
 
-	image->arch.elfcorehdr_addr = kbuf->mem;
-	image->arch.elf_headers_sz = headers_sz;
-	image->arch.elf_headers = headers;
+	image->elf_load_addr = kbuf->mem;
+	image->elf_headers_sz = headers_sz;
+	image->elf_headers = headers;
 out:
 	kfree(cmem);
 	return ret;
@@ -851,7 +851,7 @@ int load_crashdump_segments_ppc64(struct kimage *image,
 		return ret;
 	}
 	pr_debug("Loaded elf core header at 0x%lx, bufsz=0x%lx memsz=0x%lx\n",
-		 image->arch.elfcorehdr_addr, kbuf->bufsz, kbuf->memsz);
+		 image->elf_load_addr, kbuf->bufsz, kbuf->memsz);
 
 	return 0;
 }
@@ -1107,9 +1107,9 @@ int arch_kimage_file_post_load_cleanup(struct kimage *image)
 	vfree(image->arch.backup_buf);
 	image->arch.backup_buf = NULL;
 
-	vfree(image->arch.elf_headers);
-	image->arch.elf_headers = NULL;
-	image->arch.elf_headers_sz = 0;
+	vfree(image->elf_headers);
+	image->elf_headers = NULL;
+	image->elf_headers_sz = 0;
 
 	return kexec_image_post_load_cleanup_default(image);
 }
-- 
2.30.0


^ permalink raw reply related

* [PATCH v19 00/13] Carry forward IMA measurement log on kexec on ARM64
From: Lakshmi Ramasubramanian @ 2021-02-21 17:49 UTC (permalink / raw)
  To: zohar, bauerman, robh, takahiro.akashi, gregkh, will, joe,
	catalin.marinas, mpe, sfr
  Cc: mark.rutland, tao.li, paulus, vincenzo.frascino, frowand.list,
	sashal, masahiroy, jmorris, allison, serge, devicetree,
	pasha.tatashin, prsriva, hsinyi, linux-arm-kernel,
	christophe.leroy, mbrugger, balajib, dmitry.kasatkin,
	linux-kernel, james.morse, linux-integrity, linuxppc-dev

On kexec file load Integrity Measurement Architecture (IMA) subsystem
may verify the IMA signature of the kernel and initramfs, and measure
it.  The command line parameters passed to the kernel in the kexec call
may also be measured by IMA.  A remote attestation service can verify
a TPM quote based on the TPM event log, the IMA measurement list, and
the TPM PCR data.  This can be achieved only if the IMA measurement log
is carried over from the current kernel to the next kernel across
the kexec call.

powerpc already supports carrying forward the IMA measurement log on
kexec.  This patch set adds support for carrying forward the IMA
measurement log on kexec on ARM64.

This patch set moves the platform independent code defined for powerpc
such that it can be reused for other platforms as well.  A chosen node
"linux,ima-kexec-buffer" is added to the DTB for ARM64 to hold
the address and the size of the memory reserved to carry
the IMA measurement log.

This patch set has been tested for ARM64 platform using QEMU.
I would like help from the community for testing this change on powerpc.
Thanks.

This patch set is based on
commit f31e3386a4e9 ("ima: Free IMA measurement buffer after kexec syscall")
in https://git.kernel.org/pub/scm/linux/kernel/git/zohar/linux-integrity.git
"ima-kexec-fixes" branch.

Changelog:

v19
  - Moved ELF related fields from "struct kimage_arch" for x86, arm64,
    and powerpc architectures to "struct kimage".

v18
  - Added a parameter to of_kexec_alloc_and_setup_fdt() for the caller
    to specify additional space needed for the FDT buffer
  - Renamed arm64 and powerpc ELF buffer address field in
    "struct kimage_arch" to elf_load_addr to match x86_64 name.
  - Removed of_ima_add_kexec_buffer() and instead directly set
    ima_buffer_addr and ima_buffer_size in ima_add_kexec_buffer()
  - Moved FDT_EXTRA_SPACE definition from include/linux/of.h to
    drivers/of/kexec.c

v17
  - Renamed of_kexec_setup_new_fdt() to of_kexec_alloc_and_setup_fdt(),
    and moved memory allocation for the new FDT to this function.

v16
  - Defined functions to allocate and free buffer for FDT for powerpc
    and arm64.
  - Moved ima_buffer_addr and ima_buffer_size fields from
    "struct kimage_arch" in powerpc to "struct kimage"
v15
  - Included Rob's patches in the patch set, and rebased
    the changes to "next-integrity" branch.
  - Allocate memory for DTB, on arm64, using kmalloc() instead of
    vmalloc() to keep it consistent with powerpc implementation.
  - Call of_kexec_setup_new_fdt() from setup_new_fdt_ppc64() and
    remove setup_new_fdt() in the same patch to keep it bisect safe.

v14
  - Select CONFIG_HAVE_IMA_KEXEC for CONFIG_KEXEC_FILE, for powerpc
    and arm64, if CONFIG_IMA is enabled.
  - Use IS_ENABLED() macro instead of "#ifdef" in remove_ima_buffer(),
    ima_get_kexec_buffer(), and ima_free_kexec_buffer().
  - Call of_kexec_setup_new_fdt() from setup_new_fdt_ppc64() and
    remove setup_new_fdt() in "arch/powerpc/kexec/file_load.c".

v13
  - Moved the arch independent functions to drivers/of/kexec.c
    and then refactored the code.
  - Moved arch_ima_add_kexec_buffer() to
    security/integrity/ima/ima_kexec.c

v12
  - Use fdt_appendprop_addrrange() in setup_ima_buffer()
    to setup the IMA measurement list property in
    the device tree.
  - Moved architecture independent functions from
    "arch/powerpc/kexec/ima.c" to "drivers/of/kexec."
  - Deleted "arch/powerpc/kexec/ima.c" and
    "arch/powerpc/include/asm/ima.h".

v11
  - Rebased the changes on the kexec code refactoring done by
    Rob Herring in his "dt/kexec" branch
  - Removed "extern" keyword in function declarations
  - Removed unnecessary header files included in C files
  - Updated patch descriptions per Thiago's comments

v10
  - Moved delete_fdt_mem_rsv(), remove_ima_buffer(),
    get_ima_kexec_buffer, and get_root_addr_size_cells()
    to drivers/of/kexec.c
  - Moved arch_ima_add_kexec_buffer() to
    security/integrity/ima/ima_kexec.c
  - Conditionally define IMA buffer fields in struct kimage_arch

v9
  - Moved delete_fdt_mem_rsv() to drivers/of/kexec_fdt.c
  - Defined a new function get_ima_kexec_buffer() in
    drivers/of/ima_kexec.c to replace do_get_kexec_buffer()
  - Changed remove_ima_kexec_buffer() to the original function name
    remove_ima_buffer()
  - Moved remove_ima_buffer() to drivers/of/ima_kexec.c
  - Moved ima_get_kexec_buffer() and ima_free_kexec_buffer()
    to security/integrity/ima/ima_kexec.c

v8:
  - Moved remove_ima_kexec_buffer(), do_get_kexec_buffer(), and
    delete_fdt_mem_rsv() to drivers/of/fdt.c
  - Moved ima_dump_measurement_list() and ima_add_kexec_buffer()
    back to security/integrity/ima/ima_kexec.c

v7:
  - Renamed remove_ima_buffer() to remove_ima_kexec_buffer() and moved
    this function definition to kernel.
  - Moved delete_fdt_mem_rsv() definition to kernel
  - Moved ima_dump_measurement_list() and ima_add_kexec_buffer() to
    a new file namely ima_kexec_fdt.c in IMA

v6:
  - Remove any existing FDT_PROP_IMA_KEXEC_BUFFER property in the device
    tree and also its corresponding memory reservation in the currently
    running kernel.
  - Moved the function remove_ima_buffer() defined for powerpc to IMA
    and renamed the function to ima_remove_kexec_buffer(). Also, moved
    delete_fdt_mem_rsv() from powerpc to IMA.

v5:
  - Merged get_addr_size_cells() and do_get_kexec_buffer() into a single
    function when moving the arch independent code from powerpc to IMA
  - Reverted the change to use FDT functions in powerpc code and added
    back the original code in get_addr_size_cells() and
    do_get_kexec_buffer() for powerpc.
  - Added fdt_add_mem_rsv() for ARM64 to reserve the memory for
    the IMA log buffer during kexec.
  - Fixed the warning reported by kernel test bot for ARM64
    arch_ima_add_kexec_buffer() - moved this function to a new file
    namely arch/arm64/kernel/ima_kexec.c

v4:
  - Submitting the patch series on behalf of the original author
    Prakhar Srivastava <prsriva@linux.microsoft.com>
  - Moved FDT_PROP_IMA_KEXEC_BUFFER ("linux,ima-kexec-buffer") to
    libfdt.h so that it can be shared by multiple platforms.

v3:
Breakup patches further into separate patches.
  - Refactoring non architecture specific code out of powerpc
  - Update powerpc related code to use fdt functions
  - Update IMA buffer read related code to use of functions
  - Add support to store the memory information of the IMA
    measurement logs to be carried forward.
  - Update the property strings to align with documented nodes
    https://github.com/devicetree-org/dt-schema/pull/46

v2:
  Break patches into separate patches.
  - Powerpc related Refactoring
  - Updating the docuemntation for chosen node
  - Updating arm64 to support IMA buffer pass

v1:
  Refactoring carrying over IMA measuremnet logs over Kexec. This patch
    moves the non-architecture specific code out of powerpc and adds to
    security/ima.(Suggested by Thiago)
  Add Documentation regarding the ima-kexec-buffer node in the chosen
    node documentation

v0:
  Add a layer of abstraction to use the memory reserved by device tree
    for ima buffer pass.
  Add support for ima buffer pass using reserved memory for arm64 kexec.
    Update the arch sepcific code path in kexec file load to store the
    ima buffer in the reserved memory. The same reserved memory is read
    on kexec or cold boot.

Lakshmi Ramasubramanian (10):
  kexec: Move ELF fields to struct kimage
  arm64: Use ELF fields defined in 'struct kimage'
  powerpc: Use ELF fields defined in 'struct kimage'
  x86: Use ELF fields defined in 'struct kimage'
  powerpc: Move ima buffer fields to struct kimage
  powerpc: Enable passing IMA log to next kernel on kexec
  powerpc: Move arch independent ima kexec functions to
    drivers/of/kexec.c
  kexec: Use fdt_appendprop_addrrange() to add ima buffer to FDT
  powerpc: Delete unused function delete_fdt_mem_rsv()
  arm64: Enable passing IMA log to next kernel on kexec

Rob Herring (3):
  of: Add a common kexec FDT setup function
  arm64: Use common of_kexec_alloc_and_setup_fdt()
  powerpc: Use common of_kexec_alloc_and_setup_fdt()

 arch/arm64/Kconfig                     |   1 +
 arch/arm64/include/asm/kexec.h         |   4 -
 arch/arm64/kernel/machine_kexec_file.c | 194 +----------
 arch/powerpc/Kconfig                   |   2 +-
 arch/powerpc/include/asm/ima.h         |  30 --
 arch/powerpc/include/asm/kexec.h       |  14 +-
 arch/powerpc/kexec/Makefile            |   7 -
 arch/powerpc/kexec/elf_64.c            |  30 +-
 arch/powerpc/kexec/file_load.c         | 183 +---------
 arch/powerpc/kexec/file_load_64.c      |  21 +-
 arch/powerpc/kexec/ima.c               | 219 ------------
 arch/x86/include/asm/kexec.h           |   5 -
 arch/x86/kernel/crash.c                |  14 +-
 arch/x86/kernel/kexec-bzimage64.c      |   2 +-
 arch/x86/kernel/machine_kexec_64.c     |   4 +-
 drivers/of/Makefile                    |   6 +
 drivers/of/kexec.c                     | 458 +++++++++++++++++++++++++
 include/linux/kexec.h                  |   8 +
 include/linux/of.h                     |   7 +
 security/integrity/ima/ima.h           |   4 -
 security/integrity/ima/ima_kexec.c     |   9 +-
 21 files changed, 539 insertions(+), 683 deletions(-)
 delete mode 100644 arch/powerpc/include/asm/ima.h
 delete mode 100644 arch/powerpc/kexec/ima.c
 create mode 100644 drivers/of/kexec.c

-- 
2.30.0


^ permalink raw reply

* [PATCH v19 01/13] kexec: Move ELF fields to struct kimage
From: Lakshmi Ramasubramanian @ 2021-02-21 17:49 UTC (permalink / raw)
  To: zohar, bauerman, robh, takahiro.akashi, gregkh, will, joe,
	catalin.marinas, mpe, sfr
  Cc: mark.rutland, tao.li, paulus, vincenzo.frascino, frowand.list,
	sashal, masahiroy, jmorris, allison, serge, devicetree,
	pasha.tatashin, prsriva, hsinyi, linux-arm-kernel,
	christophe.leroy, mbrugger, balajib, dmitry.kasatkin,
	linux-kernel, james.morse, linux-integrity, linuxppc-dev
In-Reply-To: <20210221174930.27324-1-nramas@linux.microsoft.com>

ELF related fields elf_headers, elf_headers_sz, and elf_load_addr are
defined in architecture specific 'struct kimage_arch' for x86, powerpc,
and arm64.  The name of these fields are different in these
architectures that makes it hard to have a common code for setting up
the device tree for kexec system call.

Move the ELF fields to 'struct kimage' defined in include/linux/kexec.h
so common code can use it.

Signed-off-by: Lakshmi Ramasubramanian <nramas@linux.microsoft.com>
Suggested-by: Rob Herring <robh@kernel.org>
Fixes: 33488dc4d61f ("of: Add a common kexec FDT setup function")
Reported-by: kernel test robot <lkp@intel.com>
---
 include/linux/kexec.h | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index 5f61389f5f36..0208fe8f8e42 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -305,6 +305,11 @@ struct kimage {
 	/* Virtual address of IMA measurement buffer for kexec syscall */
 	void *ima_buffer;
 #endif
+
+	/* Core ELF header buffer */
+	void *elf_headers;
+	unsigned long elf_headers_sz;
+	unsigned long elf_load_addr;
 };
 
 /* kexec interface functions */
-- 
2.30.0


^ permalink raw reply related

* [PATCH v19 04/13] x86: Use ELF fields defined in 'struct kimage'
From: Lakshmi Ramasubramanian @ 2021-02-21 17:49 UTC (permalink / raw)
  To: zohar, bauerman, robh, takahiro.akashi, gregkh, will, joe,
	catalin.marinas, mpe, sfr
  Cc: mark.rutland, tao.li, paulus, vincenzo.frascino, frowand.list,
	sashal, masahiroy, jmorris, allison, serge, devicetree,
	pasha.tatashin, prsriva, hsinyi, linux-arm-kernel,
	christophe.leroy, mbrugger, balajib, dmitry.kasatkin,
	linux-kernel, james.morse, linux-integrity, linuxppc-dev
In-Reply-To: <20210221174930.27324-1-nramas@linux.microsoft.com>

ELF related fields elf_headers, elf_headers_sz, and elf_load_addr
have been moved from 'struct kimage_arch' to 'struct kimage'.

Use the ELF fields defined in 'struct kimage'.

Signed-off-by: Lakshmi Ramasubramanian <nramas@linux.microsoft.com>
Suggested-by: Rob Herring <robh@kernel.org>
Fixes: 33488dc4d61f ("of: Add a common kexec FDT setup function")
Reported-by: kernel test robot <lkp@intel.com>
---
 arch/x86/include/asm/kexec.h       |  5 -----
 arch/x86/kernel/crash.c            | 14 +++++++-------
 arch/x86/kernel/kexec-bzimage64.c  |  2 +-
 arch/x86/kernel/machine_kexec_64.c |  4 ++--
 4 files changed, 10 insertions(+), 15 deletions(-)

diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h
index 6802c59e8252..0a6e34b07017 100644
--- a/arch/x86/include/asm/kexec.h
+++ b/arch/x86/include/asm/kexec.h
@@ -150,11 +150,6 @@ struct kimage_arch {
 	pud_t *pud;
 	pmd_t *pmd;
 	pte_t *pte;
-
-	/* Core ELF header buffer */
-	void *elf_headers;
-	unsigned long elf_headers_sz;
-	unsigned long elf_load_addr;
 };
 #endif /* CONFIG_X86_32 */
 
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index a8f3af257e26..9d0722fb8842 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -323,8 +323,8 @@ static int memmap_exclude_ranges(struct kimage *image, struct crash_mem *cmem,
 	cmem->nr_ranges = 1;
 
 	/* Exclude elf header region */
-	start = image->arch.elf_load_addr;
-	end = start + image->arch.elf_headers_sz - 1;
+	start = image->elf_load_addr;
+	end = start + image->elf_headers_sz - 1;
 	return crash_exclude_mem_range(cmem, start, end);
 }
 
@@ -407,20 +407,20 @@ int crash_load_segments(struct kimage *image)
 	if (ret)
 		return ret;
 
-	image->arch.elf_headers = kbuf.buffer;
-	image->arch.elf_headers_sz = kbuf.bufsz;
+	image->elf_headers = kbuf.buffer;
+	image->elf_headers_sz = kbuf.bufsz;
 
 	kbuf.memsz = kbuf.bufsz;
 	kbuf.buf_align = ELF_CORE_HEADER_ALIGN;
 	kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
 	ret = kexec_add_buffer(&kbuf);
 	if (ret) {
-		vfree((void *)image->arch.elf_headers);
+		vfree((void *)image->elf_headers);
 		return ret;
 	}
-	image->arch.elf_load_addr = kbuf.mem;
+	image->elf_load_addr = kbuf.mem;
 	pr_debug("Loaded ELF headers at 0x%lx bufsz=0x%lx memsz=0x%lx\n",
-		 image->arch.elf_load_addr, kbuf.bufsz, kbuf.bufsz);
+		 image->elf_load_addr, kbuf.bufsz, kbuf.bufsz);
 
 	return ret;
 }
diff --git a/arch/x86/kernel/kexec-bzimage64.c b/arch/x86/kernel/kexec-bzimage64.c
index ce831f9448e7..170d0fd68b1f 100644
--- a/arch/x86/kernel/kexec-bzimage64.c
+++ b/arch/x86/kernel/kexec-bzimage64.c
@@ -75,7 +75,7 @@ static int setup_cmdline(struct kimage *image, struct boot_params *params,
 
 	if (image->type == KEXEC_TYPE_CRASH) {
 		len = sprintf(cmdline_ptr,
-			"elfcorehdr=0x%lx ", image->arch.elf_load_addr);
+			"elfcorehdr=0x%lx ", image->elf_load_addr);
 	}
 	memcpy(cmdline_ptr + len, cmdline, cmdline_len);
 	cmdline_len += len;
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index a29a44a98e5b..055c18a6f7bf 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -402,8 +402,8 @@ void machine_kexec(struct kimage *image)
 #ifdef CONFIG_KEXEC_FILE
 void *arch_kexec_kernel_image_load(struct kimage *image)
 {
-	vfree(image->arch.elf_headers);
-	image->arch.elf_headers = NULL;
+	vfree(image->elf_headers);
+	image->elf_headers = NULL;
 
 	if (!image->fops || !image->fops->load)
 		return ERR_PTR(-ENOEXEC);
-- 
2.30.0


^ permalink raw reply related

* Re: [PATCH 1/2] arch: syscalls: add missing FORCE and fix 'targets' to make if_changed work
From: Masahiro Yamada @ 2021-02-21 17:14 UTC (permalink / raw)
  To: Linux Kbuild mailing list
  Cc: Rich Felker, linux-ia64, Linux-sh list, linux-mips,
	James E.J. Bottomley, Max Filippov, Paul Mackerras,
	H. Peter Anvin, sparclinux, Yoshinori Sato, Helge Deller, X86 ML,
	Ingo Molnar, Geert Uytterhoeven, Matt Turner, Fenghua Yu,
	open list:TENSILICA XTENSA PORT (xtensa), linux-m68k,
	Thomas Bogendoerfer, Ivan Kokshaysky, Andy Lutomirski,
	Thomas Gleixner, Richard Henderson, Chris Zankel, Michal Simek,
	Tony Luck, linux-parisc, Linux Kernel Mailing List, linux-alpha,
	Borislav Petkov, linuxppc-dev, David S. Miller
In-Reply-To: <20210215004823.440102-1-masahiroy@kernel.org>

On Mon, Feb 15, 2021 at 9:50 AM Masahiro Yamada <masahiroy@kernel.org> wrote:
>
> The rules in these Makefiles cannot detect the command line change
> because the prerequisite 'FORCE' is missing.
>
> Adding 'FORCE' will result in the headers being rebuilt every time
> because the 'targets' additions are also wrong; the file paths in
> 'targets' must be relative to the current Makefile.
>
> Fix all of them so the if_changed rules work correctly.
>
> Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
> Acked-by: Geert Uytterhoeven <geert@linux-m68k.org>


Both applied to linux-kbuild.


-- 
Best Regards
Masahiro Yamada

^ permalink raw reply

* [PATCH v6 06/10] powerpc/signal64: Replace setup_sigcontext() w/ unsafe_setup_sigcontext()
From: Christopher M. Riedl @ 2021-02-21  1:23 UTC (permalink / raw)
  To: linuxppc-dev
In-Reply-To: <20210221012401.22328-1-cmr@codefail.de>

Previously setup_sigcontext() performed a costly KUAP switch on every
uaccess operation. These repeated uaccess switches cause a significant
drop in signal handling performance.

Rewrite setup_sigcontext() to assume that a userspace write access window
is open by replacing all uaccess functions with their 'unsafe' versions.
Modify the callers to first open, call unsafe_setup_sigcontext() and
then close the uaccess window.

Signed-off-by: Christopher M. Riedl <cmr@codefail.de>
---
 arch/powerpc/kernel/signal_64.c | 71 ++++++++++++++++++++-------------
 1 file changed, 44 insertions(+), 27 deletions(-)

diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index bd8d210c9115..3faaa736ed62 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -101,9 +101,13 @@ static void prepare_setup_sigcontext(struct task_struct *tsk)
  * Set up the sigcontext for the signal frame.
  */
 
-static long setup_sigcontext(struct sigcontext __user *sc,
-		struct task_struct *tsk, int signr, sigset_t *set,
-		unsigned long handler, int ctx_has_vsx_region)
+#define unsafe_setup_sigcontext(sc, tsk, signr, set, handler,		\
+				ctx_has_vsx_region, e)			\
+	unsafe_op_wrap(__unsafe_setup_sigcontext(sc, tsk, signr, set,	\
+			handler, ctx_has_vsx_region), e)
+static long notrace __unsafe_setup_sigcontext(struct sigcontext __user *sc,
+					struct task_struct *tsk, int signr, sigset_t *set,
+					unsigned long handler, int ctx_has_vsx_region)
 {
 	/* When CONFIG_ALTIVEC is set, we _always_ setup v_regs even if the
 	 * process never used altivec yet (MSR_VEC is zero in pt_regs of
@@ -118,20 +122,19 @@ static long setup_sigcontext(struct sigcontext __user *sc,
 #endif
 	struct pt_regs *regs = tsk->thread.regs;
 	unsigned long msr = regs->msr;
-	long err = 0;
 	/* Force usr to alway see softe as 1 (interrupts enabled) */
 	unsigned long softe = 0x1;
 
 	BUG_ON(tsk != current);
 
 #ifdef CONFIG_ALTIVEC
-	err |= __put_user(v_regs, &sc->v_regs);
+	unsafe_put_user(v_regs, &sc->v_regs, efault_out);
 
 	/* save altivec registers */
 	if (tsk->thread.used_vr) {
 		/* Copy 33 vec registers (vr0..31 and vscr) to the stack */
-		err |= __copy_to_user(v_regs, &tsk->thread.vr_state,
-				      33 * sizeof(vector128));
+		unsafe_copy_to_user(v_regs, &tsk->thread.vr_state,
+				    33 * sizeof(vector128), efault_out);
 		/* set MSR_VEC in the MSR value in the frame to indicate that sc->v_reg)
 		 * contains valid data.
 		 */
@@ -140,12 +143,12 @@ static long setup_sigcontext(struct sigcontext __user *sc,
 	/* We always copy to/from vrsave, it's 0 if we don't have or don't
 	 * use altivec.
 	 */
-	err |= __put_user(tsk->thread.vrsave, (u32 __user *)&v_regs[33]);
+	unsafe_put_user(tsk->thread.vrsave, (u32 __user *)&v_regs[33], efault_out);
 #else /* CONFIG_ALTIVEC */
-	err |= __put_user(0, &sc->v_regs);
+	unsafe_put_user(0, &sc->v_regs, efault_out);
 #endif /* CONFIG_ALTIVEC */
 	/* copy fpr regs and fpscr */
-	err |= copy_fpr_to_user(&sc->fp_regs, tsk);
+	unsafe_copy_fpr_to_user(&sc->fp_regs, tsk, efault_out);
 
 	/*
 	 * Clear the MSR VSX bit to indicate there is no valid state attached
@@ -160,24 +163,27 @@ static long setup_sigcontext(struct sigcontext __user *sc,
 	 */
 	if (tsk->thread.used_vsr && ctx_has_vsx_region) {
 		v_regs += ELF_NVRREG;
-		err |= copy_vsx_to_user(v_regs, tsk);
+		unsafe_copy_vsx_to_user(v_regs, tsk, efault_out);
 		/* set MSR_VSX in the MSR value in the frame to
 		 * indicate that sc->vs_reg) contains valid data.
 		 */
 		msr |= MSR_VSX;
 	}
 #endif /* CONFIG_VSX */
-	err |= __put_user(&sc->gp_regs, &sc->regs);
+	unsafe_put_user(&sc->gp_regs, &sc->regs, efault_out);
 	WARN_ON(!FULL_REGS(regs));
-	err |= __copy_to_user(&sc->gp_regs, regs, GP_REGS_SIZE);
-	err |= __put_user(msr, &sc->gp_regs[PT_MSR]);
-	err |= __put_user(softe, &sc->gp_regs[PT_SOFTE]);
-	err |= __put_user(signr, &sc->signal);
-	err |= __put_user(handler, &sc->handler);
+	unsafe_copy_to_user(&sc->gp_regs, regs, GP_REGS_SIZE, efault_out);
+	unsafe_put_user(msr, &sc->gp_regs[PT_MSR], efault_out);
+	unsafe_put_user(softe, &sc->gp_regs[PT_SOFTE], efault_out);
+	unsafe_put_user(signr, &sc->signal, efault_out);
+	unsafe_put_user(handler, &sc->handler, efault_out);
 	if (set != NULL)
-		err |=  __put_user(set->sig[0], &sc->oldmask);
+		unsafe_put_user(set->sig[0], &sc->oldmask, efault_out);
 
-	return err;
+	return 0;
+
+efault_out:
+	return -EFAULT;
 }
 
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
@@ -670,12 +676,15 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx,
 
 	if (old_ctx != NULL) {
 		prepare_setup_sigcontext(current);
-		if (!access_ok(old_ctx, ctx_size)
-		    || setup_sigcontext(&old_ctx->uc_mcontext, current, 0, NULL, 0,
-					ctx_has_vsx_region)
-		    || __copy_to_user(&old_ctx->uc_sigmask,
-				      &current->blocked, sizeof(sigset_t)))
+		if (!user_write_access_begin(old_ctx, ctx_size))
 			return -EFAULT;
+
+		unsafe_setup_sigcontext(&old_ctx->uc_mcontext, current, 0, NULL,
+					0, ctx_has_vsx_region, efault_out);
+		unsafe_copy_to_user(&old_ctx->uc_sigmask, &current->blocked,
+				    sizeof(sigset_t), efault_out);
+
+		user_write_access_end();
 	}
 	if (new_ctx == NULL)
 		return 0;
@@ -704,6 +713,10 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx,
 	/* This returns like rt_sigreturn */
 	set_thread_flag(TIF_RESTOREALL);
 	return 0;
+
+efault_out:
+	user_write_access_end();
+	return -EFAULT;
 }
 
 
@@ -854,9 +867,13 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
 	} else {
 		err |= __put_user(0, &frame->uc.uc_link);
 		prepare_setup_sigcontext(tsk);
-		err |= setup_sigcontext(&frame->uc.uc_mcontext, tsk, ksig->sig,
-					NULL, (unsigned long)ksig->ka.sa.sa_handler,
-					1);
+		if (!user_write_access_begin(&frame->uc.uc_mcontext,
+					     sizeof(frame->uc.uc_mcontext)))
+			return -EFAULT;
+		err |= __unsafe_setup_sigcontext(&frame->uc.uc_mcontext, tsk,
+						ksig->sig, NULL,
+						(unsigned long)ksig->ka.sa.sa_handler, 1);
+		user_write_access_end();
 	}
 	err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
 	if (err)
-- 
2.26.1


^ permalink raw reply related

* [PATCH v6 05/10] powerpc/signal64: Remove TM ifdefery in middle of if/else block
From: Christopher M. Riedl @ 2021-02-21  1:23 UTC (permalink / raw)
  To: linuxppc-dev
In-Reply-To: <20210221012401.22328-1-cmr@codefail.de>

Both rt_sigreturn() and handle_rt_signal_64() contain TM-related ifdefs
which break-up an if/else block. Provide stubs for the ifdef-guarded TM
functions and remove the need for an ifdef in rt_sigreturn().

Rework the remaining TM ifdef in handle_rt_signal64() similar to
commit f1cf4f93de2f ("powerpc/signal32: Remove ifdefery in middle of if/else").

Unlike in the commit for ppc32, the ifdef can't be removed entirely
since uc_transact in sigframe depends on CONFIG_PPC_TRANSACTIONAL_MEM.

Signed-off-by: Christopher M. Riedl <cmr@codefail.de>
---
 arch/powerpc/kernel/process.c   |   3 +-
 arch/powerpc/kernel/signal_64.c | 102 ++++++++++++++++----------------
 2 files changed, 54 insertions(+), 51 deletions(-)

diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 924d023dad0a..08c3fbe45921 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1117,9 +1117,10 @@ void restore_tm_state(struct pt_regs *regs)
 	regs->msr |= msr_diff;
 }
 
-#else
+#else /* !CONFIG_PPC_TRANSACTIONAL_MEM */
 #define tm_recheckpoint_new_task(new)
 #define __switch_to_tm(prev, new)
+void tm_reclaim_current(uint8_t cause) {}
 #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
 
 static inline void save_sprs(struct thread_struct *t)
diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index 6ca546192cbf..bd8d210c9115 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -594,6 +594,12 @@ static long restore_tm_sigcontexts(struct task_struct *tsk,
 
 	return err;
 }
+#else /* !CONFIG_PPC_TRANSACTIONAL_MEM */
+static long restore_tm_sigcontexts(struct task_struct *tsk, struct sigcontext __user *sc,
+				   struct sigcontext __user *tm_sc)
+{
+	return -EINVAL;
+}
 #endif
 
 /*
@@ -710,9 +716,7 @@ SYSCALL_DEFINE0(rt_sigreturn)
 	struct pt_regs *regs = current_pt_regs();
 	struct ucontext __user *uc = (struct ucontext __user *)regs->gpr[1];
 	sigset_t set;
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
 	unsigned long msr;
-#endif
 
 	/* Always make any pending restarted system calls return -EINTR */
 	current->restart_block.fn = do_no_restart_syscall;
@@ -724,48 +728,50 @@ SYSCALL_DEFINE0(rt_sigreturn)
 		goto badframe;
 	set_current_blocked(&set);
 
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-	/*
-	 * If there is a transactional state then throw it away.
-	 * The purpose of a sigreturn is to destroy all traces of the
-	 * signal frame, this includes any transactional state created
-	 * within in. We only check for suspended as we can never be
-	 * active in the kernel, we are active, there is nothing better to
-	 * do than go ahead and Bad Thing later.
-	 * The cause is not important as there will never be a
-	 * recheckpoint so it's not user visible.
-	 */
-	if (MSR_TM_SUSPENDED(mfmsr()))
-		tm_reclaim_current(0);
+	if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM)) {
+		/*
+		 * If there is a transactional state then throw it away.
+		 * The purpose of a sigreturn is to destroy all traces of the
+		 * signal frame, this includes any transactional state created
+		 * within in. We only check for suspended as we can never be
+		 * active in the kernel, we are active, there is nothing better to
+		 * do than go ahead and Bad Thing later.
+		 * The cause is not important as there will never be a
+		 * recheckpoint so it's not user visible.
+		 */
+		if (MSR_TM_SUSPENDED(mfmsr()))
+			tm_reclaim_current(0);
 
-	/*
-	 * Disable MSR[TS] bit also, so, if there is an exception in the
-	 * code below (as a page fault in copy_ckvsx_to_user()), it does
-	 * not recheckpoint this task if there was a context switch inside
-	 * the exception.
-	 *
-	 * A major page fault can indirectly call schedule(). A reschedule
-	 * process in the middle of an exception can have a side effect
-	 * (Changing the CPU MSR[TS] state), since schedule() is called
-	 * with the CPU MSR[TS] disable and returns with MSR[TS]=Suspended
-	 * (switch_to() calls tm_recheckpoint() for the 'new' process). In
-	 * this case, the process continues to be the same in the CPU, but
-	 * the CPU state just changed.
-	 *
-	 * This can cause a TM Bad Thing, since the MSR in the stack will
-	 * have the MSR[TS]=0, and this is what will be used to RFID.
-	 *
-	 * Clearing MSR[TS] state here will avoid a recheckpoint if there
-	 * is any process reschedule in kernel space. The MSR[TS] state
-	 * does not need to be saved also, since it will be replaced with
-	 * the MSR[TS] that came from user context later, at
-	 * restore_tm_sigcontexts.
-	 */
-	regs->msr &= ~MSR_TS_MASK;
+		/*
+		 * Disable MSR[TS] bit also, so, if there is an exception in the
+		 * code below (as a page fault in copy_ckvsx_to_user()), it does
+		 * not recheckpoint this task if there was a context switch inside
+		 * the exception.
+		 *
+		 * A major page fault can indirectly call schedule(). A reschedule
+		 * process in the middle of an exception can have a side effect
+		 * (Changing the CPU MSR[TS] state), since schedule() is called
+		 * with the CPU MSR[TS] disable and returns with MSR[TS]=Suspended
+		 * (switch_to() calls tm_recheckpoint() for the 'new' process). In
+		 * this case, the process continues to be the same in the CPU, but
+		 * the CPU state just changed.
+		 *
+		 * This can cause a TM Bad Thing, since the MSR in the stack will
+		 * have the MSR[TS]=0, and this is what will be used to RFID.
+		 *
+		 * Clearing MSR[TS] state here will avoid a recheckpoint if there
+		 * is any process reschedule in kernel space. The MSR[TS] state
+		 * does not need to be saved also, since it will be replaced with
+		 * the MSR[TS] that came from user context later, at
+		 * restore_tm_sigcontexts.
+		 */
+		regs->msr &= ~MSR_TS_MASK;
 
-	if (__get_user(msr, &uc->uc_mcontext.gp_regs[PT_MSR]))
-		goto badframe;
-	if (MSR_TM_ACTIVE(msr)) {
+		if (__get_user(msr, &uc->uc_mcontext.gp_regs[PT_MSR]))
+			goto badframe;
+	}
+
+	if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) && MSR_TM_ACTIVE(msr)) {
 		/* We recheckpoint on return. */
 		struct ucontext __user *uc_transact;
 
@@ -778,9 +784,7 @@ SYSCALL_DEFINE0(rt_sigreturn)
 		if (restore_tm_sigcontexts(current, &uc->uc_mcontext,
 					   &uc_transact->uc_mcontext))
 			goto badframe;
-	} else
-#endif
-	{
+	} else {
 		/*
 		 * Fall through, for non-TM restore
 		 *
@@ -818,10 +822,8 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
 	unsigned long newsp = 0;
 	long err = 0;
 	struct pt_regs *regs = tsk->thread.regs;
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
 	/* Save the thread's msr before get_tm_stackpointer() changes it */
 	unsigned long msr = regs->msr;
-#endif
 
 	frame = get_sigframe(ksig, tsk, sizeof(*frame), 0);
 	if (!access_ok(frame, sizeof(*frame)))
@@ -836,8 +838,9 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
 	/* Create the ucontext.  */
 	err |= __put_user(0, &frame->uc.uc_flags);
 	err |= __save_altstack(&frame->uc.uc_stack, regs->gpr[1]);
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+
 	if (MSR_TM_ACTIVE(msr)) {
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
 		/* The ucontext_t passed to userland points to the second
 		 * ucontext_t (for transactional state) with its uc_link ptr.
 		 */
@@ -847,9 +850,8 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
 					    tsk, ksig->sig, NULL,
 					    (unsigned long)ksig->ka.sa.sa_handler,
 					    msr);
-	} else
 #endif
-	{
+	} else {
 		err |= __put_user(0, &frame->uc.uc_link);
 		prepare_setup_sigcontext(tsk);
 		err |= setup_sigcontext(&frame->uc.uc_mcontext, tsk, ksig->sig,
-- 
2.26.1


^ permalink raw reply related

* [PATCH v6 00/10] Improve signal performance on PPC64 with KUAP
From: Christopher M. Riedl @ 2021-02-21  1:23 UTC (permalink / raw)
  To: linuxppc-dev

As reported by Anton, there is a large penalty to signal handling
performance on radix systems using KUAP. The signal handling code
performs many user access operations, each of which needs to switch the
KUAP permissions bit to open and then close user access. This involves a
costly 'mtspr' operation [0].

There is existing work done on x86 and by Christophe Leroy for PPC32 to
instead open up user access in "blocks" using user_*_access_{begin,end}.
We can do the same in PPC64 to bring performance back up on KUAP-enabled
radix and now also hash MMU systems [1].

Hash MMU KUAP support along with uaccess flush has landed in linuxppc/next
since the last revision. This series also provides a large benefit on hash
with KUAP. However, in the hash implementation of KUAP the user AMR is
always restored during system_call_exception() which cannot be avoided.
Fewer user access switches naturally also result in less uaccess flushing.

The first two patches add some needed 'unsafe' versions of copy-from
functions. While these do not make use of asm-goto they still allow for
avoiding the repeated uaccess switches.

The third patch moves functions called by setup_sigcontext() into a new
prepare_setup_sigcontext() to simplify converting setup_sigcontext()
into an 'unsafe' version which assumes an open uaccess window later.

The fourth and fifths patches clean-up some of the Transactional Memory
ifdef stuff to simplify using uaccess blocks later.

The next two patches rewrite some of the signal64 helper functions to
be 'unsafe'. Finally, the last three patches update the main signal
handling functions to make use of the new 'unsafe' helpers and eliminate
some additional uaccess switching.

I used the will-it-scale signal1 benchmark to measure and compare
performance [2]. The below results are from running a minimal
kernel+initramfs QEMU/KVM guest on a POWER9 Blackbird:

	signal1_threads -t1 -s10

	|                              | hash   | radix  |
	| ---------------------------- | ------ | ------ |
	| linuxppc/next                | 117288 | 135555 |
	| linuxppc/next w/o KUAP+KUEP  | 224759 | 227886 |
	| unsafe-signal64              | 197378 | 233528 |

[0]: https://github.com/linuxppc/issues/issues/277
[1]: https://patchwork.ozlabs.org/project/linuxppc-dev/list/?series=196278
[2]: https://github.com/antonblanchard/will-it-scale/blob/master/tests/signal1.c

v6:	* Rebase on latest linuxppc/next and address feedback comments from
	  Daniel Axtens and friends (also pick up some Reviewed-by tags)
	* Simplify __get_user_sigset(), fix sparse warnings, and use it
	  in ppc32 signal handling
	* Remove ctx_has_vsx_region arg to prepare_setup_sigcontext()
	* Remove local buffer in copy_{fpr,vsx}_from_user()
	* Rework the TM ifdefery-removal and remove one of the ifdef
	  pairs altogether

v5:	* Use sizeof(buf) in copy_{vsx,fpr}_from_user() (Thanks David Laight)
	* Rebase on latest linuxppc/next

v4:	* Fix issues identified by Christophe Leroy (Thanks for review)
	* Use __get_user() directly to copy the 8B sigset_t

v3:	* Rebase on latest linuxppc/next
	* Reword confusing commit messages
	* Add missing comma in macro in signal.h which broke compiles without
	  CONFIG_ALTIVEC
	* Validate hash KUAP signal performance improvements

v2:	* Rebase on latest linuxppc/next + Christophe Leroy's PPC32
	  signal series
	* Simplify/remove TM ifdefery similar to PPC32 series and clean
	  up the uaccess begin/end calls
	* Isolate non-inline functions so they are not called when
	  uaccess window is open

Christopher M. Riedl (8):
  powerpc/uaccess: Add unsafe_copy_from_user
  powerpc/signal: Add unsafe_copy_{vsx,fpr}_from_user()
  powerpc/signal64: Remove non-inline calls from setup_sigcontext()
  powerpc: Reference parameter in MSR_TM_ACTIVE() macro
  powerpc/signal64: Remove TM ifdefery in middle of if/else block
  powerpc/signal64: Replace setup_sigcontext() w/
    unsafe_setup_sigcontext()
  powerpc/signal64: Replace restore_sigcontext() w/
    unsafe_restore_sigcontext()
  powerpc/signal: Use __get_user() to copy sigset_t

Daniel Axtens (2):
  powerpc/signal64: Rewrite handle_rt_signal64() to minimise uaccess
    switches
  powerpc/signal64: Rewrite rt_sigreturn() to minimise uaccess switches

 arch/powerpc/include/asm/reg.h     |   2 +-
 arch/powerpc/include/asm/uaccess.h |   3 +
 arch/powerpc/kernel/process.c      |   3 +-
 arch/powerpc/kernel/signal.h       |  33 +++
 arch/powerpc/kernel/signal_32.c    |   2 +-
 arch/powerpc/kernel/signal_64.c    | 315 +++++++++++++++++------------
 6 files changed, 227 insertions(+), 131 deletions(-)

-- 
2.26.1


^ permalink raw reply

* [PATCH v6 08/10] powerpc/signal64: Rewrite handle_rt_signal64() to minimise uaccess switches
From: Christopher M. Riedl @ 2021-02-21  1:23 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Daniel Axtens
In-Reply-To: <20210221012401.22328-1-cmr@codefail.de>

From: Daniel Axtens <dja@axtens.net>

Add uaccess blocks and use the 'unsafe' versions of functions doing user
access where possible to reduce the number of times uaccess has to be
opened/closed.

There is no 'unsafe' version of copy_siginfo_to_user, so move it
slightly to allow for a "longer" uaccess block.

Signed-off-by: Daniel Axtens <dja@axtens.net>
Co-developed-by: Christopher M. Riedl <cmr@codefail.de>
Signed-off-by: Christopher M. Riedl <cmr@codefail.de>
---
 arch/powerpc/kernel/signal_64.c | 56 ++++++++++++++++++++-------------
 1 file changed, 35 insertions(+), 21 deletions(-)

diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index 76b525261f61..4bf73731533f 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -853,45 +853,52 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
 	unsigned long msr = regs->msr;
 
 	frame = get_sigframe(ksig, tsk, sizeof(*frame), 0);
-	if (!access_ok(frame, sizeof(*frame)))
-		goto badframe;
 
-	err |= __put_user(&frame->info, &frame->pinfo);
-	err |= __put_user(&frame->uc, &frame->puc);
-	err |= copy_siginfo_to_user(&frame->info, &ksig->info);
-	if (err)
+	/* This only applies when calling unsafe_setup_sigcontext() and must be
+	 * called before opening the uaccess window.
+	 */
+	if (!MSR_TM_ACTIVE(msr))
+		prepare_setup_sigcontext(tsk);
+
+	if (!user_write_access_begin(frame, sizeof(*frame)))
 		goto badframe;
 
+	unsafe_put_user(&frame->info, &frame->pinfo, badframe_block);
+	unsafe_put_user(&frame->uc, &frame->puc, badframe_block);
+
 	/* Create the ucontext.  */
-	err |= __put_user(0, &frame->uc.uc_flags);
-	err |= __save_altstack(&frame->uc.uc_stack, regs->gpr[1]);
+	unsafe_put_user(0, &frame->uc.uc_flags, badframe_block);
+	unsafe_save_altstack(&frame->uc.uc_stack, regs->gpr[1], badframe_block);
 
 	if (MSR_TM_ACTIVE(msr)) {
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
 		/* The ucontext_t passed to userland points to the second
 		 * ucontext_t (for transactional state) with its uc_link ptr.
 		 */
-		err |= __put_user(&frame->uc_transact, &frame->uc.uc_link);
+		unsafe_put_user(&frame->uc_transact, &frame->uc.uc_link, badframe_block);
+
+		user_write_access_end();
+
 		err |= setup_tm_sigcontexts(&frame->uc.uc_mcontext,
 					    &frame->uc_transact.uc_mcontext,
 					    tsk, ksig->sig, NULL,
 					    (unsigned long)ksig->ka.sa.sa_handler,
 					    msr);
+
+		if (!user_write_access_begin(&frame->uc.uc_sigmask,
+					     sizeof(frame->uc.uc_sigmask)))
+			goto badframe;
+
 #endif
 	} else {
-		err |= __put_user(0, &frame->uc.uc_link);
-		prepare_setup_sigcontext(tsk);
-		if (!user_write_access_begin(&frame->uc.uc_mcontext,
-					     sizeof(frame->uc.uc_mcontext)))
-			return -EFAULT;
-		err |= __unsafe_setup_sigcontext(&frame->uc.uc_mcontext, tsk,
-						ksig->sig, NULL,
-						(unsigned long)ksig->ka.sa.sa_handler, 1);
-		user_write_access_end();
+		unsafe_put_user(0, &frame->uc.uc_link, badframe_block);
+		unsafe_setup_sigcontext(&frame->uc.uc_mcontext, tsk, ksig->sig,
+					NULL, (unsigned long)ksig->ka.sa.sa_handler,
+					1, badframe_block);
 	}
-	err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
-	if (err)
-		goto badframe;
+
+	unsafe_copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set), badframe_block);
+	user_write_access_end();
 
 	/* Make sure signal handler doesn't get spurious FP exceptions */
 	tsk->thread.fp_state.fpscr = 0;
@@ -906,6 +913,11 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
 		regs->nip = (unsigned long) &frame->tramp[0];
 	}
 
+
+	/* Save the siginfo outside of the unsafe block. */
+	if (copy_siginfo_to_user(&frame->info, &ksig->info))
+		goto badframe;
+
 	/* Allocate a dummy caller frame for the signal handler. */
 	newsp = ((unsigned long)frame) - __SIGNAL_FRAMESIZE;
 	err |= put_user(regs->gpr[1], (unsigned long __user *)newsp);
@@ -945,6 +957,8 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
 
 	return 0;
 
+badframe_block:
+	user_write_access_end();
 badframe:
 	signal_fault(current, regs, "handle_rt_signal64", frame);
 
-- 
2.26.1


^ permalink raw reply related

* [PATCH v6 10/10] powerpc/signal: Use __get_user() to copy sigset_t
From: Christopher M. Riedl @ 2021-02-21  1:24 UTC (permalink / raw)
  To: linuxppc-dev
In-Reply-To: <20210221012401.22328-1-cmr@codefail.de>

Usually sigset_t is exactly 8B which is a "trivial" size and does not
warrant using __copy_from_user(). Use __get_user() directly in
anticipation of future work to remove the trivial size optimizations
from __copy_from_user().

The ppc32 implementation of get_sigset_t() previously called
copy_from_user() which, unlike __copy_from_user(), calls access_ok().
Replacing this w/ __get_user() (no access_ok()) is fine here since both
callsites in signal_32.c are preceded by an earlier access_ok().

Signed-off-by: Christopher M. Riedl <cmr@codefail.de>
---
 arch/powerpc/kernel/signal.h    | 7 +++++++
 arch/powerpc/kernel/signal_32.c | 2 +-
 arch/powerpc/kernel/signal_64.c | 4 ++--
 3 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/kernel/signal.h b/arch/powerpc/kernel/signal.h
index d8dd76b1dc94..1393876f3814 100644
--- a/arch/powerpc/kernel/signal.h
+++ b/arch/powerpc/kernel/signal.h
@@ -19,6 +19,13 @@ extern int handle_signal32(struct ksignal *ksig, sigset_t *oldset,
 extern int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset,
 			      struct task_struct *tsk);
 
+static inline int __get_user_sigset(sigset_t *dst, const sigset_t __user *src)
+{
+	BUILD_BUG_ON(sizeof(sigset_t) != sizeof(u64));
+
+	return __get_user(dst->sig[0], (u64 __user *)&src->sig[0]);
+}
+
 #ifdef CONFIG_VSX
 extern unsigned long copy_vsx_to_user(void __user *to,
 				      struct task_struct *task);
diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c
index 75ee918a120a..c505b444a613 100644
--- a/arch/powerpc/kernel/signal_32.c
+++ b/arch/powerpc/kernel/signal_32.c
@@ -144,7 +144,7 @@ static inline int restore_general_regs(struct pt_regs *regs,
 
 static inline int get_sigset_t(sigset_t *set, const sigset_t __user *uset)
 {
-	return copy_from_user(set, uset, sizeof(*uset));
+	return __get_user_sigset(set, uset);
 }
 
 #define to_user_ptr(p)		((unsigned long)(p))
diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index 3dd89f99e26f..dc5bac727a62 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -707,7 +707,7 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx,
 	 * We kill the task with a SIGSEGV in this situation.
 	 */
 
-	if (__copy_from_user(&set, &new_ctx->uc_sigmask, sizeof(set)))
+	if (__get_user_sigset(&set, &new_ctx->uc_sigmask))
 		do_exit(SIGSEGV);
 	set_current_blocked(&set);
 
@@ -746,7 +746,7 @@ SYSCALL_DEFINE0(rt_sigreturn)
 	if (!access_ok(uc, sizeof(*uc)))
 		goto badframe;
 
-	if (__copy_from_user(&set, &uc->uc_sigmask, sizeof(set)))
+	if (__get_user_sigset(&set, &uc->uc_sigmask))
 		goto badframe;
 	set_current_blocked(&set);
 
-- 
2.26.1


^ permalink raw reply related

* [PATCH v6 09/10] powerpc/signal64: Rewrite rt_sigreturn() to minimise uaccess switches
From: Christopher M. Riedl @ 2021-02-21  1:24 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Daniel Axtens
In-Reply-To: <20210221012401.22328-1-cmr@codefail.de>

From: Daniel Axtens <dja@axtens.net>

Add uaccess blocks and use the 'unsafe' versions of functions doing user
access where possible to reduce the number of times uaccess has to be
opened/closed.

Signed-off-by: Daniel Axtens <dja@axtens.net>
Co-developed-by: Christopher M. Riedl <cmr@codefail.de>
Signed-off-by: Christopher M. Riedl <cmr@codefail.de>
---
 arch/powerpc/kernel/signal_64.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index 4bf73731533f..3dd89f99e26f 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -821,11 +821,11 @@ SYSCALL_DEFINE0(rt_sigreturn)
 		 */
 		current->thread.regs->msr &= ~MSR_TS_MASK;
 		if (!user_read_access_begin(&uc->uc_mcontext, sizeof(uc->uc_mcontext)))
-			return -EFAULT;
-		if (__unsafe_restore_sigcontext(current, NULL, 1, &uc->uc_mcontext)) {
-			user_read_access_end();
 			goto badframe;
-		}
+
+		unsafe_restore_sigcontext(current, NULL, 1, &uc->uc_mcontext,
+					  badframe_block);
+
 		user_read_access_end();
 	}
 
@@ -835,6 +835,8 @@ SYSCALL_DEFINE0(rt_sigreturn)
 	set_thread_flag(TIF_RESTOREALL);
 	return 0;
 
+badframe_block:
+	user_read_access_end();
 badframe:
 	signal_fault(current, regs, "rt_sigreturn", uc);
 
-- 
2.26.1


^ permalink raw reply related

* [PATCH v6 07/10] powerpc/signal64: Replace restore_sigcontext() w/ unsafe_restore_sigcontext()
From: Christopher M. Riedl @ 2021-02-21  1:23 UTC (permalink / raw)
  To: linuxppc-dev
In-Reply-To: <20210221012401.22328-1-cmr@codefail.de>

Previously restore_sigcontext() performed a costly KUAP switch on every
uaccess operation. These repeated uaccess switches cause a significant
drop in signal handling performance.

Rewrite restore_sigcontext() to assume that a userspace read access
window is open by replacing all uaccess functions with their 'unsafe'
versions. Modify the callers to first open, call
unsafe_restore_sigcontext(), and then close the uaccess window.

Signed-off-by: Christopher M. Riedl <cmr@codefail.de>
---
 arch/powerpc/kernel/signal_64.c | 68 ++++++++++++++++++++-------------
 1 file changed, 41 insertions(+), 27 deletions(-)

diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index 3faaa736ed62..76b525261f61 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -326,14 +326,14 @@ static long setup_tm_sigcontexts(struct sigcontext __user *sc,
 /*
  * Restore the sigcontext from the signal frame.
  */
-
-static long restore_sigcontext(struct task_struct *tsk, sigset_t *set, int sig,
-			      struct sigcontext __user *sc)
+#define unsafe_restore_sigcontext(tsk, set, sig, sc, e) \
+	unsafe_op_wrap(__unsafe_restore_sigcontext(tsk, set, sig, sc), e)
+static long notrace __unsafe_restore_sigcontext(struct task_struct *tsk, sigset_t *set,
+						int sig, struct sigcontext __user *sc)
 {
 #ifdef CONFIG_ALTIVEC
 	elf_vrreg_t __user *v_regs;
 #endif
-	unsigned long err = 0;
 	unsigned long save_r13 = 0;
 	unsigned long msr;
 	struct pt_regs *regs = tsk->thread.regs;
@@ -348,27 +348,28 @@ static long restore_sigcontext(struct task_struct *tsk, sigset_t *set, int sig,
 		save_r13 = regs->gpr[13];
 
 	/* copy the GPRs */
-	err |= __copy_from_user(regs->gpr, sc->gp_regs, sizeof(regs->gpr));
-	err |= __get_user(regs->nip, &sc->gp_regs[PT_NIP]);
+	unsafe_copy_from_user(regs->gpr, sc->gp_regs, sizeof(regs->gpr),
+			      efault_out);
+	unsafe_get_user(regs->nip, &sc->gp_regs[PT_NIP], efault_out);
 	/* get MSR separately, transfer the LE bit if doing signal return */
-	err |= __get_user(msr, &sc->gp_regs[PT_MSR]);
+	unsafe_get_user(msr, &sc->gp_regs[PT_MSR], efault_out);
 	if (sig)
 		regs->msr = (regs->msr & ~MSR_LE) | (msr & MSR_LE);
-	err |= __get_user(regs->orig_gpr3, &sc->gp_regs[PT_ORIG_R3]);
-	err |= __get_user(regs->ctr, &sc->gp_regs[PT_CTR]);
-	err |= __get_user(regs->link, &sc->gp_regs[PT_LNK]);
-	err |= __get_user(regs->xer, &sc->gp_regs[PT_XER]);
-	err |= __get_user(regs->ccr, &sc->gp_regs[PT_CCR]);
+	unsafe_get_user(regs->orig_gpr3, &sc->gp_regs[PT_ORIG_R3], efault_out);
+	unsafe_get_user(regs->ctr, &sc->gp_regs[PT_CTR], efault_out);
+	unsafe_get_user(regs->link, &sc->gp_regs[PT_LNK], efault_out);
+	unsafe_get_user(regs->xer, &sc->gp_regs[PT_XER], efault_out);
+	unsafe_get_user(regs->ccr, &sc->gp_regs[PT_CCR], efault_out);
 	/* Don't allow userspace to set SOFTE */
 	set_trap_norestart(regs);
-	err |= __get_user(regs->dar, &sc->gp_regs[PT_DAR]);
-	err |= __get_user(regs->dsisr, &sc->gp_regs[PT_DSISR]);
-	err |= __get_user(regs->result, &sc->gp_regs[PT_RESULT]);
+	unsafe_get_user(regs->dar, &sc->gp_regs[PT_DAR], efault_out);
+	unsafe_get_user(regs->dsisr, &sc->gp_regs[PT_DSISR], efault_out);
+	unsafe_get_user(regs->result, &sc->gp_regs[PT_RESULT], efault_out);
 
 	if (!sig)
 		regs->gpr[13] = save_r13;
 	if (set != NULL)
-		err |=  __get_user(set->sig[0], &sc->oldmask);
+		unsafe_get_user(set->sig[0], &sc->oldmask, efault_out);
 
 	/*
 	 * Force reload of FP/VEC.
@@ -378,29 +379,28 @@ static long restore_sigcontext(struct task_struct *tsk, sigset_t *set, int sig,
 	regs->msr &= ~(MSR_FP | MSR_FE0 | MSR_FE1 | MSR_VEC | MSR_VSX);
 
 #ifdef CONFIG_ALTIVEC
-	err |= __get_user(v_regs, &sc->v_regs);
-	if (err)
-		return err;
+	unsafe_get_user(v_regs, &sc->v_regs, efault_out);
 	if (v_regs && !access_ok(v_regs, 34 * sizeof(vector128)))
 		return -EFAULT;
 	/* Copy 33 vec registers (vr0..31 and vscr) from the stack */
 	if (v_regs != NULL && (msr & MSR_VEC) != 0) {
-		err |= __copy_from_user(&tsk->thread.vr_state, v_regs,
-					33 * sizeof(vector128));
+		unsafe_copy_from_user(&tsk->thread.vr_state, v_regs,
+				      33 * sizeof(vector128), efault_out);
 		tsk->thread.used_vr = true;
 	} else if (tsk->thread.used_vr) {
 		memset(&tsk->thread.vr_state, 0, 33 * sizeof(vector128));
 	}
 	/* Always get VRSAVE back */
 	if (v_regs != NULL)
-		err |= __get_user(tsk->thread.vrsave, (u32 __user *)&v_regs[33]);
+		unsafe_get_user(tsk->thread.vrsave, (u32 __user *)&v_regs[33],
+				efault_out);
 	else
 		tsk->thread.vrsave = 0;
 	if (cpu_has_feature(CPU_FTR_ALTIVEC))
 		mtspr(SPRN_VRSAVE, tsk->thread.vrsave);
 #endif /* CONFIG_ALTIVEC */
 	/* restore floating point */
-	err |= copy_fpr_from_user(tsk, &sc->fp_regs);
+	unsafe_copy_fpr_from_user(tsk, &sc->fp_regs, efault_out);
 #ifdef CONFIG_VSX
 	/*
 	 * Get additional VSX data. Update v_regs to point after the
@@ -409,14 +409,17 @@ static long restore_sigcontext(struct task_struct *tsk, sigset_t *set, int sig,
 	 */
 	v_regs += ELF_NVRREG;
 	if ((msr & MSR_VSX) != 0) {
-		err |= copy_vsx_from_user(tsk, v_regs);
+		unsafe_copy_vsx_from_user(tsk, v_regs, efault_out);
 		tsk->thread.used_vsr = true;
 	} else {
 		for (i = 0; i < 32 ; i++)
 			tsk->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = 0;
 	}
 #endif
-	return err;
+	return 0;
+
+efault_out:
+	return -EFAULT;
 }
 
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
@@ -707,8 +710,14 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx,
 	if (__copy_from_user(&set, &new_ctx->uc_sigmask, sizeof(set)))
 		do_exit(SIGSEGV);
 	set_current_blocked(&set);
-	if (restore_sigcontext(current, NULL, 0, &new_ctx->uc_mcontext))
+
+	if (!user_read_access_begin(new_ctx, ctx_size))
+		return -EFAULT;
+	if (__unsafe_restore_sigcontext(current, NULL, 0, &new_ctx->uc_mcontext)) {
+		user_read_access_end();
 		do_exit(SIGSEGV);
+	}
+	user_read_access_end();
 
 	/* This returns like rt_sigreturn */
 	set_thread_flag(TIF_RESTOREALL);
@@ -811,8 +820,13 @@ SYSCALL_DEFINE0(rt_sigreturn)
 		 * causing a TM bad thing.
 		 */
 		current->thread.regs->msr &= ~MSR_TS_MASK;
-		if (restore_sigcontext(current, NULL, 1, &uc->uc_mcontext))
+		if (!user_read_access_begin(&uc->uc_mcontext, sizeof(uc->uc_mcontext)))
+			return -EFAULT;
+		if (__unsafe_restore_sigcontext(current, NULL, 1, &uc->uc_mcontext)) {
+			user_read_access_end();
 			goto badframe;
+		}
+		user_read_access_end();
 	}
 
 	if (restore_altstack(&uc->uc_stack))
-- 
2.26.1


^ permalink raw reply related

* [PATCH v6 02/10] powerpc/signal: Add unsafe_copy_{vsx, fpr}_from_user()
From: Christopher M. Riedl @ 2021-02-21  1:23 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Daniel Axtens
In-Reply-To: <20210221012401.22328-1-cmr@codefail.de>

Reuse the "safe" implementation from signal.c but call unsafe_get_user()
directly in a loop to avoid the intermediate copy into a local buffer.

Signed-off-by: Christopher M. Riedl <cmr@codefail.de>
Reviewed-by: Daniel Axtens <dja@axtens.net>
---
 arch/powerpc/kernel/signal.h | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

diff --git a/arch/powerpc/kernel/signal.h b/arch/powerpc/kernel/signal.h
index 2559a681536e..d8dd76b1dc94 100644
--- a/arch/powerpc/kernel/signal.h
+++ b/arch/powerpc/kernel/signal.h
@@ -53,6 +53,26 @@ unsigned long copy_ckfpr_from_user(struct task_struct *task, void __user *from);
 				&buf[i], label);\
 } while (0)
 
+#define unsafe_copy_fpr_from_user(task, from, label)	do {		\
+	struct task_struct *__t = task;					\
+	u64 __user *buf = (u64 __user *)from;				\
+	int i;								\
+									\
+	for (i = 0; i < ELF_NFPREG - 1; i++)				\
+		unsafe_get_user(__t->thread.TS_FPR(i), &buf[i], label); \
+	unsafe_get_user(__t->thread.fp_state.fpscr, &buf[i], label);	\
+} while (0)
+
+#define unsafe_copy_vsx_from_user(task, from, label)	do {		\
+	struct task_struct *__t = task;					\
+	u64 __user *buf = (u64 __user *)from;				\
+	int i;								\
+									\
+	for (i = 0; i < ELF_NVSRHALFREG ; i++)				\
+		unsafe_get_user(__t->thread.fp_state.fpr[i][TS_VSRLOWOFFSET], \
+				&buf[i], label);			\
+} while (0)
+
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
 #define unsafe_copy_ckfpr_to_user(to, task, label)	do {		\
 	struct task_struct *__t = task;					\
@@ -80,6 +100,10 @@ unsigned long copy_ckfpr_from_user(struct task_struct *task, void __user *from);
 	unsafe_copy_to_user(to, (task)->thread.fp_state.fpr,	\
 			    ELF_NFPREG * sizeof(double), label)
 
+#define unsafe_copy_fpr_from_user(task, from, label)			\
+	unsafe_copy_from_user((task)->thread.fp_state.fpr, from,	\
+			    ELF_NFPREG * sizeof(double), label)
+
 static inline unsigned long
 copy_fpr_to_user(void __user *to, struct task_struct *task)
 {
@@ -115,6 +139,8 @@ copy_ckfpr_from_user(struct task_struct *task, void __user *from)
 #else
 #define unsafe_copy_fpr_to_user(to, task, label) do { } while (0)
 
+#define unsafe_copy_fpr_from_user(task, from, label) do { } while (0)
+
 static inline unsigned long
 copy_fpr_to_user(void __user *to, struct task_struct *task)
 {
-- 
2.26.1


^ permalink raw reply related

* [PATCH v6 03/10] powerpc/signal64: Remove non-inline calls from setup_sigcontext()
From: Christopher M. Riedl @ 2021-02-21  1:23 UTC (permalink / raw)
  To: linuxppc-dev
In-Reply-To: <20210221012401.22328-1-cmr@codefail.de>

The majority of setup_sigcontext() can be refactored to execute in an
"unsafe" context assuming an open uaccess window except for some
non-inline function calls. Move these out into a separate
prepare_setup_sigcontext() function which must be called first and
before opening up a uaccess window. Non-inline function calls should be
avoided during a uaccess window for a few reasons:

	- KUAP should be enabled for as much kernel code as possible.
	  Opening a uaccess window disables KUAP which means any code
	  executed during this time contributes to a potential attack
	  surface.

	- Non-inline functions default to traceable which means they are
	  instrumented for ftrace. This adds more code which could run
	  with KUAP disabled.

	- Powerpc does not currently support the objtool UACCESS checks.
	  All code running with uaccess must be audited manually which
	  means: less code -> less work -> fewer problems (in theory).

A follow-up commit converts setup_sigcontext() to be "unsafe".

Signed-off-by: Christopher M. Riedl <cmr@codefail.de>
---
 arch/powerpc/kernel/signal_64.c | 32 +++++++++++++++++++++-----------
 1 file changed, 21 insertions(+), 11 deletions(-)

diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index f9e4a1ac440f..6ca546192cbf 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -79,6 +79,24 @@ static elf_vrreg_t __user *sigcontext_vmx_regs(struct sigcontext __user *sc)
 }
 #endif
 
+static void prepare_setup_sigcontext(struct task_struct *tsk)
+{
+#ifdef CONFIG_ALTIVEC
+	/* save altivec registers */
+	if (tsk->thread.used_vr)
+		flush_altivec_to_thread(tsk);
+	if (cpu_has_feature(CPU_FTR_ALTIVEC))
+		tsk->thread.vrsave = mfspr(SPRN_VRSAVE);
+#endif /* CONFIG_ALTIVEC */
+
+	flush_fp_to_thread(tsk);
+
+#ifdef CONFIG_VSX
+	if (tsk->thread.used_vsr)
+		flush_vsx_to_thread(tsk);
+#endif /* CONFIG_VSX */
+}
+
 /*
  * Set up the sigcontext for the signal frame.
  */
@@ -97,7 +115,6 @@ static long setup_sigcontext(struct sigcontext __user *sc,
 	 */
 #ifdef CONFIG_ALTIVEC
 	elf_vrreg_t __user *v_regs = sigcontext_vmx_regs(sc);
-	unsigned long vrsave;
 #endif
 	struct pt_regs *regs = tsk->thread.regs;
 	unsigned long msr = regs->msr;
@@ -112,7 +129,6 @@ static long setup_sigcontext(struct sigcontext __user *sc,
 
 	/* save altivec registers */
 	if (tsk->thread.used_vr) {
-		flush_altivec_to_thread(tsk);
 		/* Copy 33 vec registers (vr0..31 and vscr) to the stack */
 		err |= __copy_to_user(v_regs, &tsk->thread.vr_state,
 				      33 * sizeof(vector128));
@@ -124,17 +140,10 @@ static long setup_sigcontext(struct sigcontext __user *sc,
 	/* We always copy to/from vrsave, it's 0 if we don't have or don't
 	 * use altivec.
 	 */
-	vrsave = 0;
-	if (cpu_has_feature(CPU_FTR_ALTIVEC)) {
-		vrsave = mfspr(SPRN_VRSAVE);
-		tsk->thread.vrsave = vrsave;
-	}
-
-	err |= __put_user(vrsave, (u32 __user *)&v_regs[33]);
+	err |= __put_user(tsk->thread.vrsave, (u32 __user *)&v_regs[33]);
 #else /* CONFIG_ALTIVEC */
 	err |= __put_user(0, &sc->v_regs);
 #endif /* CONFIG_ALTIVEC */
-	flush_fp_to_thread(tsk);
 	/* copy fpr regs and fpscr */
 	err |= copy_fpr_to_user(&sc->fp_regs, tsk);
 
@@ -150,7 +159,6 @@ static long setup_sigcontext(struct sigcontext __user *sc,
 	 * VMX data.
 	 */
 	if (tsk->thread.used_vsr && ctx_has_vsx_region) {
-		flush_vsx_to_thread(tsk);
 		v_regs += ELF_NVRREG;
 		err |= copy_vsx_to_user(v_regs, tsk);
 		/* set MSR_VSX in the MSR value in the frame to
@@ -655,6 +663,7 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx,
 		ctx_has_vsx_region = 1;
 
 	if (old_ctx != NULL) {
+		prepare_setup_sigcontext(current);
 		if (!access_ok(old_ctx, ctx_size)
 		    || setup_sigcontext(&old_ctx->uc_mcontext, current, 0, NULL, 0,
 					ctx_has_vsx_region)
@@ -842,6 +851,7 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
 #endif
 	{
 		err |= __put_user(0, &frame->uc.uc_link);
+		prepare_setup_sigcontext(tsk);
 		err |= setup_sigcontext(&frame->uc.uc_mcontext, tsk, ksig->sig,
 					NULL, (unsigned long)ksig->ka.sa.sa_handler,
 					1);
-- 
2.26.1


^ permalink raw reply related

* [PATCH v6 04/10] powerpc: Reference parameter in MSR_TM_ACTIVE() macro
From: Christopher M. Riedl @ 2021-02-21  1:23 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Daniel Axtens
In-Reply-To: <20210221012401.22328-1-cmr@codefail.de>

Unlike the other MSR_TM_* macros, MSR_TM_ACTIVE does not reference or
use its parameter unless CONFIG_PPC_TRANSACTIONAL_MEM is defined. This
causes an 'unused variable' compile warning unless the variable is also
guarded with CONFIG_PPC_TRANSACTIONAL_MEM.

Reference but do nothing with the argument in the macro to avoid a
potential compile warning.

Signed-off-by: Christopher M. Riedl <cmr@codefail.de>
Reviewed-by: Daniel Axtens <dja@axtens.net>
---
 arch/powerpc/include/asm/reg.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index da103e92c112..1be20bc8dce2 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -124,7 +124,7 @@
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
 #define MSR_TM_ACTIVE(x) (((x) & MSR_TS_MASK) != 0) /* Transaction active? */
 #else
-#define MSR_TM_ACTIVE(x) 0
+#define MSR_TM_ACTIVE(x) ((void)(x), 0)
 #endif
 
 #if defined(CONFIG_PPC_BOOK3S_64)
-- 
2.26.1


^ permalink raw reply related

* [PATCH v6 01/10] powerpc/uaccess: Add unsafe_copy_from_user
From: Christopher M. Riedl @ 2021-02-21  1:23 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Daniel Axtens
In-Reply-To: <20210221012401.22328-1-cmr@codefail.de>

Just wrap __copy_tofrom_user() for the usual 'unsafe' pattern which
accepts a label to goto on error.

Signed-off-by: Christopher M. Riedl <cmr@codefail.de>
Reviewed-by: Daniel Axtens <dja@axtens.net>
---
 arch/powerpc/include/asm/uaccess.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
index 78e2a3990eab..33b2de642120 100644
--- a/arch/powerpc/include/asm/uaccess.h
+++ b/arch/powerpc/include/asm/uaccess.h
@@ -487,6 +487,9 @@ user_write_access_begin(const void __user *ptr, size_t len)
 #define unsafe_put_user(x, p, e) \
 	__unsafe_put_user_goto((__typeof__(*(p)))(x), (p), sizeof(*(p)), e)
 
+#define unsafe_copy_from_user(d, s, l, e) \
+	unsafe_op_wrap(__copy_tofrom_user((__force void __user *)d, s, l), e)
+
 #define unsafe_copy_to_user(d, s, l, e) \
 do {									\
 	u8 __user *_dst = (u8 __user *)(d);				\
-- 
2.26.1


^ permalink raw reply related

* Re: [PATCH v2] powerpc/kexec_file: Restore FDT size estimation for kdump kernel
From: Hari Bathini @ 2021-02-20 17:13 UTC (permalink / raw)
  To: Thiago Jung Bauermann, linuxppc-dev
  Cc: Rob Herring, kexec, linux-kernel, Mimi Zohar,
	Lakshmi Ramasubramanian
In-Reply-To: <20210220005204.1417200-1-bauerman@linux.ibm.com>



On 20/02/21 6:22 am, Thiago Jung Bauermann wrote:
> Commit 2377c92e37fe ("powerpc/kexec_file: fix FDT size estimation for kdump
> kernel") fixed how elf64_load() estimates the FDT size needed by the
> crashdump kernel.
> 
> At the same time, commit 130b2d59cec0 ("powerpc: Use common
> of_kexec_alloc_and_setup_fdt()") changed the same code to use the generic
> function of_kexec_alloc_and_setup_fdt() to calculate the FDT size. That
> change made the code overestimate it a bit by counting twice the space
> required for the kernel command line and /chosen properties.
> 
> Therefore change kexec_fdt_totalsize_ppc64() to calculate just the extra
> space needed by the kdump kernel, and change the function name so that it
> better reflects what the function is now doing.

Thanks for fixing this, Thiago.

Reviewed-by: Hari Bathini <hbathini@linux.ibm.com>

> 
> Signed-off-by: Thiago Jung Bauermann <bauerman@linux.ibm.com>
> Reviewed-by: Lakshmi Ramasubramanian <nramas@linux.microsoft.com>
> ---
>   arch/powerpc/include/asm/kexec.h  |  2 +-
>   arch/powerpc/kexec/elf_64.c       |  2 +-
>   arch/powerpc/kexec/file_load_64.c | 26 ++++++++------------------
>   3 files changed, 10 insertions(+), 20 deletions(-)
> 
> Applies on top of next-20210219.
> 
> Changes since v1:
> 
> - Adjusted comment describing kexec_extra_fdt_size_ppc64() as suggested
>    by Lakshmi.
> 
> diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h
> index baab158e215c..5a11cc8d2350 100644
> --- a/arch/powerpc/include/asm/kexec.h
> +++ b/arch/powerpc/include/asm/kexec.h
> @@ -128,7 +128,7 @@ int load_crashdump_segments_ppc64(struct kimage *image,
>   int setup_purgatory_ppc64(struct kimage *image, const void *slave_code,
>   			  const void *fdt, unsigned long kernel_load_addr,
>   			  unsigned long fdt_load_addr);
> -unsigned int kexec_fdt_totalsize_ppc64(struct kimage *image);
> +unsigned int kexec_extra_fdt_size_ppc64(struct kimage *image);
>   int setup_new_fdt_ppc64(const struct kimage *image, void *fdt,
>   			unsigned long initrd_load_addr,
>   			unsigned long initrd_len, const char *cmdline);
> diff --git a/arch/powerpc/kexec/elf_64.c b/arch/powerpc/kexec/elf_64.c
> index 0492ca6003f3..5a569bb51349 100644
> --- a/arch/powerpc/kexec/elf_64.c
> +++ b/arch/powerpc/kexec/elf_64.c
> @@ -104,7 +104,7 @@ static void *elf64_load(struct kimage *image, char *kernel_buf,
>   
>   	fdt = of_kexec_alloc_and_setup_fdt(image, initrd_load_addr,
>   					   initrd_len, cmdline,
> -					   kexec_fdt_totalsize_ppc64(image));
> +					   kexec_extra_fdt_size_ppc64(image));
>   	if (!fdt) {
>   		pr_err("Error setting up the new device tree.\n");
>   		ret = -EINVAL;
> diff --git a/arch/powerpc/kexec/file_load_64.c b/arch/powerpc/kexec/file_load_64.c
> index 3609de30a170..297f73795a1f 100644
> --- a/arch/powerpc/kexec/file_load_64.c
> +++ b/arch/powerpc/kexec/file_load_64.c
> @@ -927,37 +927,27 @@ int setup_purgatory_ppc64(struct kimage *image, const void *slave_code,
>   }
>   
>   /**
> - * kexec_fdt_totalsize_ppc64 - Return the estimated size needed to setup FDT
> - *                             for kexec/kdump kernel.
> - * @image:                     kexec image being loaded.
> + * kexec_extra_fdt_size_ppc64 - Return the estimated additional size needed to
> + *                              setup FDT for kexec/kdump kernel.
> + * @image:                      kexec image being loaded.
>    *
> - * Returns the estimated size needed for kexec/kdump kernel FDT.
> + * Returns the estimated extra size needed for kexec/kdump kernel FDT.
>    */
> -unsigned int kexec_fdt_totalsize_ppc64(struct kimage *image)
> +unsigned int kexec_extra_fdt_size_ppc64(struct kimage *image)
>   {
> -	unsigned int fdt_size;
>   	u64 usm_entries;
>   
> -	/*
> -	 * The below estimate more than accounts for a typical kexec case where
> -	 * the additional space is to accommodate things like kexec cmdline,
> -	 * chosen node with properties for initrd start & end addresses and
> -	 * a property to indicate kexec boot..
> -	 */
> -	fdt_size = fdt_totalsize(initial_boot_params) + (2 * COMMAND_LINE_SIZE);
>   	if (image->type != KEXEC_TYPE_CRASH)
> -		return fdt_size;
> +		return 0;
>   
>   	/*
> -	 * For kdump kernel, also account for linux,usable-memory and
> +	 * For kdump kernel, account for linux,usable-memory and
>   	 * linux,drconf-usable-memory properties. Get an approximate on the
>   	 * number of usable memory entries and use for FDT size estimation.
>   	 */
>   	usm_entries = ((memblock_end_of_DRAM() / drmem_lmb_size()) +
>   		       (2 * (resource_size(&crashk_res) / drmem_lmb_size())));
> -	fdt_size += (unsigned int)(usm_entries * sizeof(u64));
> -
> -	return fdt_size;
> +	return (unsigned int)(usm_entries * sizeof(u64));
>   }
>   
>   /**
> 

^ permalink raw reply

* [PATCH] powerpc/sstep: Use bitwise instead of arithmetic operator for flags
From: Yang Li @ 2021-02-20  8:57 UTC (permalink / raw)
  To: mpe; +Cc: Yang Li, paulus, linuxppc-dev, linux-kernel

Fix the following coccinelle warnings:
./arch/powerpc/lib/sstep.c:1090:20-21: WARNING: sum of probable
bitmasks, consider |
./arch/powerpc/lib/sstep.c:1115:20-21: WARNING: sum of probable
bitmasks, consider |
./arch/powerpc/lib/sstep.c:1134:20-21: WARNING: sum of probable
bitmasks, consider |

Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Signed-off-by: Yang Li <yang.lee@linux.alibaba.com>
---
 arch/powerpc/lib/sstep.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
index ede093e..e568cc5 100644
--- a/arch/powerpc/lib/sstep.c
+++ b/arch/powerpc/lib/sstep.c
@@ -1087,7 +1087,7 @@ static nokprobe_inline void add_with_carry(const struct pt_regs *regs,
 
 	if (carry_in)
 		++val;
-	op->type = COMPUTE + SETREG + SETXER;
+	op->type = COMPUTE | SETREG | SETXER;
 	op->reg = rd;
 	op->val = val;
 #ifdef __powerpc64__
@@ -1112,7 +1112,7 @@ static nokprobe_inline void do_cmp_signed(const struct pt_regs *regs,
 {
 	unsigned int crval, shift;
 
-	op->type = COMPUTE + SETCC;
+	op->type = COMPUTE | SETCC;
 	crval = (regs->xer >> 31) & 1;		/* get SO bit */
 	if (v1 < v2)
 		crval |= 8;
@@ -1131,7 +1131,7 @@ static nokprobe_inline void do_cmp_unsigned(const struct pt_regs *regs,
 {
 	unsigned int crval, shift;
 
-	op->type = COMPUTE + SETCC;
+	op->type = COMPUTE | SETCC;
 	crval = (regs->xer >> 31) & 1;		/* get SO bit */
 	if (v1 < v2)
 		crval |= 8;
-- 
1.8.3.1


^ permalink raw reply related

* [PATCH] powerpc: use ARRAY_SIZE instead of division operation
From: Yang Li @ 2021-02-20  8:14 UTC (permalink / raw)
  To: mpe; +Cc: Yang Li, paulus, linuxppc-dev, linux-kernel

This eliminates the following coccicheck warning:
./arch/powerpc/boot/mktree.c:130:31-32: WARNING: Use ARRAY_SIZE

Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Signed-off-by: Yang Li <yang.lee@linux.alibaba.com>
---
 arch/powerpc/boot/mktree.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/boot/mktree.c b/arch/powerpc/boot/mktree.c
index dc603f3..0b2def5 100644
--- a/arch/powerpc/boot/mktree.c
+++ b/arch/powerpc/boot/mktree.c
@@ -127,7 +127,7 @@ int main(int argc, char *argv[])
 			exit(5);
 		}
 		cp = tmpbuf;
-		for (i = 0; i < sizeof(tmpbuf) / sizeof(unsigned int); i++)
+		for (i = 0; i < ARRAY_SIZE(tmpbuf); i++)
 			cksum += *cp++;
 		if (write(out_fd, tmpbuf, sizeof(tmpbuf)) != sizeof(tmpbuf)) {
 			perror("boot-image write");
-- 
1.8.3.1


^ permalink raw reply related

* Re: [PATCH kernel] powerpc/iommu: Annotate nested lock for lockdep
From: Alexey Kardashevskiy @ 2021-02-20  3:49 UTC (permalink / raw)
  To: Frederic Barrat, linuxppc-dev; +Cc: kvm-ppc
In-Reply-To: <49b1f5cb-107c-296f-c339-13e627a73d6d@linux.ibm.com>



On 18/02/2021 23:59, Frederic Barrat wrote:
> 
> 
> On 16/02/2021 04:20, Alexey Kardashevskiy wrote:
>> The IOMMU table is divided into pools for concurrent mappings and each
>> pool has a separate spinlock. When taking the ownership of an IOMMU group
>> to pass through a device to a VM, we lock these spinlocks which triggers
>> a false negative warning in lockdep (below).
>>
>> This fixes it by annotating the large pool's spinlock as a nest lock.
>>
>> ===
>> WARNING: possible recursive locking detected
>> 5.11.0-le_syzkaller_a+fstn1 #100 Not tainted
>> --------------------------------------------
>> qemu-system-ppc/4129 is trying to acquire lock:
>> c0000000119bddb0 (&(p->lock)/1){....}-{2:2}, at: 
>> iommu_take_ownership+0xac/0x1e0
>>
>> but task is already holding lock:
>> c0000000119bdd30 (&(p->lock)/1){....}-{2:2}, at: 
>> iommu_take_ownership+0xac/0x1e0
>>
>> other info that might help us debug this:
>>   Possible unsafe locking scenario:
>>
>>         CPU0
>>         ----
>>    lock(&(p->lock)/1);
>>    lock(&(p->lock)/1);
>> ===
>>
>> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
>> ---
>>   arch/powerpc/kernel/iommu.c | 2 +-
>>   1 file changed, 1 insertion(+), 1 deletion(-)
>>
>> diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
>> index 557a09dd5b2f..2ee642a6731a 100644
>> --- a/arch/powerpc/kernel/iommu.c
>> +++ b/arch/powerpc/kernel/iommu.c
>> @@ -1089,7 +1089,7 @@ int iommu_take_ownership(struct iommu_table *tbl)
>>       spin_lock_irqsave(&tbl->large_pool.lock, flags);
>>       for (i = 0; i < tbl->nr_pools; i++)
>> -        spin_lock(&tbl->pools[i].lock);
>> +        spin_lock_nest_lock(&tbl->pools[i].lock, &tbl->large_pool.lock);
> 
> 
> We have the same pattern and therefore should have the same problem in 
> iommu_release_ownership().
> 
> But as I understand, we're hacking our way around lockdep here, since 
> conceptually, those locks are independent. I was wondering why it seems 
> to fix it by worrying only about the large pool lock. That loop can take 
> many locks (up to 4 with current config). However, if the dma window is 
> less than 1GB, we would only have one, so it would make sense for 
> lockdep to stop complaining. Is it what happened? In which case, this 
> patch doesn't really fix it. Or I'm missing something :-)


My rough undestanding is that when spin_lock_nest_lock is called first 
time, it does some magic with lockdep classes somewhere in 
__lock_acquire()/register_lock_class() and right after that the nested 
lock is not the same as before and it is annotated so  we cannot lock 
nested locks without locking the nest lock first and no (re)annotation 
is needed. I'll try to poke this code once again and see, it is just was 
easier with p9/nested which is gone for now because of little snow in 
one of the southern states :)


> 
>    Fred
> 
> 
> 
>>       iommu_table_release_pages(tbl);
>>

-- 
Alexey

^ permalink raw reply

* Re: [PATCH RFC v1 5/6] xen-swiotlb: convert variables to arrays
From: Boris Ostrovsky @ 2021-02-19 23:59 UTC (permalink / raw)
  To: Konrad Rzeszutek Wilk, Christoph Hellwig, jgross
  Cc: ulf.hansson, airlied, joonas.lahtinen, dri-devel, linux-kernel,
	paulus, hpa, mingo, m.szyprowski, sstabellini, Dongli Zhang, x86,
	joe.jin, peterz, mingo, bskeggs, linux-pci, xen-devel,
	matthew.auld, thomas.lendacky, intel-gfx, jani.nikula, bp,
	rodrigo.vivi, bhelgaas, tglx, adrian.hunter, tsbogend, chris,
	nouveau, robin.murphy, linux-mmc, linux-mips, iommu, bauerman,
	daniel, akpm, linuxppc-dev, rppt
In-Reply-To: <YDAgT2ZIdncNwNlf@Konrads-MacBook-Pro.local>


On 2/19/21 3:32 PM, Konrad Rzeszutek Wilk wrote:
> On Sun, Feb 07, 2021 at 04:56:01PM +0100, Christoph Hellwig wrote:
>> On Thu, Feb 04, 2021 at 09:40:23AM +0100, Christoph Hellwig wrote:
>>> So one thing that has been on my mind for a while:  I'd really like
>>> to kill the separate dma ops in Xen swiotlb.  If we compare xen-swiotlb
>>> to swiotlb the main difference seems to be:
>>>
>>>  - additional reasons to bounce I/O vs the plain DMA capable
>>>  - the possibility to do a hypercall on arm/arm64
>>>  - an extra translation layer before doing the phys_to_dma and vice
>>>    versa
>>>  - an special memory allocator
>>>
>>> I wonder if inbetween a few jump labels or other no overhead enablement
>>> options and possibly better use of the dma_range_map we could kill
>>> off most of swiotlb-xen instead of maintaining all this code duplication?
>> So I looked at this a bit more.
>>
>> For x86 with XENFEAT_auto_translated_physmap (how common is that?)
> Juergen, Boris please correct me if I am wrong, but that XENFEAT_auto_translated_physmap
> only works for PVH guests?


That's both HVM and PVH (for dom0 it's only PVH).


-boris



>
>> pfn_to_gfn is a nop, so plain phys_to_dma/dma_to_phys do work as-is.
>>
>> xen_arch_need_swiotlb always returns true for x86, and
>> range_straddles_page_boundary should never be true for the
>> XENFEAT_auto_translated_physmap case.
> Correct. The kernel should have no clue of what the real MFNs are
> for PFNs.
>> So as far as I can tell the mapping fast path for the
>> XENFEAT_auto_translated_physmap can be trivially reused from swiotlb.
>>
>> That leaves us with the next more complicated case, x86 or fully cache
>> coherent arm{,64} without XENFEAT_auto_translated_physmap.  In that case
>> we need to patch in a phys_to_dma/dma_to_phys that performs the MFN
>> lookup, which could be done using alternatives or jump labels.
>> I think if that is done right we should also be able to let that cover
>> the foreign pages in is_xen_swiotlb_buffer/is_swiotlb_buffer, but
>> in that worst case that would need another alternative / jump label.
>>
>> For non-coherent arm{,64} we'd also need to use alternatives or jump
>> labels to for the cache maintainance ops, but that isn't a hard problem
>> either.
>>
>>

^ permalink raw reply

* [PATCH v2] powerpc/kexec_file: Restore FDT size estimation for kdump kernel
From: Thiago Jung Bauermann @ 2021-02-20  0:52 UTC (permalink / raw)
  To: linuxppc-dev
  Cc: Rob Herring, kexec, linux-kernel, Mimi Zohar,
	Lakshmi Ramasubramanian, Thiago Jung Bauermann, Hari Bathini

Commit 2377c92e37fe ("powerpc/kexec_file: fix FDT size estimation for kdump
kernel") fixed how elf64_load() estimates the FDT size needed by the
crashdump kernel.

At the same time, commit 130b2d59cec0 ("powerpc: Use common
of_kexec_alloc_and_setup_fdt()") changed the same code to use the generic
function of_kexec_alloc_and_setup_fdt() to calculate the FDT size. That
change made the code overestimate it a bit by counting twice the space
required for the kernel command line and /chosen properties.

Therefore change kexec_fdt_totalsize_ppc64() to calculate just the extra
space needed by the kdump kernel, and change the function name so that it
better reflects what the function is now doing.

Signed-off-by: Thiago Jung Bauermann <bauerman@linux.ibm.com>
Reviewed-by: Lakshmi Ramasubramanian <nramas@linux.microsoft.com>
---
 arch/powerpc/include/asm/kexec.h  |  2 +-
 arch/powerpc/kexec/elf_64.c       |  2 +-
 arch/powerpc/kexec/file_load_64.c | 26 ++++++++------------------
 3 files changed, 10 insertions(+), 20 deletions(-)

Applies on top of next-20210219.

Changes since v1:

- Adjusted comment describing kexec_extra_fdt_size_ppc64() as suggested
  by Lakshmi.

diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h
index baab158e215c..5a11cc8d2350 100644
--- a/arch/powerpc/include/asm/kexec.h
+++ b/arch/powerpc/include/asm/kexec.h
@@ -128,7 +128,7 @@ int load_crashdump_segments_ppc64(struct kimage *image,
 int setup_purgatory_ppc64(struct kimage *image, const void *slave_code,
 			  const void *fdt, unsigned long kernel_load_addr,
 			  unsigned long fdt_load_addr);
-unsigned int kexec_fdt_totalsize_ppc64(struct kimage *image);
+unsigned int kexec_extra_fdt_size_ppc64(struct kimage *image);
 int setup_new_fdt_ppc64(const struct kimage *image, void *fdt,
 			unsigned long initrd_load_addr,
 			unsigned long initrd_len, const char *cmdline);
diff --git a/arch/powerpc/kexec/elf_64.c b/arch/powerpc/kexec/elf_64.c
index 0492ca6003f3..5a569bb51349 100644
--- a/arch/powerpc/kexec/elf_64.c
+++ b/arch/powerpc/kexec/elf_64.c
@@ -104,7 +104,7 @@ static void *elf64_load(struct kimage *image, char *kernel_buf,
 
 	fdt = of_kexec_alloc_and_setup_fdt(image, initrd_load_addr,
 					   initrd_len, cmdline,
-					   kexec_fdt_totalsize_ppc64(image));
+					   kexec_extra_fdt_size_ppc64(image));
 	if (!fdt) {
 		pr_err("Error setting up the new device tree.\n");
 		ret = -EINVAL;
diff --git a/arch/powerpc/kexec/file_load_64.c b/arch/powerpc/kexec/file_load_64.c
index 3609de30a170..297f73795a1f 100644
--- a/arch/powerpc/kexec/file_load_64.c
+++ b/arch/powerpc/kexec/file_load_64.c
@@ -927,37 +927,27 @@ int setup_purgatory_ppc64(struct kimage *image, const void *slave_code,
 }
 
 /**
- * kexec_fdt_totalsize_ppc64 - Return the estimated size needed to setup FDT
- *                             for kexec/kdump kernel.
- * @image:                     kexec image being loaded.
+ * kexec_extra_fdt_size_ppc64 - Return the estimated additional size needed to
+ *                              setup FDT for kexec/kdump kernel.
+ * @image:                      kexec image being loaded.
  *
- * Returns the estimated size needed for kexec/kdump kernel FDT.
+ * Returns the estimated extra size needed for kexec/kdump kernel FDT.
  */
-unsigned int kexec_fdt_totalsize_ppc64(struct kimage *image)
+unsigned int kexec_extra_fdt_size_ppc64(struct kimage *image)
 {
-	unsigned int fdt_size;
 	u64 usm_entries;
 
-	/*
-	 * The below estimate more than accounts for a typical kexec case where
-	 * the additional space is to accommodate things like kexec cmdline,
-	 * chosen node with properties for initrd start & end addresses and
-	 * a property to indicate kexec boot..
-	 */
-	fdt_size = fdt_totalsize(initial_boot_params) + (2 * COMMAND_LINE_SIZE);
 	if (image->type != KEXEC_TYPE_CRASH)
-		return fdt_size;
+		return 0;
 
 	/*
-	 * For kdump kernel, also account for linux,usable-memory and
+	 * For kdump kernel, account for linux,usable-memory and
 	 * linux,drconf-usable-memory properties. Get an approximate on the
 	 * number of usable memory entries and use for FDT size estimation.
 	 */
 	usm_entries = ((memblock_end_of_DRAM() / drmem_lmb_size()) +
 		       (2 * (resource_size(&crashk_res) / drmem_lmb_size())));
-	fdt_size += (unsigned int)(usm_entries * sizeof(u64));
-
-	return fdt_size;
+	return (unsigned int)(usm_entries * sizeof(u64));
 }
 
 /**

^ permalink raw reply related

* Re: [PATCH RFC v1 5/6] xen-swiotlb: convert variables to arrays
From: Konrad Rzeszutek Wilk @ 2021-02-19 20:32 UTC (permalink / raw)
  To: Christoph Hellwig, Boris Ostrovsky, jgross
  Cc: ulf.hansson, airlied, joonas.lahtinen, dri-devel, linux-kernel,
	paulus, hpa, mingo, m.szyprowski, sstabellini, adrian.hunter,
	Dongli Zhang, x86, joe.jin, peterz, mingo, bskeggs, linux-pci,
	xen-devel, matthew.auld, thomas.lendacky, intel-gfx, jani.nikula,
	bp, rodrigo.vivi, bhelgaas, boris.ostrovsky, chris, jgross,
	tsbogend, nouveau, robin.murphy, linux-mmc, linux-mips, iommu,
	tglx, bauerman, daniel, akpm, linuxppc-dev, rppt
In-Reply-To: <20210207155601.GA25111@lst.de>

On Sun, Feb 07, 2021 at 04:56:01PM +0100, Christoph Hellwig wrote:
> On Thu, Feb 04, 2021 at 09:40:23AM +0100, Christoph Hellwig wrote:
> > So one thing that has been on my mind for a while:  I'd really like
> > to kill the separate dma ops in Xen swiotlb.  If we compare xen-swiotlb
> > to swiotlb the main difference seems to be:
> > 
> >  - additional reasons to bounce I/O vs the plain DMA capable
> >  - the possibility to do a hypercall on arm/arm64
> >  - an extra translation layer before doing the phys_to_dma and vice
> >    versa
> >  - an special memory allocator
> > 
> > I wonder if inbetween a few jump labels or other no overhead enablement
> > options and possibly better use of the dma_range_map we could kill
> > off most of swiotlb-xen instead of maintaining all this code duplication?
> 
> So I looked at this a bit more.
> 
> For x86 with XENFEAT_auto_translated_physmap (how common is that?)

Juergen, Boris please correct me if I am wrong, but that XENFEAT_auto_translated_physmap
only works for PVH guests?

> pfn_to_gfn is a nop, so plain phys_to_dma/dma_to_phys do work as-is.
> 
> xen_arch_need_swiotlb always returns true for x86, and
> range_straddles_page_boundary should never be true for the
> XENFEAT_auto_translated_physmap case.

Correct. The kernel should have no clue of what the real MFNs are
for PFNs.
> 
> So as far as I can tell the mapping fast path for the
> XENFEAT_auto_translated_physmap can be trivially reused from swiotlb.
> 
> That leaves us with the next more complicated case, x86 or fully cache
> coherent arm{,64} without XENFEAT_auto_translated_physmap.  In that case
> we need to patch in a phys_to_dma/dma_to_phys that performs the MFN
> lookup, which could be done using alternatives or jump labels.
> I think if that is done right we should also be able to let that cover
> the foreign pages in is_xen_swiotlb_buffer/is_swiotlb_buffer, but
> in that worst case that would need another alternative / jump label.
> 
> For non-coherent arm{,64} we'd also need to use alternatives or jump
> labels to for the cache maintainance ops, but that isn't a hard problem
> either.
> 
> 

^ permalink raw reply

* Re: [PATCH] powerpc/kexec_file: Restore FDT size estimation for kdump kernel
From: Thiago Jung Bauermann @ 2021-02-19 23:42 UTC (permalink / raw)
  To: Lakshmi Ramasubramanian
  Cc: Rob Herring, kexec, linux-kernel, Mimi Zohar, linuxppc-dev,
	Hari Bathini
In-Reply-To: <5a28907e-9231-7a19-62ff-3ed1c0282642@linux.microsoft.com>


Lakshmi Ramasubramanian <nramas@linux.microsoft.com> writes:

> On 2/19/21 6:25 AM, Thiago Jung Bauermann wrote:
>
> One small nit in the function header (please see below), but otherwise the
> change looks good.
>
> Reviewed-by: Lakshmi Ramasubramanian <nramas@linux.microsoft.com>

Thanks for your review. I incorporated your suggestion and will send v2
shortly.

>> --- a/arch/powerpc/kexec/file_load_64.c
>> +++ b/arch/powerpc/kexec/file_load_64.c
>> @@ -927,37 +927,27 @@ int setup_purgatory_ppc64(struct kimage *image, const void *slave_code,
>>   }
>>     /**
>> - * kexec_fdt_totalsize_ppc64 - Return the estimated size needed to setup FDT
>> - *                             for kexec/kdump kernel.
>> - * @image:                     kexec image being loaded.
>> + * kexec_extra_fdt_size_ppc63 - Return the estimated size needed to setup FDT
>
> Perhaps change to
>
> "Return the estimated additional size needed to setup FDT for kexec/kdump
> kernel"?

That's better indeed. I also hadn't noticed that I changed ppc64 to
ppc63. Fixed as well.

-- 
Thiago Jung Bauermann
IBM Linux Technology Center

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox