LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 7/9] fs: remove compat_sys_vmsplice
From: Christoph Hellwig @ 2020-09-25  4:51 UTC (permalink / raw)
  To: Alexander Viro
  Cc: linux-aio, linux-mips, David Howells, linux-mm, keyrings,
	sparclinux, linux-arch, linux-s390, linux-scsi, Arnd Bergmann,
	linux-block, io-uring, linux-arm-kernel, Jens Axboe, linux-parisc,
	netdev, linux-kernel, linux-security-module, David Laight,
	linux-fsdevel, Andrew Morton, linuxppc-dev
In-Reply-To: <20200925045146.1283714-1-hch@lst.de>

Now that import_iovec handles compat iovecs, the native vmsplice syscall
can be used for the compat case as well.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 arch/arm64/include/asm/unistd32.h             |  2 +-
 arch/mips/kernel/syscalls/syscall_n32.tbl     |  2 +-
 arch/mips/kernel/syscalls/syscall_o32.tbl     |  2 +-
 arch/parisc/kernel/syscalls/syscall.tbl       |  2 +-
 arch/powerpc/kernel/syscalls/syscall.tbl      |  2 +-
 arch/s390/kernel/syscalls/syscall.tbl         |  2 +-
 arch/sparc/kernel/syscalls/syscall.tbl        |  2 +-
 arch/x86/entry/syscall_x32.c                  |  1 +
 arch/x86/entry/syscalls/syscall_32.tbl        |  2 +-
 arch/x86/entry/syscalls/syscall_64.tbl        |  2 +-
 fs/splice.c                                   | 57 +++++--------------
 include/linux/compat.h                        |  4 --
 include/uapi/asm-generic/unistd.h             |  2 +-
 tools/include/uapi/asm-generic/unistd.h       |  2 +-
 .../arch/powerpc/entry/syscalls/syscall.tbl   |  2 +-
 .../perf/arch/s390/entry/syscalls/syscall.tbl |  2 +-
 .../arch/x86/entry/syscalls/syscall_64.tbl    |  2 +-
 17 files changed, 28 insertions(+), 62 deletions(-)

diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h
index 4a236493dca5b9..11dfae3a8563bd 100644
--- a/arch/arm64/include/asm/unistd32.h
+++ b/arch/arm64/include/asm/unistd32.h
@@ -697,7 +697,7 @@ __SYSCALL(__NR_sync_file_range2, compat_sys_aarch32_sync_file_range2)
 #define __NR_tee 342
 __SYSCALL(__NR_tee, sys_tee)
 #define __NR_vmsplice 343
-__SYSCALL(__NR_vmsplice, compat_sys_vmsplice)
+__SYSCALL(__NR_vmsplice, sys_vmsplice)
 #define __NR_move_pages 344
 __SYSCALL(__NR_move_pages, compat_sys_move_pages)
 #define __NR_getcpu 345
diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl
index c99a92646f8ee9..5a39d4de0ac85b 100644
--- a/arch/mips/kernel/syscalls/syscall_n32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_n32.tbl
@@ -278,7 +278,7 @@
 267	n32	splice				sys_splice
 268	n32	sync_file_range			sys_sync_file_range
 269	n32	tee				sys_tee
-270	n32	vmsplice			compat_sys_vmsplice
+270	n32	vmsplice			sys_vmsplice
 271	n32	move_pages			compat_sys_move_pages
 272	n32	set_robust_list			compat_sys_set_robust_list
 273	n32	get_robust_list			compat_sys_get_robust_list
diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl
index 075064d10661bf..136efc6b8c5444 100644
--- a/arch/mips/kernel/syscalls/syscall_o32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_o32.tbl
@@ -318,7 +318,7 @@
 304	o32	splice				sys_splice
 305	o32	sync_file_range			sys_sync_file_range		sys32_sync_file_range
 306	o32	tee				sys_tee
-307	o32	vmsplice			sys_vmsplice			compat_sys_vmsplice
+307	o32	vmsplice			sys_vmsplice
 308	o32	move_pages			sys_move_pages			compat_sys_move_pages
 309	o32	set_robust_list			sys_set_robust_list		compat_sys_set_robust_list
 310	o32	get_robust_list			sys_get_robust_list		compat_sys_get_robust_list
diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl
index 192abde0001d9d..a9e184192caedd 100644
--- a/arch/parisc/kernel/syscalls/syscall.tbl
+++ b/arch/parisc/kernel/syscalls/syscall.tbl
@@ -330,7 +330,7 @@
 292	32	sync_file_range		parisc_sync_file_range
 292	64	sync_file_range		sys_sync_file_range
 293	common	tee			sys_tee
-294	common	vmsplice		sys_vmsplice			compat_sys_vmsplice
+294	common	vmsplice		sys_vmsplice
 295	common	move_pages		sys_move_pages			compat_sys_move_pages
 296	common	getcpu			sys_getcpu
 297	common	epoll_pwait		sys_epoll_pwait			compat_sys_epoll_pwait
diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl
index 6f1e2ecf0edad9..0d4985919ca34d 100644
--- a/arch/powerpc/kernel/syscalls/syscall.tbl
+++ b/arch/powerpc/kernel/syscalls/syscall.tbl
@@ -369,7 +369,7 @@
 282	common	unshare				sys_unshare
 283	common	splice				sys_splice
 284	common	tee				sys_tee
-285	common	vmsplice			sys_vmsplice			compat_sys_vmsplice
+285	common	vmsplice			sys_vmsplice
 286	common	openat				sys_openat			compat_sys_openat
 287	common	mkdirat				sys_mkdirat
 288	common	mknodat				sys_mknodat
diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl
index 6101cf2e004cb4..b5495a42814bd1 100644
--- a/arch/s390/kernel/syscalls/syscall.tbl
+++ b/arch/s390/kernel/syscalls/syscall.tbl
@@ -316,7 +316,7 @@
 306  common	splice			sys_splice			sys_splice
 307  common	sync_file_range		sys_sync_file_range		compat_sys_s390_sync_file_range
 308  common	tee			sys_tee				sys_tee
-309  common	vmsplice		sys_vmsplice			compat_sys_vmsplice
+309  common	vmsplice		sys_vmsplice			sys_vmsplice
 310  common	move_pages		sys_move_pages			compat_sys_move_pages
 311  common	getcpu			sys_getcpu			sys_getcpu
 312  common	epoll_pwait		sys_epoll_pwait			compat_sys_epoll_pwait
diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl
index a87ddb282ab16f..f1810c1a35caa5 100644
--- a/arch/sparc/kernel/syscalls/syscall.tbl
+++ b/arch/sparc/kernel/syscalls/syscall.tbl
@@ -38,7 +38,7 @@
 23	64    	setuid			sys_setuid
 24	32	getuid			sys_getuid16
 24	64   	getuid			sys_getuid
-25	common	vmsplice		sys_vmsplice			compat_sys_vmsplice
+25	common	vmsplice		sys_vmsplice
 26	common	ptrace			sys_ptrace			compat_sys_ptrace
 27	common	alarm			sys_alarm
 28	common	sigaltstack		sys_sigaltstack			compat_sys_sigaltstack
diff --git a/arch/x86/entry/syscall_x32.c b/arch/x86/entry/syscall_x32.c
index aa321444a41f63..a4840b9d50ad14 100644
--- a/arch/x86/entry/syscall_x32.c
+++ b/arch/x86/entry/syscall_x32.c
@@ -16,6 +16,7 @@
 #define __x32_sys_writev	__x64_sys_writev
 #define __x32_sys_getsockopt	__x64_sys_getsockopt
 #define __x32_sys_setsockopt	__x64_sys_setsockopt
+#define __x32_sys_vmsplice	__x64_sys_vmsplice
 
 #define __SYSCALL_64(nr, sym)
 
diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index 54ab4beb517f25..0fb2f172581e51 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -327,7 +327,7 @@
 313	i386	splice			sys_splice
 314	i386	sync_file_range		sys_ia32_sync_file_range
 315	i386	tee			sys_tee
-316	i386	vmsplice		sys_vmsplice			compat_sys_vmsplice
+316	i386	vmsplice		sys_vmsplice
 317	i386	move_pages		sys_move_pages			compat_sys_move_pages
 318	i386	getcpu			sys_getcpu
 319	i386	epoll_pwait		sys_epoll_pwait
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index b1e59957c5c51c..642af919183de4 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -388,7 +388,7 @@
 529	x32	waitid			compat_sys_waitid
 530	x32	set_robust_list		compat_sys_set_robust_list
 531	x32	get_robust_list		compat_sys_get_robust_list
-532	x32	vmsplice		compat_sys_vmsplice
+532	x32	vmsplice		sys_vmsplice
 533	x32	move_pages		compat_sys_move_pages
 534	x32	preadv			compat_sys_preadv64
 535	x32	pwritev			compat_sys_pwritev64
diff --git a/fs/splice.c b/fs/splice.c
index 132d42b9871f9b..18d84544030b39 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -33,7 +33,6 @@
 #include <linux/security.h>
 #include <linux/gfp.h>
 #include <linux/socket.h>
-#include <linux/compat.h>
 #include <linux/sched/signal.h>
 
 #include "internal.h"
@@ -1332,20 +1331,6 @@ static int vmsplice_type(struct fd f, int *type)
  * Currently we punt and implement it as a normal copy, see pipe_to_user().
  *
  */
-static long do_vmsplice(struct file *f, struct iov_iter *iter, unsigned int flags)
-{
-	if (unlikely(flags & ~SPLICE_F_ALL))
-		return -EINVAL;
-
-	if (!iov_iter_count(iter))
-		return 0;
-
-	if (iov_iter_rw(iter) == WRITE)
-		return vmsplice_to_pipe(f, iter, flags);
-	else
-		return vmsplice_to_user(f, iter, flags);
-}
-
 SYSCALL_DEFINE4(vmsplice, int, fd, const struct iovec __user *, uiov,
 		unsigned long, nr_segs, unsigned int, flags)
 {
@@ -1356,6 +1341,9 @@ SYSCALL_DEFINE4(vmsplice, int, fd, const struct iovec __user *, uiov,
 	struct fd f;
 	int type;
 
+	if (unlikely(flags & ~SPLICE_F_ALL))
+		return -EINVAL;
+
 	f = fdget(fd);
 	error = vmsplice_type(f, &type);
 	if (error)
@@ -1363,40 +1351,21 @@ SYSCALL_DEFINE4(vmsplice, int, fd, const struct iovec __user *, uiov,
 
 	error = import_iovec(type, uiov, nr_segs,
 			     ARRAY_SIZE(iovstack), &iov, &iter);
-	if (error >= 0) {
-		error = do_vmsplice(f.file, &iter, flags);
-		kfree(iov);
-	}
-	fdput(f);
-	return error;
-}
+	if (error < 0)
+		goto out_fdput;
 
-#ifdef CONFIG_COMPAT
-COMPAT_SYSCALL_DEFINE4(vmsplice, int, fd, const struct compat_iovec __user *, iov32,
-		    unsigned int, nr_segs, unsigned int, flags)
-{
-	struct iovec iovstack[UIO_FASTIOV];
-	struct iovec *iov = iovstack;
-	struct iov_iter iter;
-	ssize_t error;
-	struct fd f;
-	int type;
-
-	f = fdget(fd);
-	error = vmsplice_type(f, &type);
-	if (error)
-		return error;
+	if (!iov_iter_count(&iter))
+		error = 0;
+	else if (iov_iter_rw(&iter) == WRITE)
+		error = vmsplice_to_pipe(f.file, &iter, flags);
+	else
+		error = vmsplice_to_user(f.file, &iter, flags);
 
-	error = import_iovec(type, (struct iovec __user *)iov32, nr_segs,
-			     ARRAY_SIZE(iovstack), &iov, &iter);
-	if (error >= 0) {
-		error = do_vmsplice(f.file, &iter, flags);
-		kfree(iov);
-	}
+	kfree(iov);
+out_fdput:
 	fdput(f);
 	return error;
 }
-#endif
 
 SYSCALL_DEFINE6(splice, int, fd_in, loff_t __user *, off_in,
 		int, fd_out, loff_t __user *, off_out,
diff --git a/include/linux/compat.h b/include/linux/compat.h
index 0f1620988267e6..9e8aa148651455 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -597,10 +597,6 @@ asmlinkage long compat_sys_signalfd4(int ufd,
 				     const compat_sigset_t __user *sigmask,
 				     compat_size_t sigsetsize, int flags);
 
-/* fs/splice.c */
-asmlinkage long compat_sys_vmsplice(int fd, const struct compat_iovec __user *,
-				    unsigned int nr_segs, unsigned int flags);
-
 /* fs/stat.c */
 asmlinkage long compat_sys_newfstatat(unsigned int dfd,
 				      const char __user *filename,
diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
index 211c9eacbda6eb..f2dcb0d5703014 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -237,7 +237,7 @@ __SC_COMP(__NR_signalfd4, sys_signalfd4, compat_sys_signalfd4)
 
 /* fs/splice.c */
 #define __NR_vmsplice 75
-__SC_COMP(__NR_vmsplice, sys_vmsplice, compat_sys_vmsplice)
+__SYSCALL(__NR_vmsplice, sys_vmsplice)
 #define __NR_splice 76
 __SYSCALL(__NR_splice, sys_splice)
 #define __NR_tee 77
diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h
index 211c9eacbda6eb..f2dcb0d5703014 100644
--- a/tools/include/uapi/asm-generic/unistd.h
+++ b/tools/include/uapi/asm-generic/unistd.h
@@ -237,7 +237,7 @@ __SC_COMP(__NR_signalfd4, sys_signalfd4, compat_sys_signalfd4)
 
 /* fs/splice.c */
 #define __NR_vmsplice 75
-__SC_COMP(__NR_vmsplice, sys_vmsplice, compat_sys_vmsplice)
+__SYSCALL(__NR_vmsplice, sys_vmsplice)
 #define __NR_splice 76
 __SYSCALL(__NR_splice, sys_splice)
 #define __NR_tee 77
diff --git a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
index 46be68029587f9..26f0347c15118b 100644
--- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
@@ -363,7 +363,7 @@
 282	common	unshare				sys_unshare
 283	common	splice				sys_splice
 284	common	tee				sys_tee
-285	common	vmsplice			sys_vmsplice			compat_sys_vmsplice
+285	common	vmsplice			sys_vmsplice
 286	common	openat				sys_openat			compat_sys_openat
 287	common	mkdirat				sys_mkdirat
 288	common	mknodat				sys_mknodat
diff --git a/tools/perf/arch/s390/entry/syscalls/syscall.tbl b/tools/perf/arch/s390/entry/syscalls/syscall.tbl
index fb5e61ce9d5838..02ad81f69bb7e3 100644
--- a/tools/perf/arch/s390/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/s390/entry/syscalls/syscall.tbl
@@ -316,7 +316,7 @@
 306  common	splice			sys_splice			compat_sys_splice
 307  common	sync_file_range		sys_sync_file_range		compat_sys_s390_sync_file_range
 308  common	tee			sys_tee				compat_sys_tee
-309  common	vmsplice		sys_vmsplice			compat_sys_vmsplice
+309  common	vmsplice		sys_vmsplice			sys_vmsplice
 310  common	move_pages		sys_move_pages			compat_sys_move_pages
 311  common	getcpu			sys_getcpu			compat_sys_getcpu
 312  common	epoll_pwait		sys_epoll_pwait			compat_sys_epoll_pwait
diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
index b1e59957c5c51c..642af919183de4 100644
--- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
@@ -388,7 +388,7 @@
 529	x32	waitid			compat_sys_waitid
 530	x32	set_robust_list		compat_sys_set_robust_list
 531	x32	get_robust_list		compat_sys_get_robust_list
-532	x32	vmsplice		compat_sys_vmsplice
+532	x32	vmsplice		sys_vmsplice
 533	x32	move_pages		compat_sys_move_pages
 534	x32	preadv			compat_sys_preadv64
 535	x32	pwritev			compat_sys_pwritev64
-- 
2.28.0


^ permalink raw reply related

* let import_iovec deal with compat_iovecs as well v4
From: Christoph Hellwig @ 2020-09-25  4:51 UTC (permalink / raw)
  To: Alexander Viro
  Cc: linux-aio, linux-mips, David Howells, linux-mm, keyrings,
	sparclinux, linux-arch, linux-s390, linux-scsi, Arnd Bergmann,
	linux-block, io-uring, linux-arm-kernel, Jens Axboe, linux-parisc,
	netdev, linux-kernel, linux-security-module, David Laight,
	linux-fsdevel, Andrew Morton, linuxppc-dev

Hi Al,

this series changes import_iovec to transparently deal with compat iovec
structures, and then cleanups up a lot of code dupliation.

Changes since v3:
 - fix up changed prototypes in compat.h as well

Changes since v2:
 - revert the switch of the access process vm sysclls to iov_iter
 - refactor the import_iovec internals differently
 - switch aio to use __import_iovec

Changes since v1:
 - improve a commit message
 - drop a pointless unlikely
 - drop the PF_FORCE_COMPAT flag
 - add a few more cleanups (including two from David Laight)

Diffstat:
 arch/arm64/include/asm/unistd32.h                  |   10 
 arch/mips/kernel/syscalls/syscall_n32.tbl          |   10 
 arch/mips/kernel/syscalls/syscall_o32.tbl          |   10 
 arch/parisc/kernel/syscalls/syscall.tbl            |   10 
 arch/powerpc/kernel/syscalls/syscall.tbl           |   10 
 arch/s390/kernel/syscalls/syscall.tbl              |   10 
 arch/sparc/kernel/syscalls/syscall.tbl             |   10 
 arch/x86/entry/syscall_x32.c                       |    5 
 arch/x86/entry/syscalls/syscall_32.tbl             |   10 
 arch/x86/entry/syscalls/syscall_64.tbl             |   10 
 block/scsi_ioctl.c                                 |   12 
 drivers/scsi/sg.c                                  |    9 
 fs/aio.c                                           |   38 --
 fs/io_uring.c                                      |   20 -
 fs/read_write.c                                    |  362 +--------------------
 fs/splice.c                                        |   57 ---
 include/linux/compat.h                             |   24 -
 include/linux/fs.h                                 |   11 
 include/linux/uio.h                                |   10 
 include/uapi/asm-generic/unistd.h                  |   12 
 lib/iov_iter.c                                     |  161 +++++++--
 mm/process_vm_access.c                             |   85 ----
 net/compat.c                                       |    4 
 security/keys/compat.c                             |   37 --
 security/keys/internal.h                           |    5 
 security/keys/keyctl.c                             |    2 
 tools/include/uapi/asm-generic/unistd.h            |   12 
 tools/perf/arch/powerpc/entry/syscalls/syscall.tbl |   10 
 tools/perf/arch/s390/entry/syscalls/syscall.tbl    |   10 
 tools/perf/arch/x86/entry/syscalls/syscall_64.tbl  |   10 
 30 files changed, 280 insertions(+), 706 deletions(-)

^ permalink raw reply

* [PATCH 9/9] security/keys: remove compat_keyctl_instantiate_key_iov
From: Christoph Hellwig @ 2020-09-25  4:51 UTC (permalink / raw)
  To: Alexander Viro
  Cc: linux-aio, linux-mips, David Howells, linux-mm, keyrings,
	sparclinux, linux-arch, linux-s390, linux-scsi, Arnd Bergmann,
	linux-block, io-uring, linux-arm-kernel, Jens Axboe, linux-parisc,
	netdev, linux-kernel, linux-security-module, David Laight,
	linux-fsdevel, Andrew Morton, linuxppc-dev
In-Reply-To: <20200925045146.1283714-1-hch@lst.de>

Now that import_iovec handles compat iovecs, the native version of
keyctl_instantiate_key_iov can be used for the compat case as well.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 security/keys/compat.c   | 36 ++----------------------------------
 security/keys/internal.h |  5 -----
 security/keys/keyctl.c   |  2 +-
 3 files changed, 3 insertions(+), 40 deletions(-)

diff --git a/security/keys/compat.c b/security/keys/compat.c
index 7ae531db031cf8..1545efdca56227 100644
--- a/security/keys/compat.c
+++ b/security/keys/compat.c
@@ -11,38 +11,6 @@
 #include <linux/slab.h>
 #include "internal.h"
 
-/*
- * Instantiate a key with the specified compatibility multipart payload and
- * link the key into the destination keyring if one is given.
- *
- * The caller must have the appropriate instantiation permit set for this to
- * work (see keyctl_assume_authority).  No other permissions are required.
- *
- * If successful, 0 will be returned.
- */
-static long compat_keyctl_instantiate_key_iov(
-	key_serial_t id,
-	const struct compat_iovec __user *_payload_iov,
-	unsigned ioc,
-	key_serial_t ringid)
-{
-	struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
-	struct iov_iter from;
-	long ret;
-
-	if (!_payload_iov)
-		ioc = 0;
-
-	ret = import_iovec(WRITE, (const struct iovec __user *)_payload_iov,
-			   ioc, ARRAY_SIZE(iovstack), &iov, &from);
-	if (ret < 0)
-		return ret;
-
-	ret = keyctl_instantiate_key_common(id, &from, ringid);
-	kfree(iov);
-	return ret;
-}
-
 /*
  * The key control system call, 32-bit compatibility version for 64-bit archs
  */
@@ -113,8 +81,8 @@ COMPAT_SYSCALL_DEFINE5(keyctl, u32, option,
 		return keyctl_reject_key(arg2, arg3, arg4, arg5);
 
 	case KEYCTL_INSTANTIATE_IOV:
-		return compat_keyctl_instantiate_key_iov(
-			arg2, compat_ptr(arg3), arg4, arg5);
+		return keyctl_instantiate_key_iov(arg2, compat_ptr(arg3), arg4,
+						  arg5);
 
 	case KEYCTL_INVALIDATE:
 		return keyctl_invalidate_key(arg2);
diff --git a/security/keys/internal.h b/security/keys/internal.h
index 338a526cbfa516..9b9cf3b6fcbb4d 100644
--- a/security/keys/internal.h
+++ b/security/keys/internal.h
@@ -262,11 +262,6 @@ extern long keyctl_instantiate_key_iov(key_serial_t,
 				       const struct iovec __user *,
 				       unsigned, key_serial_t);
 extern long keyctl_invalidate_key(key_serial_t);
-
-struct iov_iter;
-extern long keyctl_instantiate_key_common(key_serial_t,
-					  struct iov_iter *,
-					  key_serial_t);
 extern long keyctl_restrict_keyring(key_serial_t id,
 				    const char __user *_type,
 				    const char __user *_restriction);
diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c
index 9febd37a168fd0..e26bbccda7ccee 100644
--- a/security/keys/keyctl.c
+++ b/security/keys/keyctl.c
@@ -1164,7 +1164,7 @@ static int keyctl_change_reqkey_auth(struct key *key)
  *
  * If successful, 0 will be returned.
  */
-long keyctl_instantiate_key_common(key_serial_t id,
+static long keyctl_instantiate_key_common(key_serial_t id,
 				   struct iov_iter *from,
 				   key_serial_t ringid)
 {
-- 
2.28.0


^ permalink raw reply related

* [PATCH 3/9] iov_iter: refactor rw_copy_check_uvector and import_iovec
From: Christoph Hellwig @ 2020-09-25  4:51 UTC (permalink / raw)
  To: Alexander Viro
  Cc: linux-aio, linux-mips, David Howells, linux-mm, keyrings,
	sparclinux, linux-arch, linux-s390, linux-scsi, Arnd Bergmann,
	linux-block, io-uring, linux-arm-kernel, Jens Axboe, linux-parisc,
	netdev, linux-kernel, linux-security-module, David Laight,
	linux-fsdevel, Andrew Morton, linuxppc-dev
In-Reply-To: <20200925045146.1283714-1-hch@lst.de>

Split rw_copy_check_uvector into two new helpers with more sensible
calling conventions:

 - iovec_from_user copies a iovec from userspace either into the provided
   stack buffer if it fits, or allocates a new buffer for it.  Returns
   the actually used iovec.  It also verifies that iov_len does fit a
   signed type, and handles compat iovecs if the compat flag is set.
 - __import_iovec consolidates the native and compat versions of
   import_iovec. It calls iovec_from_user, then validates each iovec
   actually points to user addresses, and ensures the total length
   doesn't overflow.

This has two major implications:

 - the access_process_vm case loses the total lenght checking, which
   wasn't required anyway, given that each call receives two iovecs
   for the local and remote side of the operation, and it verifies
   the total length on the local side already.
 - instead of a single loop there now are two loops over the iovecs.
   Given that the iovecs are cache hot this doesn't make a major
   difference

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 include/linux/compat.h |   6 -
 include/linux/fs.h     |  13 --
 include/linux/uio.h    |  12 +-
 lib/iov_iter.c         | 300 ++++++++++++++++-------------------------
 mm/process_vm_access.c |  34 +++--
 5 files changed, 138 insertions(+), 227 deletions(-)

diff --git a/include/linux/compat.h b/include/linux/compat.h
index 654c1ec36671a4..b930de791ff16b 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -451,12 +451,6 @@ extern long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
 
 struct epoll_event;	/* fortunately, this one is fixed-layout */
 
-extern ssize_t compat_rw_copy_check_uvector(int type,
-		const struct compat_iovec __user *uvector,
-		unsigned long nr_segs,
-		unsigned long fast_segs, struct iovec *fast_pointer,
-		struct iovec **ret_pointer);
-
 extern void __user *compat_alloc_user_space(unsigned long len);
 
 int compat_restore_altstack(const compat_stack_t __user *uss);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 7519ae003a082c..e69b45b6cc7b5f 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -178,14 +178,6 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
 /* File supports async buffered reads */
 #define FMODE_BUF_RASYNC	((__force fmode_t)0x40000000)
 
-/*
- * Flag for rw_copy_check_uvector and compat_rw_copy_check_uvector
- * that indicates that they should check the contents of the iovec are
- * valid, but not check the memory that the iovec elements
- * points too.
- */
-#define CHECK_IOVEC_ONLY -1
-
 /*
  * Attribute flags.  These should be or-ed together to figure out what
  * has been changed!
@@ -1887,11 +1879,6 @@ static inline int call_mmap(struct file *file, struct vm_area_struct *vma)
 	return file->f_op->mmap(file, vma);
 }
 
-ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector,
-			      unsigned long nr_segs, unsigned long fast_segs,
-			      struct iovec *fast_pointer,
-			      struct iovec **ret_pointer);
-
 extern ssize_t vfs_read(struct file *, char __user *, size_t, loff_t *);
 extern ssize_t vfs_write(struct file *, const char __user *, size_t, loff_t *);
 extern ssize_t vfs_readv(struct file *, const struct iovec __user *,
diff --git a/include/linux/uio.h b/include/linux/uio.h
index 3835a8a8e9eae0..92c11fe41c6228 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -266,9 +266,15 @@ bool csum_and_copy_from_iter_full(void *addr, size_t bytes, __wsum *csum, struct
 size_t hash_and_copy_to_iter(const void *addr, size_t bytes, void *hashp,
 		struct iov_iter *i);
 
-ssize_t import_iovec(int type, const struct iovec __user * uvector,
-		 unsigned nr_segs, unsigned fast_segs,
-		 struct iovec **iov, struct iov_iter *i);
+struct iovec *iovec_from_user(const struct iovec __user *uvector,
+		unsigned long nr_segs, unsigned long fast_segs,
+		struct iovec *fast_iov, bool compat);
+ssize_t import_iovec(int type, const struct iovec __user *uvec,
+		 unsigned nr_segs, unsigned fast_segs, struct iovec **iovp,
+		 struct iov_iter *i);
+ssize_t __import_iovec(int type, const struct iovec __user *uvec,
+		 unsigned nr_segs, unsigned fast_segs, struct iovec **iovp,
+		 struct iov_iter *i, bool compat);
 
 #ifdef CONFIG_COMPAT
 struct compat_iovec;
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index ccea9db3f72be8..d5d8afe31fca16 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -7,6 +7,7 @@
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
 #include <linux/splice.h>
+#include <linux/compat.h>
 #include <net/checksum.h>
 #include <linux/scatterlist.h>
 #include <linux/instrumented.h>
@@ -1650,107 +1651,133 @@ const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags)
 }
 EXPORT_SYMBOL(dup_iter);
 
-/**
- * rw_copy_check_uvector() - Copy an array of &struct iovec from userspace
- *     into the kernel and check that it is valid.
- *
- * @type: One of %CHECK_IOVEC_ONLY, %READ, or %WRITE.
- * @uvector: Pointer to the userspace array.
- * @nr_segs: Number of elements in userspace array.
- * @fast_segs: Number of elements in @fast_pointer.
- * @fast_pointer: Pointer to (usually small on-stack) kernel array.
- * @ret_pointer: (output parameter) Pointer to a variable that will point to
- *     either @fast_pointer, a newly allocated kernel array, or NULL,
- *     depending on which array was used.
- *
- * This function copies an array of &struct iovec of @nr_segs from
- * userspace into the kernel and checks that each element is valid (e.g.
- * it does not point to a kernel address or cause overflow by being too
- * large, etc.).
- *
- * As an optimization, the caller may provide a pointer to a small
- * on-stack array in @fast_pointer, typically %UIO_FASTIOV elements long
- * (the size of this array, or 0 if unused, should be given in @fast_segs).
- *
- * @ret_pointer will always point to the array that was used, so the
- * caller must take care not to call kfree() on it e.g. in case the
- * @fast_pointer array was used and it was allocated on the stack.
- *
- * Return: The total number of bytes covered by the iovec array on success
- *   or a negative error code on error.
- */
-ssize_t rw_copy_check_uvector(int type, const struct iovec __user *uvector,
-		unsigned long nr_segs, unsigned long fast_segs,
-		struct iovec *fast_pointer, struct iovec **ret_pointer)
+static int copy_compat_iovec_from_user(struct iovec *iov,
+		const struct iovec __user *uvec, unsigned long nr_segs)
+{
+	const struct compat_iovec __user *uiov =
+		(const struct compat_iovec __user *)uvec;
+	int ret = -EFAULT, i;
+
+	if (!user_access_begin(uvec, nr_segs * sizeof(*uvec)))
+		return -EFAULT;
+
+	for (i = 0; i < nr_segs; i++) {
+		compat_uptr_t buf;
+		compat_ssize_t len;
+
+		unsafe_get_user(len, &uiov[i].iov_len, uaccess_end);
+		unsafe_get_user(buf, &uiov[i].iov_base, uaccess_end);
+
+		/* check for compat_size_t not fitting in compat_ssize_t .. */
+		if (len < 0) {
+			ret = -EINVAL;
+			goto uaccess_end;
+		}
+		iov[i].iov_base = compat_ptr(buf);
+		iov[i].iov_len = len;
+	}
+
+	ret = 0;
+uaccess_end:
+	user_access_end();
+	return ret;
+}
+		
+static int copy_iovec_from_user(struct iovec *iov,
+		const struct iovec __user *uvec, unsigned long nr_segs)
 {
 	unsigned long seg;
-	ssize_t ret;
-	struct iovec *iov = fast_pointer;
 
-	/*
-	 * SuS says "The readv() function *may* fail if the iovcnt argument
-	 * was less than or equal to 0, or greater than {IOV_MAX}.  Linux has
-	 * traditionally returned zero for zero segments, so...
-	 */
-	if (nr_segs == 0) {
-		ret = 0;
-		goto out;
+	if (copy_from_user(iov, uvec, nr_segs * sizeof(*uvec)))
+		return -EFAULT;
+	for (seg = 0; seg < nr_segs; seg++) {
+		if ((ssize_t)iov[seg].iov_len < 0)
+			return -EINVAL;
 	}
 
+	return 0;
+}
+
+struct iovec *iovec_from_user(const struct iovec __user *uvec,
+		unsigned long nr_segs, unsigned long fast_segs,
+		struct iovec *fast_iov, bool compat)
+{
+	struct iovec *iov = fast_iov;
+	int ret;
+
 	/*
-	 * First get the "struct iovec" from user memory and
-	 * verify all the pointers
+	 * SuS says "The readv() function *may* fail if the iovcnt argument was
+	 * less than or equal to 0, or greater than {IOV_MAX}.  Linux has
+	 * traditionally returned zero for zero segments, so...
 	 */
-	if (nr_segs > UIO_MAXIOV) {
-		ret = -EINVAL;
-		goto out;
-	}
+	if (nr_segs == 0)
+		return iov;
+	if (nr_segs > UIO_MAXIOV)
+		return ERR_PTR(-EINVAL);
 	if (nr_segs > fast_segs) {
 		iov = kmalloc_array(nr_segs, sizeof(struct iovec), GFP_KERNEL);
-		if (iov == NULL) {
-			ret = -ENOMEM;
-			goto out;
-		}
+		if (!iov)
+			return ERR_PTR(-ENOMEM);
 	}
-	if (copy_from_user(iov, uvector, nr_segs*sizeof(*uvector))) {
-		ret = -EFAULT;
-		goto out;
+
+	if (compat)
+		ret = copy_compat_iovec_from_user(iov, uvec, nr_segs);
+	else
+		ret = copy_iovec_from_user(iov, uvec, nr_segs);
+	if (ret) {
+		if (iov != fast_iov)
+			kfree(iov);
+		return ERR_PTR(ret);
+	}
+
+	return iov;
+}
+
+ssize_t __import_iovec(int type, const struct iovec __user *uvec,
+		 unsigned nr_segs, unsigned fast_segs, struct iovec **iovp,
+		 struct iov_iter *i, bool compat)
+{
+	ssize_t total_len = 0;
+	unsigned long seg;
+	struct iovec *iov;
+
+	iov = iovec_from_user(uvec, nr_segs, fast_segs, *iovp, compat);
+	if (IS_ERR(iov)) {
+		*iovp = NULL;
+		return PTR_ERR(iov);
 	}
 
 	/*
-	 * According to the Single Unix Specification we should return EINVAL
-	 * if an element length is < 0 when cast to ssize_t or if the
-	 * total length would overflow the ssize_t return value of the
-	 * system call.
+	 * According to the Single Unix Specification we should return EINVAL if
+	 * an element length is < 0 when cast to ssize_t or if the total length
+	 * would overflow the ssize_t return value of the system call.
 	 *
 	 * Linux caps all read/write calls to MAX_RW_COUNT, and avoids the
 	 * overflow case.
 	 */
-	ret = 0;
 	for (seg = 0; seg < nr_segs; seg++) {
-		void __user *buf = iov[seg].iov_base;
 		ssize_t len = (ssize_t)iov[seg].iov_len;
 
-		/* see if we we're about to use an invalid len or if
-		 * it's about to overflow ssize_t */
-		if (len < 0) {
-			ret = -EINVAL;
-			goto out;
+		if (!access_ok(iov[seg].iov_base, len)) {
+			if (iov != *iovp)
+				kfree(iov);
+			*iovp = NULL;
+			return -EFAULT;
 		}
-		if (type >= 0
-		    && unlikely(!access_ok(buf, len))) {
-			ret = -EFAULT;
-			goto out;
-		}
-		if (len > MAX_RW_COUNT - ret) {
-			len = MAX_RW_COUNT - ret;
+
+		if (len > MAX_RW_COUNT - total_len) {
+			len = MAX_RW_COUNT - total_len;
 			iov[seg].iov_len = len;
 		}
-		ret += len;
+		total_len += len;
 	}
-out:
-	*ret_pointer = iov;
-	return ret;
+
+	iov_iter_init(i, type, iov, nr_segs, total_len);
+	if (iov == *iovp)
+		*iovp = NULL;
+	else
+		*iovp = iov;
+	return total_len;
 }
 
 /**
@@ -1759,10 +1786,10 @@ ssize_t rw_copy_check_uvector(int type, const struct iovec __user *uvector,
  *     &struct iov_iter iterator to access it.
  *
  * @type: One of %READ or %WRITE.
- * @uvector: Pointer to the userspace array.
+ * @uvec: Pointer to the userspace array.
  * @nr_segs: Number of elements in userspace array.
  * @fast_segs: Number of elements in @iov.
- * @iov: (input and output parameter) Pointer to pointer to (usually small
+ * @iovp: (input and output parameter) Pointer to pointer to (usually small
  *     on-stack) kernel array.
  * @i: Pointer to iterator that will be initialized on success.
  *
@@ -1775,120 +1802,21 @@ ssize_t rw_copy_check_uvector(int type, const struct iovec __user *uvector,
  *
  * Return: Negative error code on error, bytes imported on success
  */
-ssize_t import_iovec(int type, const struct iovec __user * uvector,
+ssize_t import_iovec(int type, const struct iovec __user *uvec,
 		 unsigned nr_segs, unsigned fast_segs,
-		 struct iovec **iov, struct iov_iter *i)
+		 struct iovec **iovp, struct iov_iter *i)
 {
-	ssize_t n;
-	struct iovec *p;
-	n = rw_copy_check_uvector(type, uvector, nr_segs, fast_segs,
-				  *iov, &p);
-	if (n < 0) {
-		if (p != *iov)
-			kfree(p);
-		*iov = NULL;
-		return n;
-	}
-	iov_iter_init(i, type, p, nr_segs, n);
-	*iov = p == *iov ? NULL : p;
-	return n;
+	return __import_iovec(type, uvec, nr_segs, fast_segs, iovp, i, false);
 }
 EXPORT_SYMBOL(import_iovec);
 
 #ifdef CONFIG_COMPAT
-#include <linux/compat.h>
-
-ssize_t compat_rw_copy_check_uvector(int type,
-		const struct compat_iovec __user *uvector,
-		unsigned long nr_segs, unsigned long fast_segs,
-		struct iovec *fast_pointer, struct iovec **ret_pointer)
-{
-	compat_ssize_t tot_len;
-	struct iovec *iov = *ret_pointer = fast_pointer;
-	ssize_t ret = 0;
-	int seg;
-
-	/*
-	 * SuS says "The readv() function *may* fail if the iovcnt argument
-	 * was less than or equal to 0, or greater than {IOV_MAX}.  Linux has
-	 * traditionally returned zero for zero segments, so...
-	 */
-	if (nr_segs == 0)
-		goto out;
-
-	ret = -EINVAL;
-	if (nr_segs > UIO_MAXIOV)
-		goto out;
-	if (nr_segs > fast_segs) {
-		ret = -ENOMEM;
-		iov = kmalloc_array(nr_segs, sizeof(struct iovec), GFP_KERNEL);
-		if (iov == NULL)
-			goto out;
-	}
-	*ret_pointer = iov;
-
-	ret = -EFAULT;
-	if (!access_ok(uvector, nr_segs*sizeof(*uvector)))
-		goto out;
-
-	/*
-	 * Single unix specification:
-	 * We should -EINVAL if an element length is not >= 0 and fitting an
-	 * ssize_t.
-	 *
-	 * In Linux, the total length is limited to MAX_RW_COUNT, there is
-	 * no overflow possibility.
-	 */
-	tot_len = 0;
-	ret = -EINVAL;
-	for (seg = 0; seg < nr_segs; seg++) {
-		compat_uptr_t buf;
-		compat_ssize_t len;
-
-		if (__get_user(len, &uvector->iov_len) ||
-		   __get_user(buf, &uvector->iov_base)) {
-			ret = -EFAULT;
-			goto out;
-		}
-		if (len < 0)	/* size_t not fitting in compat_ssize_t .. */
-			goto out;
-		if (type >= 0 &&
-		    !access_ok(compat_ptr(buf), len)) {
-			ret = -EFAULT;
-			goto out;
-		}
-		if (len > MAX_RW_COUNT - tot_len)
-			len = MAX_RW_COUNT - tot_len;
-		tot_len += len;
-		iov->iov_base = compat_ptr(buf);
-		iov->iov_len = (compat_size_t) len;
-		uvector++;
-		iov++;
-	}
-	ret = tot_len;
-
-out:
-	return ret;
-}
-
-ssize_t compat_import_iovec(int type,
-		const struct compat_iovec __user * uvector,
-		unsigned nr_segs, unsigned fast_segs,
-		struct iovec **iov, struct iov_iter *i)
+ssize_t compat_import_iovec(int type, const struct compat_iovec __user *uvec,
+		unsigned nr_segs, unsigned fast_segs, struct iovec **iovp,
+		struct iov_iter *i)
 {
-	ssize_t n;
-	struct iovec *p;
-	n = compat_rw_copy_check_uvector(type, uvector, nr_segs, fast_segs,
-				  *iov, &p);
-	if (n < 0) {
-		if (p != *iov)
-			kfree(p);
-		*iov = NULL;
-		return n;
-	}
-	iov_iter_init(i, type, p, nr_segs, n);
-	*iov = p == *iov ? NULL : p;
-	return n;
+	return __import_iovec(type, (const struct iovec __user *)uvec, nr_segs,
+			     fast_segs, iovp, i, true);
 }
 EXPORT_SYMBOL(compat_import_iovec);
 #endif
diff --git a/mm/process_vm_access.c b/mm/process_vm_access.c
index 29c052099affdc..5e728c20c2bead 100644
--- a/mm/process_vm_access.c
+++ b/mm/process_vm_access.c
@@ -276,20 +276,17 @@ static ssize_t process_vm_rw(pid_t pid,
 	if (rc < 0)
 		return rc;
 	if (!iov_iter_count(&iter))
-		goto free_iovecs;
-
-	rc = rw_copy_check_uvector(CHECK_IOVEC_ONLY, rvec, riovcnt, UIO_FASTIOV,
-				   iovstack_r, &iov_r);
-	if (rc <= 0)
-		goto free_iovecs;
-
+		goto free_iov_l;
+	iov_r = iovec_from_user(rvec, riovcnt, UIO_FASTIOV, iovstack_r, false);
+	if (IS_ERR(iov_r)) {
+		rc = PTR_ERR(iov_r);
+		goto free_iov_l;
+	}
 	rc = process_vm_rw_core(pid, &iter, iov_r, riovcnt, flags, vm_write);
-
-free_iovecs:
 	if (iov_r != iovstack_r)
 		kfree(iov_r);
+free_iov_l:
 	kfree(iov_l);
-
 	return rc;
 }
 
@@ -333,18 +330,17 @@ compat_process_vm_rw(compat_pid_t pid,
 	if (rc < 0)
 		return rc;
 	if (!iov_iter_count(&iter))
-		goto free_iovecs;
-	rc = compat_rw_copy_check_uvector(CHECK_IOVEC_ONLY, rvec, riovcnt,
-					  UIO_FASTIOV, iovstack_r,
-					  &iov_r);
-	if (rc <= 0)
-		goto free_iovecs;
-
+		goto free_iov_l;
+	iov_r = iovec_from_user((const struct iovec __user *)rvec, riovcnt,
+				UIO_FASTIOV, iovstack_r, true);
+	if (IS_ERR(iov_r)) {
+		rc = PTR_ERR(iov_r);
+		goto free_iov_l;
+	}
 	rc = process_vm_rw_core(pid, &iter, iov_r, riovcnt, flags, vm_write);
-
-free_iovecs:
 	if (iov_r != iovstack_r)
 		kfree(iov_r);
+free_iov_l:
 	kfree(iov_l);
 	return rc;
 }
-- 
2.28.0


^ permalink raw reply related

* [PATCH 2/9] iov_iter: move rw_copy_check_uvector() into lib/iov_iter.c
From: Christoph Hellwig @ 2020-09-25  4:51 UTC (permalink / raw)
  To: Alexander Viro
  Cc: linux-aio, linux-mips, David Howells, linux-mm, keyrings,
	sparclinux, linux-arch, linux-s390, linux-scsi, Arnd Bergmann,
	linux-block, io-uring, linux-arm-kernel, Jens Axboe, linux-parisc,
	netdev, linux-kernel, linux-security-module, David Laight,
	linux-fsdevel, Andrew Morton, linuxppc-dev
In-Reply-To: <20200925045146.1283714-1-hch@lst.de>

From: David Laight <David.Laight@ACULAB.COM>

This lets the compiler inline it into import_iovec() generating
much better code.

Signed-off-by: David Laight <david.laight@aculab.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/read_write.c | 179 ------------------------------------------------
 lib/iov_iter.c  | 176 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 176 insertions(+), 179 deletions(-)

diff --git a/fs/read_write.c b/fs/read_write.c
index 5db58b8c78d0dd..e5e891a88442ef 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -752,185 +752,6 @@ static ssize_t do_loop_readv_writev(struct file *filp, struct iov_iter *iter,
 	return ret;
 }
 
-/**
- * rw_copy_check_uvector() - Copy an array of &struct iovec from userspace
- *     into the kernel and check that it is valid.
- *
- * @type: One of %CHECK_IOVEC_ONLY, %READ, or %WRITE.
- * @uvector: Pointer to the userspace array.
- * @nr_segs: Number of elements in userspace array.
- * @fast_segs: Number of elements in @fast_pointer.
- * @fast_pointer: Pointer to (usually small on-stack) kernel array.
- * @ret_pointer: (output parameter) Pointer to a variable that will point to
- *     either @fast_pointer, a newly allocated kernel array, or NULL,
- *     depending on which array was used.
- *
- * This function copies an array of &struct iovec of @nr_segs from
- * userspace into the kernel and checks that each element is valid (e.g.
- * it does not point to a kernel address or cause overflow by being too
- * large, etc.).
- *
- * As an optimization, the caller may provide a pointer to a small
- * on-stack array in @fast_pointer, typically %UIO_FASTIOV elements long
- * (the size of this array, or 0 if unused, should be given in @fast_segs).
- *
- * @ret_pointer will always point to the array that was used, so the
- * caller must take care not to call kfree() on it e.g. in case the
- * @fast_pointer array was used and it was allocated on the stack.
- *
- * Return: The total number of bytes covered by the iovec array on success
- *   or a negative error code on error.
- */
-ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector,
-			      unsigned long nr_segs, unsigned long fast_segs,
-			      struct iovec *fast_pointer,
-			      struct iovec **ret_pointer)
-{
-	unsigned long seg;
-	ssize_t ret;
-	struct iovec *iov = fast_pointer;
-
-	/*
-	 * SuS says "The readv() function *may* fail if the iovcnt argument
-	 * was less than or equal to 0, or greater than {IOV_MAX}.  Linux has
-	 * traditionally returned zero for zero segments, so...
-	 */
-	if (nr_segs == 0) {
-		ret = 0;
-		goto out;
-	}
-
-	/*
-	 * First get the "struct iovec" from user memory and
-	 * verify all the pointers
-	 */
-	if (nr_segs > UIO_MAXIOV) {
-		ret = -EINVAL;
-		goto out;
-	}
-	if (nr_segs > fast_segs) {
-		iov = kmalloc_array(nr_segs, sizeof(struct iovec), GFP_KERNEL);
-		if (iov == NULL) {
-			ret = -ENOMEM;
-			goto out;
-		}
-	}
-	if (copy_from_user(iov, uvector, nr_segs*sizeof(*uvector))) {
-		ret = -EFAULT;
-		goto out;
-	}
-
-	/*
-	 * According to the Single Unix Specification we should return EINVAL
-	 * if an element length is < 0 when cast to ssize_t or if the
-	 * total length would overflow the ssize_t return value of the
-	 * system call.
-	 *
-	 * Linux caps all read/write calls to MAX_RW_COUNT, and avoids the
-	 * overflow case.
-	 */
-	ret = 0;
-	for (seg = 0; seg < nr_segs; seg++) {
-		void __user *buf = iov[seg].iov_base;
-		ssize_t len = (ssize_t)iov[seg].iov_len;
-
-		/* see if we we're about to use an invalid len or if
-		 * it's about to overflow ssize_t */
-		if (len < 0) {
-			ret = -EINVAL;
-			goto out;
-		}
-		if (type >= 0
-		    && unlikely(!access_ok(buf, len))) {
-			ret = -EFAULT;
-			goto out;
-		}
-		if (len > MAX_RW_COUNT - ret) {
-			len = MAX_RW_COUNT - ret;
-			iov[seg].iov_len = len;
-		}
-		ret += len;
-	}
-out:
-	*ret_pointer = iov;
-	return ret;
-}
-
-#ifdef CONFIG_COMPAT
-ssize_t compat_rw_copy_check_uvector(int type,
-		const struct compat_iovec __user *uvector, unsigned long nr_segs,
-		unsigned long fast_segs, struct iovec *fast_pointer,
-		struct iovec **ret_pointer)
-{
-	compat_ssize_t tot_len;
-	struct iovec *iov = *ret_pointer = fast_pointer;
-	ssize_t ret = 0;
-	int seg;
-
-	/*
-	 * SuS says "The readv() function *may* fail if the iovcnt argument
-	 * was less than or equal to 0, or greater than {IOV_MAX}.  Linux has
-	 * traditionally returned zero for zero segments, so...
-	 */
-	if (nr_segs == 0)
-		goto out;
-
-	ret = -EINVAL;
-	if (nr_segs > UIO_MAXIOV)
-		goto out;
-	if (nr_segs > fast_segs) {
-		ret = -ENOMEM;
-		iov = kmalloc_array(nr_segs, sizeof(struct iovec), GFP_KERNEL);
-		if (iov == NULL)
-			goto out;
-	}
-	*ret_pointer = iov;
-
-	ret = -EFAULT;
-	if (!access_ok(uvector, nr_segs*sizeof(*uvector)))
-		goto out;
-
-	/*
-	 * Single unix specification:
-	 * We should -EINVAL if an element length is not >= 0 and fitting an
-	 * ssize_t.
-	 *
-	 * In Linux, the total length is limited to MAX_RW_COUNT, there is
-	 * no overflow possibility.
-	 */
-	tot_len = 0;
-	ret = -EINVAL;
-	for (seg = 0; seg < nr_segs; seg++) {
-		compat_uptr_t buf;
-		compat_ssize_t len;
-
-		if (__get_user(len, &uvector->iov_len) ||
-		   __get_user(buf, &uvector->iov_base)) {
-			ret = -EFAULT;
-			goto out;
-		}
-		if (len < 0)	/* size_t not fitting in compat_ssize_t .. */
-			goto out;
-		if (type >= 0 &&
-		    !access_ok(compat_ptr(buf), len)) {
-			ret = -EFAULT;
-			goto out;
-		}
-		if (len > MAX_RW_COUNT - tot_len)
-			len = MAX_RW_COUNT - tot_len;
-		tot_len += len;
-		iov->iov_base = compat_ptr(buf);
-		iov->iov_len = (compat_size_t) len;
-		uvector++;
-		iov++;
-	}
-	ret = tot_len;
-
-out:
-	return ret;
-}
-#endif
-
 static ssize_t do_iter_read(struct file *file, struct iov_iter *iter,
 		loff_t *pos, rwf_t flags)
 {
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index 5e40786c8f1232..ccea9db3f72be8 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -1650,6 +1650,109 @@ const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags)
 }
 EXPORT_SYMBOL(dup_iter);
 
+/**
+ * rw_copy_check_uvector() - Copy an array of &struct iovec from userspace
+ *     into the kernel and check that it is valid.
+ *
+ * @type: One of %CHECK_IOVEC_ONLY, %READ, or %WRITE.
+ * @uvector: Pointer to the userspace array.
+ * @nr_segs: Number of elements in userspace array.
+ * @fast_segs: Number of elements in @fast_pointer.
+ * @fast_pointer: Pointer to (usually small on-stack) kernel array.
+ * @ret_pointer: (output parameter) Pointer to a variable that will point to
+ *     either @fast_pointer, a newly allocated kernel array, or NULL,
+ *     depending on which array was used.
+ *
+ * This function copies an array of &struct iovec of @nr_segs from
+ * userspace into the kernel and checks that each element is valid (e.g.
+ * it does not point to a kernel address or cause overflow by being too
+ * large, etc.).
+ *
+ * As an optimization, the caller may provide a pointer to a small
+ * on-stack array in @fast_pointer, typically %UIO_FASTIOV elements long
+ * (the size of this array, or 0 if unused, should be given in @fast_segs).
+ *
+ * @ret_pointer will always point to the array that was used, so the
+ * caller must take care not to call kfree() on it e.g. in case the
+ * @fast_pointer array was used and it was allocated on the stack.
+ *
+ * Return: The total number of bytes covered by the iovec array on success
+ *   or a negative error code on error.
+ */
+ssize_t rw_copy_check_uvector(int type, const struct iovec __user *uvector,
+		unsigned long nr_segs, unsigned long fast_segs,
+		struct iovec *fast_pointer, struct iovec **ret_pointer)
+{
+	unsigned long seg;
+	ssize_t ret;
+	struct iovec *iov = fast_pointer;
+
+	/*
+	 * SuS says "The readv() function *may* fail if the iovcnt argument
+	 * was less than or equal to 0, or greater than {IOV_MAX}.  Linux has
+	 * traditionally returned zero for zero segments, so...
+	 */
+	if (nr_segs == 0) {
+		ret = 0;
+		goto out;
+	}
+
+	/*
+	 * First get the "struct iovec" from user memory and
+	 * verify all the pointers
+	 */
+	if (nr_segs > UIO_MAXIOV) {
+		ret = -EINVAL;
+		goto out;
+	}
+	if (nr_segs > fast_segs) {
+		iov = kmalloc_array(nr_segs, sizeof(struct iovec), GFP_KERNEL);
+		if (iov == NULL) {
+			ret = -ENOMEM;
+			goto out;
+		}
+	}
+	if (copy_from_user(iov, uvector, nr_segs*sizeof(*uvector))) {
+		ret = -EFAULT;
+		goto out;
+	}
+
+	/*
+	 * According to the Single Unix Specification we should return EINVAL
+	 * if an element length is < 0 when cast to ssize_t or if the
+	 * total length would overflow the ssize_t return value of the
+	 * system call.
+	 *
+	 * Linux caps all read/write calls to MAX_RW_COUNT, and avoids the
+	 * overflow case.
+	 */
+	ret = 0;
+	for (seg = 0; seg < nr_segs; seg++) {
+		void __user *buf = iov[seg].iov_base;
+		ssize_t len = (ssize_t)iov[seg].iov_len;
+
+		/* see if we we're about to use an invalid len or if
+		 * it's about to overflow ssize_t */
+		if (len < 0) {
+			ret = -EINVAL;
+			goto out;
+		}
+		if (type >= 0
+		    && unlikely(!access_ok(buf, len))) {
+			ret = -EFAULT;
+			goto out;
+		}
+		if (len > MAX_RW_COUNT - ret) {
+			len = MAX_RW_COUNT - ret;
+			iov[seg].iov_len = len;
+		}
+		ret += len;
+	}
+out:
+	*ret_pointer = iov;
+	return ret;
+}
+
 /**
  * import_iovec() - Copy an array of &struct iovec from userspace
  *     into the kernel, check that it is valid, and initialize a new
@@ -1695,6 +1798,79 @@ EXPORT_SYMBOL(import_iovec);
 #ifdef CONFIG_COMPAT
 #include <linux/compat.h>
 
+ssize_t compat_rw_copy_check_uvector(int type,
+		const struct compat_iovec __user *uvector,
+		unsigned long nr_segs, unsigned long fast_segs,
+		struct iovec *fast_pointer, struct iovec **ret_pointer)
+{
+	compat_ssize_t tot_len;
+	struct iovec *iov = *ret_pointer = fast_pointer;
+	ssize_t ret = 0;
+	int seg;
+
+	/*
+	 * SuS says "The readv() function *may* fail if the iovcnt argument
+	 * was less than or equal to 0, or greater than {IOV_MAX}.  Linux has
+	 * traditionally returned zero for zero segments, so...
+	 */
+	if (nr_segs == 0)
+		goto out;
+
+	ret = -EINVAL;
+	if (nr_segs > UIO_MAXIOV)
+		goto out;
+	if (nr_segs > fast_segs) {
+		ret = -ENOMEM;
+		iov = kmalloc_array(nr_segs, sizeof(struct iovec), GFP_KERNEL);
+		if (iov == NULL)
+			goto out;
+	}
+	*ret_pointer = iov;
+
+	ret = -EFAULT;
+	if (!access_ok(uvector, nr_segs*sizeof(*uvector)))
+		goto out;
+
+	/*
+	 * Single unix specification:
+	 * We should -EINVAL if an element length is not >= 0 and fitting an
+	 * ssize_t.
+	 *
+	 * In Linux, the total length is limited to MAX_RW_COUNT, there is
+	 * no overflow possibility.
+	 */
+	tot_len = 0;
+	ret = -EINVAL;
+	for (seg = 0; seg < nr_segs; seg++) {
+		compat_uptr_t buf;
+		compat_ssize_t len;
+
+		if (__get_user(len, &uvector->iov_len) ||
+		   __get_user(buf, &uvector->iov_base)) {
+			ret = -EFAULT;
+			goto out;
+		}
+		if (len < 0)	/* size_t not fitting in compat_ssize_t .. */
+			goto out;
+		if (type >= 0 &&
+		    !access_ok(compat_ptr(buf), len)) {
+			ret = -EFAULT;
+			goto out;
+		}
+		if (len > MAX_RW_COUNT - tot_len)
+			len = MAX_RW_COUNT - tot_len;
+		tot_len += len;
+		iov->iov_base = compat_ptr(buf);
+		iov->iov_len = (compat_size_t) len;
+		uvector++;
+		iov++;
+	}
+	ret = tot_len;
+
+out:
+	return ret;
+}
+
 ssize_t compat_import_iovec(int type,
 		const struct compat_iovec __user * uvector,
 		unsigned nr_segs, unsigned fast_segs,
-- 
2.28.0


^ permalink raw reply related

* [PATCH 1/9] compat.h: fix a spelling error in <linux/compat.h>
From: Christoph Hellwig @ 2020-09-25  4:51 UTC (permalink / raw)
  To: Alexander Viro
  Cc: linux-aio, linux-mips, David Howells, linux-mm, keyrings,
	sparclinux, linux-arch, linux-s390, linux-scsi, Arnd Bergmann,
	linux-block, io-uring, linux-arm-kernel, Jens Axboe, linux-parisc,
	netdev, linux-kernel, linux-security-module, David Laight,
	linux-fsdevel, Andrew Morton, linuxppc-dev
In-Reply-To: <20200925045146.1283714-1-hch@lst.de>

There is no compat_sys_readv64v2 syscall, only a compat_sys_preadv64v2
one.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 include/linux/compat.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/compat.h b/include/linux/compat.h
index b354ce58966e2d..654c1ec36671a4 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -812,7 +812,7 @@ asmlinkage ssize_t compat_sys_pwritev2(compat_ulong_t fd,
 		const struct compat_iovec __user *vec,
 		compat_ulong_t vlen, u32 pos_low, u32 pos_high, rwf_t flags);
 #ifdef __ARCH_WANT_COMPAT_SYS_PREADV64V2
-asmlinkage long  compat_sys_readv64v2(unsigned long fd,
+asmlinkage long  compat_sys_preadv64v2(unsigned long fd,
 		const struct compat_iovec __user *vec,
 		unsigned long vlen, loff_t pos, rwf_t flags);
 #endif
-- 
2.28.0


^ permalink raw reply related

* Re: [PATCH kernel] powerpc/dma: Fix dma_map_ops::get_required_mask
From: Christoph Hellwig @ 2020-09-25  4:56 UTC (permalink / raw)
  To: Alexey Kardashevskiy
  Cc: Oliver OHalloran, linuxppc-dev@lists.ozlabs.org,
	Christoph Hellwig, Cédric Le Goater
In-Reply-To: <1acaab45-1796-7420-b4fd-b6add7f0d28f@ozlabs.ru>

On Thu, Sep 24, 2020 at 05:03:11PM +1000, Alexey Kardashevskiy wrote:
> May be... The current behavior is not wrong (after the fix) but not
> optimal either. Even with legacy PCI it should just result in failing
> attempt to set 64bit mask which drivers should still handle, i.e. choose
> a shorter mask.

Err, no.

> Why not ditch the whole dma_get_required_mask() and just fail on setting
> a bigger mask? Are these failures not handled in some drivers? Or there
> are cases when a shorter mask is better? Thanks,

Because that is a complete pain.  Think of it, the device/driver knows
what it supports.  For 98% of the modern devices that means all 64-bit
bits, and for most others this means 32-bits, with a few wackos that
support 48 bits or something like that.  The 98% just take any address
thrown at them, and the others just care that they never see an
address larger than what they support.  They could not care any less
if the systems supports 31, 36, 41, 48, 52, 55, 61 or 63-bit addressing,
an they most certainly should not implement stupid boilerplate code to
guess what addressing mode the system implements.  They just declare
what they support.

Then you have the 12 drivers for devices that can do optimizations if
they never see large DMA addresses.  They use the somewhat misnamed
dma_get_required_mask API to query what the largest address they might
see is and act based on that, while not putting any burden on all the
sane devices/drivers.

^ permalink raw reply

* Re: [PATCH v3] powerpc/pci: unmap legacy INTx interrupts when a PHB is removed
From: Cédric Le Goater @ 2020-09-25  5:00 UTC (permalink / raw)
  To: Michael Ellerman
  Cc: Alexey Kardashevskiy, Oliver O'Halloran, linuxppc-dev
In-Reply-To: <20200923074058.203393-1-clg@kaod.org>

On 9/23/20 9:40 AM, Cédric Le Goater wrote:
> When a passthrough IO adapter is removed from a pseries machine using
> hash MMU and the XIVE interrupt mode, the POWER hypervisor expects the
> guest OS to clear all page table entries related to the adapter. If
> some are still present, the RTAS call which isolates the PCI slot
> returns error 9001 "valid outstanding translations" and the removal of
> the IO adapter fails. This is because when the PHBs are scanned, Linux
> maps automatically the INTx interrupts in the Linux interrupt number
> space but these are never removed.
> 
> To solve this problem, we introduce a PPC platform specific
> pcibios_remove_bus() routine which clears all interrupt mappings when
> the bus is removed. This also clears the associated page table entries
> of the ESB pages when using XIVE.
> 
> For this purpose, we record the logical interrupt numbers of the
> mapped interrupt under the PHB structure and let pcibios_remove_bus()
> do the clean up.
> 
> Since some PCI adapters, like GPUs, use the "interrupt-map" property
> to describe interrupt mappings other than the legacy INTx interrupts,
> we can not restrict the size of the mapping array to PCI_NUM_INTX. The
> number of interrupt mappings is computed from the "interrupt-map"
> property and the mapping array is allocated accordingly.
> 
> Cc: "Oliver O'Halloran" <oohall@gmail.com>
> Cc: Alexey Kardashevskiy <aik@ozlabs.ru>
> Reviewed-by: Alexey Kardashevskiy <aik@ozlabs.ru>
> Signed-off-by: Cédric Le Goater <clg@kaod.org>
> ---
> 
>  Changes in v3 :
> 
>  - NULLified 'irq_map' in pci_irq_map_dispose()


Forge that. I am going to move the kfree() in the routine freeing the 
PCI controller structure.

Thanks,

C. 

^ permalink raw reply

* [PATCH] powerpc/pci: Fix PHB removal/rescan on PowerNV
From: Cédric Le Goater @ 2020-09-25  9:22 UTC (permalink / raw)
  To: Michael Ellerman
  Cc: Alexey Kardashevskiy, Oliver O'Halloran, linuxppc-dev,
	Cédric Le Goater

To fix an issue with PHB hotplug on pSeries machine (HPT/XIVE), commit
3a3181e16fbd introduced a PPC specific pcibios_remove_bus() routine to
clear all interrupt mappings when the bus is removed. This routine
frees an array allocated in pcibios_scan_phb().

This broke PHB hotplug on PowerNV because, when a PHB is removed and
re-scanned through sysfs, the PCI layer un-assigns and re-assigns
resources to the PHB but does not destroy and recreate the PCI
controller structure. Since pcibios_remove_bus() does not clear the
'irq_map' array pointer, a second removal of the PHB will try to free
the array a second time and corrupt memory.

Free the 'irq_map' array in pcibios_free_controller() to fix
corruption and clear interrupt mapping after it has been
disposed. This to avoid filling up the array with successive
remove/rescan of a bus.

Cc: "Oliver O'Halloran" <oohall@gmail.com>
Cc: Alexey Kardashevskiy <aik@ozlabs.ru>
Fixes: 3a3181e16fbd ("powerpc/pci: unmap legacy INTx interrupts when a PHB is removed")
Signed-off-by: Cédric Le Goater <clg@kaod.org>
---

Michael, I am not sure the Fixes tag is required. Feel free to drop
it. 

---
 arch/powerpc/kernel/pci-common.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
index deb831f0ae13..6fc228e0359d 100644
--- a/arch/powerpc/kernel/pci-common.c
+++ b/arch/powerpc/kernel/pci-common.c
@@ -143,6 +143,8 @@ void pcibios_free_controller(struct pci_controller *phb)
 	list_del(&phb->list_node);
 	spin_unlock(&hose_spinlock);
 
+	kfree(phb->irq_map);
+
 	if (phb->is_dynamic)
 		kfree(phb);
 }
@@ -450,10 +452,10 @@ static void pci_irq_map_dispose(struct pci_bus *bus)
 
 	pr_debug("PCI: Clearing interrupt mappings for PHB %04x:%02x...\n",
 		 pci_domain_nr(bus), bus->number);
-	for (i = 0; i < phb->irq_count; i++)
+	for (i = 0; i < phb->irq_count; i++) {
 		irq_dispose_mapping(phb->irq_map[i]);
-
-	kfree(phb->irq_map);
+		phb->irq_map[i] = 0;
+	}
 }
 
 void pcibios_remove_bus(struct pci_bus *bus)
-- 
2.25.4


^ permalink raw reply related

* [PATCH v2 3/3] selftests/lkdtm: Enable selftest for SLB multihit
From: Ganesh Goudar @ 2020-09-25 10:31 UTC (permalink / raw)
  To: linuxppc-dev, mpe; +Cc: msuchanek, Ganesh Goudar, keescook, npiggin, mahesh
In-Reply-To: <20200925103123.21102-1-ganeshgr@linux.ibm.com>

Add PPC_SLB_MULTIHIT to lkdtm selftest framework.

Signed-off-by: Ganesh Goudar <ganeshgr@linux.ibm.com>
---
 tools/testing/selftests/lkdtm/tests.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/testing/selftests/lkdtm/tests.txt b/tools/testing/selftests/lkdtm/tests.txt
index 9d266e79c6a2..7eb3cf91c89e 100644
--- a/tools/testing/selftests/lkdtm/tests.txt
+++ b/tools/testing/selftests/lkdtm/tests.txt
@@ -70,3 +70,4 @@ USERCOPY_KERNEL
 USERCOPY_KERNEL_DS
 STACKLEAK_ERASING OK: the rest of the thread stack is properly erased
 CFI_FORWARD_PROTO
+PPC_SLB_MULTIHIT Recovered
-- 
2.26.2


^ permalink raw reply related

* [PATCH v2 2/3] lkdtm/powerpc: Add SLB multihit test
From: Ganesh Goudar @ 2020-09-25 10:31 UTC (permalink / raw)
  To: linuxppc-dev, mpe; +Cc: msuchanek, Ganesh Goudar, keescook, npiggin, mahesh
In-Reply-To: <20200925103123.21102-1-ganeshgr@linux.ibm.com>

Add support to inject slb multihit errors, to test machine
check handling.

Based on work by Mahesh Salgaonkar and Michal Suchánek.

Cc: Mahesh Salgaonkar <mahesh@linux.ibm.com>
Cc: Michal Suchánek <msuchanek@suse.de>
Signed-off-by: Ganesh Goudar <ganeshgr@linux.ibm.com>
---
 drivers/misc/lkdtm/Makefile  |   4 ++
 drivers/misc/lkdtm/core.c    |   3 +
 drivers/misc/lkdtm/lkdtm.h   |   3 +
 drivers/misc/lkdtm/powerpc.c | 132 +++++++++++++++++++++++++++++++++++
 4 files changed, 142 insertions(+)
 create mode 100644 drivers/misc/lkdtm/powerpc.c

diff --git a/drivers/misc/lkdtm/Makefile b/drivers/misc/lkdtm/Makefile
index c70b3822013f..6a82f407fbcd 100644
--- a/drivers/misc/lkdtm/Makefile
+++ b/drivers/misc/lkdtm/Makefile
@@ -11,6 +11,10 @@ lkdtm-$(CONFIG_LKDTM)		+= usercopy.o
 lkdtm-$(CONFIG_LKDTM)		+= stackleak.o
 lkdtm-$(CONFIG_LKDTM)		+= cfi.o
 
+ifeq ($(CONFIG_PPC64),y)
+lkdtm-$(CONFIG_LKDTM)		+= powerpc.o
+endif
+
 KASAN_SANITIZE_stackleak.o	:= n
 KCOV_INSTRUMENT_rodata.o	:= n
 
diff --git a/drivers/misc/lkdtm/core.c b/drivers/misc/lkdtm/core.c
index a5e344df9166..8d5db42baa90 100644
--- a/drivers/misc/lkdtm/core.c
+++ b/drivers/misc/lkdtm/core.c
@@ -178,6 +178,9 @@ static const struct crashtype crashtypes[] = {
 #ifdef CONFIG_X86_32
 	CRASHTYPE(DOUBLE_FAULT),
 #endif
+#ifdef CONFIG_PPC64
+	CRASHTYPE(PPC_SLB_MULTIHIT),
+#endif
 };
 
 
diff --git a/drivers/misc/lkdtm/lkdtm.h b/drivers/misc/lkdtm/lkdtm.h
index 8878538b2c13..b305bd511ee5 100644
--- a/drivers/misc/lkdtm/lkdtm.h
+++ b/drivers/misc/lkdtm/lkdtm.h
@@ -104,4 +104,7 @@ void lkdtm_STACKLEAK_ERASING(void);
 /* cfi.c */
 void lkdtm_CFI_FORWARD_PROTO(void);
 
+/* powerpc.c */
+void lkdtm_PPC_SLB_MULTIHIT(void);
+
 #endif
diff --git a/drivers/misc/lkdtm/powerpc.c b/drivers/misc/lkdtm/powerpc.c
new file mode 100644
index 000000000000..d6db18444757
--- /dev/null
+++ b/drivers/misc/lkdtm/powerpc.c
@@ -0,0 +1,132 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+
+static inline unsigned long get_slb_index(void)
+{
+	unsigned long index;
+
+	index = get_paca()->stab_rr;
+
+	/*
+	 * simple round-robin replacement of slb starting at SLB_NUM_BOLTED.
+	 */
+	if (index < (mmu_slb_size - 1))
+		index++;
+	else
+		index = SLB_NUM_BOLTED;
+	get_paca()->stab_rr = index;
+	return index;
+}
+
+#define slb_esid_mask(ssize)	\
+	(((ssize) == MMU_SEGSIZE_256M) ? ESID_MASK : ESID_MASK_1T)
+
+static inline unsigned long mk_esid_data(unsigned long ea, int ssize,
+					 unsigned long slot)
+{
+	return (ea & slb_esid_mask(ssize)) | SLB_ESID_V | slot;
+}
+
+#define slb_vsid_shift(ssize)	\
+	((ssize) == MMU_SEGSIZE_256M ? SLB_VSID_SHIFT : SLB_VSID_SHIFT_1T)
+
+static inline unsigned long mk_vsid_data(unsigned long ea, int ssize,
+					 unsigned long flags)
+{
+	return (get_kernel_vsid(ea, ssize) << slb_vsid_shift(ssize)) | flags |
+		((unsigned long)ssize << SLB_VSID_SSIZE_SHIFT);
+}
+
+static void insert_slb_entry(char *p, int ssize)
+{
+	unsigned long flags, entry;
+
+	flags = SLB_VSID_KERNEL | mmu_psize_defs[MMU_PAGE_64K].sllp;
+	preempt_disable();
+
+	entry = get_slb_index();
+	asm volatile("slbmte %0,%1" :
+			: "r" (mk_vsid_data((unsigned long)p, ssize, flags)),
+			  "r" (mk_esid_data((unsigned long)p, ssize, entry))
+			: "memory");
+
+	entry = get_slb_index();
+	asm volatile("slbmte %0,%1" :
+			: "r" (mk_vsid_data((unsigned long)p, ssize, flags)),
+			  "r" (mk_esid_data((unsigned long)p, ssize, entry))
+			: "memory");
+	preempt_enable();
+	p[0] = '!';
+}
+
+static void inject_vmalloc_slb_multihit(void)
+{
+	char *p;
+
+	p = vmalloc(2048);
+	if (!p)
+		return;
+
+	insert_slb_entry(p, MMU_SEGSIZE_1T);
+	vfree(p);
+}
+
+static void inject_kmalloc_slb_multihit(void)
+{
+	char *p;
+
+	p = kmalloc(2048, GFP_KERNEL);
+	if (!p)
+		return;
+
+	insert_slb_entry(p, MMU_SEGSIZE_1T);
+	kfree(p);
+}
+
+static void insert_dup_slb_entry_0(void)
+{
+	unsigned long test_address = 0xC000000000000000;
+	volatile unsigned long *test_ptr;
+	unsigned long entry, i = 0;
+	unsigned long esid, vsid;
+
+	test_ptr = (unsigned long *)test_address;
+	preempt_disable();
+
+	asm volatile("slbmfee  %0,%1" : "=r" (esid) : "r" (i));
+	asm volatile("slbmfev  %0,%1" : "=r" (vsid) : "r" (i));
+	entry = get_slb_index();
+
+	/* for i !=0 we would need to mask out the old entry number */
+	asm volatile("slbmte %0,%1" :
+			: "r" (vsid),
+			  "r" (esid | entry)
+			: "memory");
+
+	asm volatile("slbmfee  %0,%1" : "=r" (esid) : "r" (i));
+	asm volatile("slbmfev  %0,%1" : "=r" (vsid) : "r" (i));
+	entry = get_slb_index();
+
+	/* for i !=0 we would need to mask out the old entry number */
+	asm volatile("slbmte %0,%1" :
+			: "r" (vsid),
+			  "r" (esid | entry)
+			: "memory");
+
+	pr_info("lkdtm: %s accessing test address 0x%lx: 0x%lx\n",
+		__func__, test_address, *test_ptr);
+
+	preempt_enable();
+}
+
+void lkdtm_PPC_SLB_MULTIHIT(void)
+{
+	if (mmu_has_feature(MMU_FTR_HPTE_TABLE)) {
+		inject_vmalloc_slb_multihit();
+		inject_kmalloc_slb_multihit();
+		insert_dup_slb_entry_0();
+	}
+	pr_info("Recovered from SLB multihit. (Ignore this message on non HPTE machines)");
+}
-- 
2.26.2


^ permalink raw reply related

* [PATCH v2 1/3] powerpc/mce: remove nmi_enter/exit from real mode handler
From: Ganesh Goudar @ 2020-09-25 10:31 UTC (permalink / raw)
  To: linuxppc-dev, mpe; +Cc: msuchanek, Ganesh Goudar, keescook, npiggin, mahesh
In-Reply-To: <20200925103123.21102-1-ganeshgr@linux.ibm.com>

Use of nmi_enter/exit in real mode handler causes the kernel to panic
and reboot on injecting slb mutihit on pseries machine running in hash
mmu mode, As these calls try to accesses memory outside RMO region in
real mode handler where translation is disabled.

Add check to not to use these calls on pseries machine running in hash
mmu mode.

Fixes: 116ac378bb3f ("powerpc/64s: machine check interrupt update NMI accounting")
Signed-off-by: Ganesh Goudar <ganeshgr@linux.ibm.com>
---
 arch/powerpc/kernel/mce.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
index ada59f6c4298..3bf39dd5dd43 100644
--- a/arch/powerpc/kernel/mce.c
+++ b/arch/powerpc/kernel/mce.c
@@ -591,12 +591,14 @@ EXPORT_SYMBOL_GPL(machine_check_print_event_info);
 long notrace machine_check_early(struct pt_regs *regs)
 {
 	long handled = 0;
-	bool nested = in_nmi();
+	bool is_pseries_hpt_guest;
 	u8 ftrace_enabled = this_cpu_get_ftrace_enabled();
 
 	this_cpu_set_ftrace_enabled(0);
-
-	if (!nested)
+	is_pseries_hpt_guest = machine_is(pseries) &&
+			       mmu_has_feature(MMU_FTR_HPTE_TABLE);
+	/* Do not use nmi_enter/exit for pseries hpte guest */
+	if (!is_pseries_hpt_guest)
 		nmi_enter();
 
 	hv_nmi_check_nonrecoverable(regs);
@@ -607,7 +609,7 @@ long notrace machine_check_early(struct pt_regs *regs)
 	if (ppc_md.machine_check_early)
 		handled = ppc_md.machine_check_early(regs);
 
-	if (!nested)
+	if (!is_pseries_hpt_guest)
 		nmi_exit();
 
 	this_cpu_set_ftrace_enabled(ftrace_enabled);
-- 
2.26.2


^ permalink raw reply related

* [PATCH v2 0/3] powerpc/mce: Fix mce handler and add selftest
From: Ganesh Goudar @ 2020-09-25 10:31 UTC (permalink / raw)
  To: linuxppc-dev, mpe; +Cc: msuchanek, Ganesh Goudar, keescook, npiggin, mahesh

This patch series fixes mce handling for pseries, Adds LKDTM test
for SLB multihit recovery and enables selftest for the same,
basically to test MCE handling on pseries/powernv machines running
in hash mmu mode.

v2:
* Remove in_nmi check before calling nmi_enter/exit,
  as nesting is supported.
* Fix build errors and remove unused variables.
* Integrate error injection code into LKDTM.
* Add support to inject multihit in paca.

Ganesh Goudar (3):
  powerpc/mce: remove nmi_enter/exit from real mode handler
  lkdtm/powerpc: Add SLB multihit test
  selftests/lkdtm: Enable selftest for SLB multihit

 arch/powerpc/kernel/mce.c               |  10 +-
 drivers/misc/lkdtm/Makefile             |   4 +
 drivers/misc/lkdtm/core.c               |   3 +
 drivers/misc/lkdtm/lkdtm.h              |   3 +
 drivers/misc/lkdtm/powerpc.c            | 132 ++++++++++++++++++++++++
 tools/testing/selftests/lkdtm/tests.txt |   1 +
 6 files changed, 149 insertions(+), 4 deletions(-)
 create mode 100644 drivers/misc/lkdtm/powerpc.c

-- 
2.26.2


^ permalink raw reply

* Re: [PATCH v2 2/3] lkdtm/powerpc: Add SLB multihit test
From: kernel test robot @ 2020-09-25 14:04 UTC (permalink / raw)
  To: Ganesh Goudar, linuxppc-dev, mpe
  Cc: kbuild-all, keescook, mahesh, npiggin, Ganesh Goudar, msuchanek
In-Reply-To: <20200925103123.21102-3-ganeshgr@linux.ibm.com>

[-- Attachment #1: Type: text/plain, Size: 1950 bytes --]

Hi Ganesh,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on char-misc/char-misc-testing]
[also build test WARNING on powerpc/next v5.9-rc6 next-20200925]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Ganesh-Goudar/powerpc-mce-Fix-mce-handler-and-add-selftest/20200925-184048
base:   https://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git 9eb29f2ed95edda511ce28651b1d7cdef3614c12
config: powerpc-allyesconfig (attached as .config)
compiler: powerpc64-linux-gcc (GCC) 9.3.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/0day-ci/linux/commit/c066c175c2699a6aec1b0a25f6b95746590d802a
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Ganesh-Goudar/powerpc-mce-Fix-mce-handler-and-add-selftest/20200925-184048
        git checkout c066c175c2699a6aec1b0a25f6b95746590d802a
        # save the attached .config to linux build tree
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross ARCH=powerpc 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All warnings (new ones prefixed by >>):

>> drivers/misc/lkdtm/powerpc.c:124:6: warning: no previous prototype for 'lkdtm_PPC_SLB_MULTIHIT' [-Wmissing-prototypes]
     124 | void lkdtm_PPC_SLB_MULTIHIT(void)
         |      ^~~~~~~~~~~~~~~~~~~~~~

vim +/lkdtm_PPC_SLB_MULTIHIT +124 drivers/misc/lkdtm/powerpc.c

   123	
 > 124	void lkdtm_PPC_SLB_MULTIHIT(void)

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 70393 bytes --]

^ permalink raw reply

* Re: let import_iovec deal with compat_iovecs as well v4
From: Al Viro @ 2020-09-25 15:23 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: linux-aio, linux-mips, David Howells, linux-mm, keyrings,
	sparclinux, linux-arch, linux-s390, linux-scsi, Arnd Bergmann,
	linux-block, io-uring, linux-arm-kernel, Jens Axboe, linux-parisc,
	netdev, linux-kernel, linux-security-module, David Laight,
	linux-fsdevel, Andrew Morton, linuxppc-dev
In-Reply-To: <20200925045146.1283714-1-hch@lst.de>

On Fri, Sep 25, 2020 at 06:51:37AM +0200, Christoph Hellwig wrote:
> Hi Al,
> 
> this series changes import_iovec to transparently deal with compat iovec
> structures, and then cleanups up a lot of code dupliation.

OK, I can live with that.  Applied, let's see if it passes smoke tests
into -next it goes.

^ permalink raw reply

* Re: [PATCH] rpadlpar_io:Add MODULE_DESCRIPTION entries to kernel modules
From: Bjorn Helgaas @ 2020-09-25 19:43 UTC (permalink / raw)
  To: Oliver O'Halloran
  Cc: Tyrel Datwyler, linux-pci, Linux Kernel Mailing List,
	Mamatha Inamdar, Bjorn Helgaas, linuxppc-dev
In-Reply-To: <CAOSf1CEv3v940FR_we70qCBME0qFXPizPT8EFbf3XyK2-fPDrw@mail.gmail.com>

On Thu, Sep 24, 2020 at 04:41:39PM +1000, Oliver O'Halloran wrote:
> On Thu, Sep 24, 2020 at 3:15 PM Mamatha Inamdar
> <mamatha4@linux.vnet.ibm.com> wrote:
> >
> > This patch adds a brief MODULE_DESCRIPTION to rpadlpar_io kernel modules
> > (descriptions taken from Kconfig file)
> >
> > Signed-off-by: Mamatha Inamdar <mamatha4@linux.vnet.ibm.com>
> > ---
> >  drivers/pci/hotplug/rpadlpar_core.c |    1 +
> >  1 file changed, 1 insertion(+)
> >
> > diff --git a/drivers/pci/hotplug/rpadlpar_core.c b/drivers/pci/hotplug/rpadlpar_core.c
> > index f979b70..bac65ed 100644
> > --- a/drivers/pci/hotplug/rpadlpar_core.c
> > +++ b/drivers/pci/hotplug/rpadlpar_core.c
> > @@ -478,3 +478,4 @@ static void __exit rpadlpar_io_exit(void)
> >  module_init(rpadlpar_io_init);
> >  module_exit(rpadlpar_io_exit);
> >  MODULE_LICENSE("GPL");
> > +MODULE_DESCRIPTION("RPA Dynamic Logical Partitioning driver for I/O slots");
> 
> RPA as a spec was superseded by PAPR in the early 2000s. Can we rename
> this already?
> 
> The only potential problem I can see is scripts doing: modprobe
> rpadlpar_io or similar
> 
> However, we should be able to fix that with a module alias.

Is MODULE_DESCRIPTION() connected with how modprobe works?

If this patch just improves documentation, without breaking users of
modprobe, I'm fine with it, even if it would be nice to rename to PAPR
or something in the future.

But, please use "git log --oneline drivers/pci/hotplug/rpadlpar*" and
match the style, and also look through the rest of drivers/pci/ to see
if we should do the same thing to any other modules.

Bjorn

^ permalink raw reply

* Re: [PATCH v2 2/3] lkdtm/powerpc: Add SLB multihit test
From: Kees Cook @ 2020-09-25 19:57 UTC (permalink / raw)
  To: Ganesh Goudar; +Cc: msuchanek, linuxppc-dev, mahesh, npiggin
In-Reply-To: <20200925103123.21102-3-ganeshgr@linux.ibm.com>

On Fri, Sep 25, 2020 at 04:01:22PM +0530, Ganesh Goudar wrote:
> Add support to inject slb multihit errors, to test machine
> check handling.

Thank you for more tests in here!

> 
> Based on work by Mahesh Salgaonkar and Michal Suchánek.
> 
> Cc: Mahesh Salgaonkar <mahesh@linux.ibm.com>
> Cc: Michal Suchánek <msuchanek@suse.de>

Should these be Co-developed-by: with S-o-b?

> Signed-off-by: Ganesh Goudar <ganeshgr@linux.ibm.com>
> ---
>  drivers/misc/lkdtm/Makefile  |   4 ++
>  drivers/misc/lkdtm/core.c    |   3 +
>  drivers/misc/lkdtm/lkdtm.h   |   3 +
>  drivers/misc/lkdtm/powerpc.c | 132 +++++++++++++++++++++++++++++++++++
>  4 files changed, 142 insertions(+)
>  create mode 100644 drivers/misc/lkdtm/powerpc.c
> 
> diff --git a/drivers/misc/lkdtm/Makefile b/drivers/misc/lkdtm/Makefile
> index c70b3822013f..6a82f407fbcd 100644
> --- a/drivers/misc/lkdtm/Makefile
> +++ b/drivers/misc/lkdtm/Makefile
> @@ -11,6 +11,10 @@ lkdtm-$(CONFIG_LKDTM)		+= usercopy.o
>  lkdtm-$(CONFIG_LKDTM)		+= stackleak.o
>  lkdtm-$(CONFIG_LKDTM)		+= cfi.o
>  
> +ifeq ($(CONFIG_PPC64),y)
> +lkdtm-$(CONFIG_LKDTM)		+= powerpc.o
> +endif

This can just be:

lkdtm-$(CONFIG_PPC64)		+= powerpc.o

> +
>  KASAN_SANITIZE_stackleak.o	:= n
>  KCOV_INSTRUMENT_rodata.o	:= n
>  
> diff --git a/drivers/misc/lkdtm/core.c b/drivers/misc/lkdtm/core.c
> index a5e344df9166..8d5db42baa90 100644
> --- a/drivers/misc/lkdtm/core.c
> +++ b/drivers/misc/lkdtm/core.c
> @@ -178,6 +178,9 @@ static const struct crashtype crashtypes[] = {
>  #ifdef CONFIG_X86_32
>  	CRASHTYPE(DOUBLE_FAULT),
>  #endif
> +#ifdef CONFIG_PPC64
> +	CRASHTYPE(PPC_SLB_MULTIHIT),
> +#endif
>  };
>  
>  
> diff --git a/drivers/misc/lkdtm/lkdtm.h b/drivers/misc/lkdtm/lkdtm.h
> index 8878538b2c13..b305bd511ee5 100644
> --- a/drivers/misc/lkdtm/lkdtm.h
> +++ b/drivers/misc/lkdtm/lkdtm.h
> @@ -104,4 +104,7 @@ void lkdtm_STACKLEAK_ERASING(void);
>  /* cfi.c */
>  void lkdtm_CFI_FORWARD_PROTO(void);
>  
> +/* powerpc.c */
> +void lkdtm_PPC_SLB_MULTIHIT(void);
> +
>  #endif
> diff --git a/drivers/misc/lkdtm/powerpc.c b/drivers/misc/lkdtm/powerpc.c
> new file mode 100644
> index 000000000000..d6db18444757
> --- /dev/null
> +++ b/drivers/misc/lkdtm/powerpc.c
> @@ -0,0 +1,132 @@
> +// SPDX-License-Identifier: GPL-2.0
> +

Please #include "lkdtm.h" here to get the correct pr_fmt heading (and
any future header adjustments).

> +#include <linux/slab.h>
> +#include <linux/vmalloc.h>
> +
> +static inline unsigned long get_slb_index(void)
> +{
> +	unsigned long index;
> +
> +	index = get_paca()->stab_rr;
> +
> +	/*
> +	 * simple round-robin replacement of slb starting at SLB_NUM_BOLTED.
> +	 */
> +	if (index < (mmu_slb_size - 1))
> +		index++;
> +	else
> +		index = SLB_NUM_BOLTED;
> +	get_paca()->stab_rr = index;
> +	return index;
> +}
> +
> +#define slb_esid_mask(ssize)	\
> +	(((ssize) == MMU_SEGSIZE_256M) ? ESID_MASK : ESID_MASK_1T)
> +
> +static inline unsigned long mk_esid_data(unsigned long ea, int ssize,
> +					 unsigned long slot)
> +{
> +	return (ea & slb_esid_mask(ssize)) | SLB_ESID_V | slot;
> +}
> +
> +#define slb_vsid_shift(ssize)	\
> +	((ssize) == MMU_SEGSIZE_256M ? SLB_VSID_SHIFT : SLB_VSID_SHIFT_1T)
> +
> +static inline unsigned long mk_vsid_data(unsigned long ea, int ssize,
> +					 unsigned long flags)
> +{
> +	return (get_kernel_vsid(ea, ssize) << slb_vsid_shift(ssize)) | flags |
> +		((unsigned long)ssize << SLB_VSID_SSIZE_SHIFT);
> +}
> +
> +static void insert_slb_entry(char *p, int ssize)
> +{
> +	unsigned long flags, entry;
> +
> +	flags = SLB_VSID_KERNEL | mmu_psize_defs[MMU_PAGE_64K].sllp;
> +	preempt_disable();
> +
> +	entry = get_slb_index();
> +	asm volatile("slbmte %0,%1" :
> +			: "r" (mk_vsid_data((unsigned long)p, ssize, flags)),
> +			  "r" (mk_esid_data((unsigned long)p, ssize, entry))
> +			: "memory");
> +
> +	entry = get_slb_index();
> +	asm volatile("slbmte %0,%1" :
> +			: "r" (mk_vsid_data((unsigned long)p, ssize, flags)),
> +			  "r" (mk_esid_data((unsigned long)p, ssize, entry))
> +			: "memory");
> +	preempt_enable();
> +	p[0] = '!';
> +}

Can you add some comments to these helpers? It'll help people (me) with
understanding what is actually being done here (and more importantly,
what is _expected_ to happen).

> +
> +static void inject_vmalloc_slb_multihit(void)
> +{
> +	char *p;
> +
> +	p = vmalloc(2048);
> +	if (!p)
> +		return;
> +
> +	insert_slb_entry(p, MMU_SEGSIZE_1T);
> +	vfree(p);
> +}
> +
> +static void inject_kmalloc_slb_multihit(void)
> +{
> +	char *p;
> +
> +	p = kmalloc(2048, GFP_KERNEL);
> +	if (!p)
> +		return;
> +
> +	insert_slb_entry(p, MMU_SEGSIZE_1T);
> +	kfree(p);
> +}

It looks like the expected failure injection is actually the "p[0] = '!'" line in the
"insert" helper? I would expect pr_info/pr_err wrappers, etc, as in
other lkdtm tests.

If this is the negative test, what does the positive test look like?
e.g. in other lkdtm tests, first a positive test is done, then a
negative: first run a legit function, then run a function from a bad
location.

> +
> +static void insert_dup_slb_entry_0(void)
> +{
> +	unsigned long test_address = 0xC000000000000000;
> +	volatile unsigned long *test_ptr;
> +	unsigned long entry, i = 0;
> +	unsigned long esid, vsid;
> +
> +	test_ptr = (unsigned long *)test_address;
> +	preempt_disable();
> +
> +	asm volatile("slbmfee  %0,%1" : "=r" (esid) : "r" (i));
> +	asm volatile("slbmfev  %0,%1" : "=r" (vsid) : "r" (i));
> +	entry = get_slb_index();
> +
> +	/* for i !=0 we would need to mask out the old entry number */
> +	asm volatile("slbmte %0,%1" :
> +			: "r" (vsid),
> +			  "r" (esid | entry)
> +			: "memory");
> +
> +	asm volatile("slbmfee  %0,%1" : "=r" (esid) : "r" (i));
> +	asm volatile("slbmfev  %0,%1" : "=r" (vsid) : "r" (i));
> +	entry = get_slb_index();
> +
> +	/* for i !=0 we would need to mask out the old entry number */
> +	asm volatile("slbmte %0,%1" :
> +			: "r" (vsid),
> +			  "r" (esid | entry)
> +			: "memory");
> +
> +	pr_info("lkdtm: %s accessing test address 0x%lx: 0x%lx\n",
> +		__func__, test_address, *test_ptr);
> +
> +	preempt_enable();
> +}

What does this do?

> +
> +void lkdtm_PPC_SLB_MULTIHIT(void)
> +{
> +	if (mmu_has_feature(MMU_FTR_HPTE_TABLE)) {
> +		inject_vmalloc_slb_multihit();
> +		inject_kmalloc_slb_multihit();
> +		insert_dup_slb_entry_0();
> +	}
> +	pr_info("Recovered from SLB multihit. (Ignore this message on non HPTE machines)");

Is this bad? If so, I'd expect pr_err("FAIL: ...") Can HPTE machines be
detected so that an XFAIL can be emitted instead?

Since there are three (two?) distinct regions being tested, should these
be separate tests? Right now there is no way to separate a vmalloc
failure from a kmalloc failure, and no way to fail the first without
hiding the result from the latter (or maybe the machine cannot survive
this test? ... which should also be a comment.)

And finally, assuming a successful test (or testing from a separate
thread later), so there any state that needs to be restored (or cleaned
up before doing the "insert" calls)?

Thanks!

-- 
Kees Cook

^ permalink raw reply

* Re: [PATCH v2 3/3] selftests/lkdtm: Enable selftest for SLB multihit
From: Kees Cook @ 2020-09-25 19:59 UTC (permalink / raw)
  To: Ganesh Goudar; +Cc: msuchanek, linuxppc-dev, mahesh, npiggin
In-Reply-To: <20200925103123.21102-4-ganeshgr@linux.ibm.com>

On Fri, Sep 25, 2020 at 04:01:23PM +0530, Ganesh Goudar wrote:
> Add PPC_SLB_MULTIHIT to lkdtm selftest framework.
> 
> Signed-off-by: Ganesh Goudar <ganeshgr@linux.ibm.com>
> ---
>  tools/testing/selftests/lkdtm/tests.txt | 1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/tools/testing/selftests/lkdtm/tests.txt b/tools/testing/selftests/lkdtm/tests.txt
> index 9d266e79c6a2..7eb3cf91c89e 100644
> --- a/tools/testing/selftests/lkdtm/tests.txt
> +++ b/tools/testing/selftests/lkdtm/tests.txt
> @@ -70,3 +70,4 @@ USERCOPY_KERNEL
>  USERCOPY_KERNEL_DS
>  STACKLEAK_ERASING OK: the rest of the thread stack is properly erased
>  CFI_FORWARD_PROTO
> +PPC_SLB_MULTIHIT Recovered

Please squash this into the lkdtm patch -- I'd like test implementation
and kselftest awareness to go in together.

-- 
Kees Cook

^ permalink raw reply

* Re: [PATCH] fsl: imx-audmix : Replace seq_printf with seq_puts
From: Mark Brown @ 2020-09-25 20:41 UTC (permalink / raw)
  To: lgirdwood, tiwai, shawnguo, nicoleotsuka, Xiubo.Lee, linux-imx,
	linuxppc-dev, linux-arm-kernel, perex, festevam, alsa-devel,
	Xu Wang, shengjiu.wang, s.hauer, kernel, timur
  Cc: linux-kernel
In-Reply-To: <20200916061420.10403-1-vulab@iscas.ac.cn>

On Wed, 16 Sep 2020 06:14:20 +0000, Xu Wang wrote:
> A multiplication for the size determination of a memory allocation
> indicated that an array data structure should be processed.
> Thus use the corresponding function "devm_kcalloc".

Applied to

   https://git.kernel.org/pub/scm/linux/kernel/git/broonie/sound.git for-next

Thanks!

[1/1] ASoC: fsl: imx-audmix: Use devm_kcalloc() instead of devm_kzalloc()
      commit: f95cc5c18c15a425c3dceec48df6b4e27a202dda

All being well this means that it will be integrated into the linux-next
tree (usually sometime in the next 24 hours) and sent to Linus during
the next merge window (or sooner if it is a bug fix), however if
problems are discovered then the patch may be dropped or reverted.

You may get further e-mails resulting from automated or manual testing
and review of the tree, please engage with people reporting problems and
send followup patches addressing any issues that are reported if needed.

If any updates are required or you are submitting further changes they
should be sent as incremental updates against current git, existing
patches will not be replaced.

Please add any relevant lists and maintainers to the CCs when replying
to this mail.

Thanks,
Mark

^ permalink raw reply

* Re: [PATCH] ASoC: fsl_sai: Instantiate snd_soc_dai_driver
From: Mark Brown @ 2020-09-25 20:41 UTC (permalink / raw)
  To: lgirdwood, tiwai, festevam, nicoleotsuka, Xiubo.Lee,
	Shengjiu Wang, perex, alsa-devel, timur
  Cc: linuxppc-dev, linux-kernel
In-Reply-To: <1600424760-32071-1-git-send-email-shengjiu.wang@nxp.com>

On Fri, 18 Sep 2020 18:26:00 +0800, Shengjiu Wang wrote:
> Instantiate snd_soc_dai_driver for independent symmetric control.
> Otherwise the symmetric setting may be overwritten by other
> instance.

Applied to

   https://git.kernel.org/pub/scm/linux/kernel/git/broonie/sound.git for-next

Thanks!

[1/1] ASoC: fsl_sai: Instantiate snd_soc_dai_driver
      commit: 22a16145af824f91014d07f8664114859900b9e6

All being well this means that it will be integrated into the linux-next
tree (usually sometime in the next 24 hours) and sent to Linus during
the next merge window (or sooner if it is a bug fix), however if
problems are discovered then the patch may be dropped or reverted.

You may get further e-mails resulting from automated or manual testing
and review of the tree, please engage with people reporting problems and
send followup patches addressing any issues that are reported if needed.

If any updates are required or you are submitting further changes they
should be sent as incremental updates against current git, existing
patches will not be replaced.

Please add any relevant lists and maintainers to the CCs when replying
to this mail.

Thanks,
Mark

^ permalink raw reply

* Re: [PATCH] fsl: imx-audmix : Use devm_kcalloc() instead of devm_kzalloc()
From: Mark Brown @ 2020-09-25 20:42 UTC (permalink / raw)
  To: lgirdwood, tiwai, linux-arm-kernel, festevam, Xiubo.Lee,
	linuxppc-dev, nicoleotsuka, shawnguo, perex, linux-imx, s.hauer,
	Xu Wang, shengjiu.wang, alsa-devel, kernel, timur
  Cc: linux-kernel
In-Reply-To: <20200921015918.24157-1-vulab@iscas.ac.cn>

On Mon, 21 Sep 2020 01:59:18 +0000, Xu Wang wrote:
> A multiplication for the size determination of a memory allocation
> indicated that an array data structure should be processed.
> Thus use the corresponding function "devm_kcalloc".

Applied to

   https://git.kernel.org/pub/scm/linux/kernel/git/broonie/sound.git for-next

Thanks!

[1/1] ASoC: fsl: imx-audmix: Use devm_kcalloc() instead of devm_kzalloc()
      commit: f95cc5c18c15a425c3dceec48df6b4e27a202dda

All being well this means that it will be integrated into the linux-next
tree (usually sometime in the next 24 hours) and sent to Linus during
the next merge window (or sooner if it is a bug fix), however if
problems are discovered then the patch may be dropped or reverted.

You may get further e-mails resulting from automated or manual testing
and review of the tree, please engage with people reporting problems and
send followup patches addressing any issues that are reported if needed.

If any updates are required or you are submitting further changes they
should be sent as incremental updates against current git, existing
patches will not be replaced.

Please add any relevant lists and maintainers to the CCs when replying
to this mail.

Thanks,
Mark

^ permalink raw reply

* RE: [PATCH 1/5] Documentation: dt: binding: fsl: Add 'fsl,ippdexpcr1-alt-addr' property
From: Ran Wang @ 2020-09-27  7:23 UTC (permalink / raw)
  To: Rob Herring
  Cc: devicetree@vger.kernel.org, Biwen Li, Shawn Guo,
	linux-kernel@vger.kernel.org, Leo Li,
	linuxppc-dev@lists.ozlabs.org,
	linux-arm-kernel@lists.infradead.org
In-Reply-To: <AM6PR04MB5413BB2F8D044B2312DAEC4FF1380@AM6PR04MB5413.eurprd04.prod.outlook.com>

Hi Rob

Not sure whether you have missed this mail with my query.

Regards,
Ran

On Wednesday, September 23, 2020 2:44 PM Ran Wang wrote:
> 
> Hi Rob,
> 
> On Wednesday, September 23, 2020 10:33 AM, Rob Herring wrote:
> >
> > On Wed, Sep 16, 2020 at 04:18:27PM +0800, Ran Wang wrote:
> > > From: Biwen Li <biwen.li@nxp.com>
> > >
> > > The 'fsl,ippdexpcr1-alt-addr' property is used to handle an errata
> > > A-008646 on LS1021A
> > >
> > > Signed-off-by: Biwen Li <biwen.li@nxp.com>
> > > Signed-off-by: Ran Wang <ran.wang_1@nxp.com>
> > > ---
> > >  Documentation/devicetree/bindings/soc/fsl/rcpm.txt | 19
> > > +++++++++++++++++++
> > >  1 file changed, 19 insertions(+)
> > >
> > > diff --git a/Documentation/devicetree/bindings/soc/fsl/rcpm.txt
> > > b/Documentation/devicetree/bindings/soc/fsl/rcpm.txt
> > > index 5a33619..1be58a3 100644
> > > --- a/Documentation/devicetree/bindings/soc/fsl/rcpm.txt
> > > +++ b/Documentation/devicetree/bindings/soc/fsl/rcpm.txt
> > > @@ -34,6 +34,11 @@ Chassis Version		Example Chips
> > >  Optional properties:
> > >   - little-endian : RCPM register block is Little Endian. Without it RCPM
> > >     will be Big Endian (default case).
> > > + - fsl,ippdexpcr1-alt-addr : The property is related to a hardware issue
> > > +   on SoC LS1021A and only needed on SoC LS1021A.
> > > +   Must include 2 entries:
> > > +   The first entry must be a link to the SCFG device node.
> > > +   The 2nd entry must be offset of register IPPDEXPCR1 in SCFG.
> >
> > You don't need a DT change for this. You can find SCFG node by its
> > compatible string and then the offset should be known given this issue is
> only on 1 SoC.
> 
> Did you mean that RCPM driver just to access IPPDEXPCR1 shadowed register
> in SCFG directly without fetching it's offset info. from DT?
> 
> Regards,
> Ran


^ permalink raw reply

* Re: [RFC PATCH 18/18] powerpc/powermac: Move PHB discovery
From: Christophe Leroy @ 2020-09-27  7:25 UTC (permalink / raw)
  To: Oliver O'Halloran, linuxppc-dev
In-Reply-To: <20200924063819.262830-18-oohall@gmail.com>



Le 24/09/2020 à 08:38, Oliver O'Halloran a écrit :
> Signed-off-by: Oliver O'Halloran <oohall@gmail.com>

Tested-by: Christophe Leroy <christophe.leroy@csgroup.eu>

This series is a really good step forward to the elimination of
early support for ioremap(), thanks.

Tested with pmac32_defconfig on QEMU MAC99.

Before the series we have 9000 kbytes mapped as early ioremap

ioremap() called early from pmac_feature_init+0xc8/0xac8. Use early_ioremap() instead
ioremap() called early from probe_one_macio+0x170/0x2a8. Use early_ioremap() instead
ioremap() called early from udbg_scc_init+0x1d8/0x494. Use early_ioremap() instead
ioremap() called early from find_via_cuda+0xa8/0x3f8. Use early_ioremap() instead
ioremap() called early from pmac_pci_init+0x214/0x778. Use early_ioremap() instead
ioremap() called early from pmac_pci_init+0x228/0x778. Use early_ioremap() instead
ioremap() called early from pci_process_bridge_OF_ranges+0x158/0x2d0. Use early_ioremap() instead
ioremap() called early from pmac_setup_arch+0x110/0x298. Use early_ioremap() instead
ioremap() called early from pmac_nvram_init+0x144/0x534. Use early_ioremap() instead
   * 0xfeb36000..0xff400000  : early ioremap
   * 0xf1000000..0xfeb36000  : vmalloc & ioremap

After the series we have 800 kbytes mapped as early ioremap

ioremap() called early from pmac_feature_init+0xc8/0xac8. Use early_ioremap() instead
ioremap() called early from probe_one_macio+0x170/0x2a8. Use early_ioremap() instead
ioremap() called early from udbg_scc_init+0x1d8/0x494. Use early_ioremap() instead
ioremap() called early from find_via_cuda+0xa8/0x3f8. Use early_ioremap() instead
ioremap() called early from pmac_setup_arch+0x10c/0x294. Use early_ioremap() instead
ioremap() called early from pmac_nvram_init+0x144/0x534. Use early_ioremap() instead
   * 0xff338000..0xff400000  : early ioremap
   * 0xf1000000..0xff338000  : vmalloc & ioremap

Christophe


> ---
> compile tested with pmac32_defconfig and g5_defconfig
> ---
>   arch/powerpc/platforms/powermac/setup.c | 4 +---
>   1 file changed, 1 insertion(+), 3 deletions(-)
> 
> diff --git a/arch/powerpc/platforms/powermac/setup.c b/arch/powerpc/platforms/powermac/setup.c
> index f002b0fa69b8..0f8669139a21 100644
> --- a/arch/powerpc/platforms/powermac/setup.c
> +++ b/arch/powerpc/platforms/powermac/setup.c
> @@ -298,9 +298,6 @@ static void __init pmac_setup_arch(void)
>   		of_node_put(ic);
>   	}
>   
> -	/* Lookup PCI hosts */
> -	pmac_pci_init();
> -
>   #ifdef CONFIG_PPC32
>   	ohare_init();
>   	l2cr_init();
> @@ -600,6 +597,7 @@ define_machine(powermac) {
>   	.name			= "PowerMac",
>   	.probe			= pmac_probe,
>   	.setup_arch		= pmac_setup_arch,
> +	.discover_phbs		= pmac_pci_init,
>   	.show_cpuinfo		= pmac_show_cpuinfo,
>   	.init_IRQ		= pmac_pic_init,
>   	.get_irq		= NULL,	/* changed later */
> 

^ permalink raw reply

* Re: [PATCH v8 2/8] powerpc/vdso: Remove __kernel_datapage_offset and simplify __get_datapage()
From: Christophe Leroy @ 2020-09-27  7:43 UTC (permalink / raw)
  To: Will Deacon, Michael Ellerman
  Cc: nathanl, linux-arch, Arnd Bergmann, Dmitry Safonov, open list,
	Paul Mackerras, Andy Lutomirski, Thomas Gleixner,
	Vincenzo Frascino, linuxppc-dev
In-Reply-To: <20200921112638.GC2139@willie-the-truck>



Le 21/09/2020 à 13:26, Will Deacon a écrit :
> On Fri, Aug 28, 2020 at 12:14:28PM +1000, Michael Ellerman wrote:
>> Dmitry Safonov <0x7f454c46@gmail.com> writes:
>>> On Wed, 26 Aug 2020 at 15:39, Michael Ellerman <mpe@ellerman.id.au> wrote:
>>>> Christophe Leroy <christophe.leroy@csgroup.eu> writes:
>>>> We added a test for vdso unmap recently because it happened to trigger a
>>>> KAUP failure, and someone actually hit it & reported it.
>>>
>>> You right, CRIU cares much more about moving vDSO.
>>> It's done for each restoree and as on most setups vDSO is premapped and
>>> used by the application - it's actively tested.
>>> Speaking about vDSO unmap - that's concerning only for heterogeneous C/R,
>>> i.e when an application is migrated from a system that uses vDSO to the one
>>> which doesn't - it's much rare scenario.
>>> (for arm it's !CONFIG_VDSO, for x86 it's `vdso=0` boot parameter)
>>
>> Ah OK that explains it.
>>
>> The case we hit of VDSO unmapping was some strange "library OS" thing
>> which had explicitly unmapped the VDSO, so also very rare.
>>
>>> Looking at the code, it seems quite easy to provide/maintain .close() for
>>> vm_special_mapping. A bit harder to add a test from CRIU side
>>> (as glibc won't know on restore that it can't use vdso anymore),
>>> but totally not impossible.
>>>
>>>> Running that test on arm64 segfaults:
>>>>
>>>>    # ./sigreturn_vdso
>>>>    VDSO is at 0xffff8191f000-0xffff8191ffff (4096 bytes)
>>>>    Signal delivered OK with VDSO mapped
>>>>    VDSO moved to 0xffff8191a000-0xffff8191afff (4096 bytes)
>>>>    Signal delivered OK with VDSO moved
>>>>    Unmapped VDSO
>>>>    Remapped the stack executable
>>>>    [   48.556191] potentially unexpected fatal signal 11.
>>>>    [   48.556752] CPU: 0 PID: 140 Comm: sigreturn_vdso Not tainted 5.9.0-rc2-00057-g2ac69819ba9e #190
>>>>    [   48.556990] Hardware name: linux,dummy-virt (DT)
>>>>    [   48.557336] pstate: 60001000 (nZCv daif -PAN -UAO BTYPE=--)
>>>>    [   48.557475] pc : 0000ffff8191a7bc
>>>>    [   48.557603] lr : 0000ffff8191a7bc
>>>>    [   48.557697] sp : 0000ffffc13c9e90
>>>>    [   48.557873] x29: 0000ffffc13cb0e0 x28: 0000000000000000
>>>>    [   48.558201] x27: 0000000000000000 x26: 0000000000000000
>>>>    [   48.558337] x25: 0000000000000000 x24: 0000000000000000
>>>>    [   48.558754] x23: 0000000000000000 x22: 0000000000000000
>>>>    [   48.558893] x21: 00000000004009b0 x20: 0000000000000000
>>>>    [   48.559046] x19: 0000000000400ff0 x18: 0000000000000000
>>>>    [   48.559180] x17: 0000ffff817da300 x16: 0000000000412010
>>>>    [   48.559312] x15: 0000000000000000 x14: 000000000000001c
>>>>    [   48.559443] x13: 656c626174756365 x12: 7865206b63617473
>>>>    [   48.559625] x11: 0000000000000003 x10: 0101010101010101
>>>>    [   48.559828] x9 : 0000ffff818afda8 x8 : 0000000000000081
>>>>    [   48.559973] x7 : 6174732065687420 x6 : 64657070616d6552
>>>>    [   48.560115] x5 : 000000000e0388bd x4 : 000000000040135d
>>>>    [   48.560270] x3 : 0000000000000000 x2 : 0000000000000001
>>>>    [   48.560412] x1 : 0000000000000003 x0 : 00000000004120b8
>>>>    Segmentation fault
>>>>    #
>>>>
>>>> So I think we need to keep the unmap hook. Maybe it should be handled by
>>>> the special_mapping stuff generically.
>>>
>>> I'll cook a patch for vm_special_mapping if you don't mind :-)
>>
>> That would be great, thanks!
> 
> I lost track of this one. Is there a patch kicking around to resolve this,
> or is the segfault expected behaviour?
> 

IIUC dmitry said he will cook a patch. I have not seen any patch yet.

AFAIKS, among the architectures having VDSO sigreturn trampolines, only SH, X86 and POWERPC provide 
alternative trampoline on stack when VDSO is not there.

All other architectures just having a VDSO don't expect VDSO to not be mapped.

As far as nowadays stacks are mapped non-executable, getting a segfaut is expected behaviour. 
However, I think we should really make it cleaner. Today it segfaults because it is still pointing 
to the VDSO trampoline that has been unmapped. But should the user map some other code at the same 
address, we'll run in the weed on signal return instead of segfaulting.

So VDSO unmapping should really be properly managed, the reference should be properly cleared in 
order to segfault in a controllable manner.

Only powerpc has a hook to properly clear the VDSO pointer when VDSO is unmapped.

Christophe

^ permalink raw reply

* [PATCH v1 04/30] powerpc/vdso: Remove get_page() in vdso_pagelist initialization
From: Christophe Leroy @ 2020-09-27  9:16 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman
  Cc: linuxppc-dev, linux-kernel
In-Reply-To: <cover.1601197618.git.christophe.leroy@csgroup.eu>

Partly copied from commit 16fb1a9bec61 ("arm64: vdso: clean up
vdso_pagelist initialization").

No need to get_page() the vdso text/data - these are part of the
kernel image.

Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
 arch/powerpc/kernel/vdso.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c
index 6d106fcafb9e..dfaa4be258d2 100644
--- a/arch/powerpc/kernel/vdso.c
+++ b/arch/powerpc/kernel/vdso.c
@@ -748,7 +748,7 @@ static int __init vdso_init(void)
 	BUG_ON(vdso32_pagelist == NULL);
 	for (i = 0; i < vdso32_pages; i++) {
 		struct page *pg = virt_to_page(vdso32_kbase + i*PAGE_SIZE);
-		get_page(pg);
+
 		vdso32_pagelist[i] = pg;
 	}
 	vdso32_pagelist[i++] = virt_to_page(vdso_data);
@@ -761,15 +761,13 @@ static int __init vdso_init(void)
 	BUG_ON(vdso64_pagelist == NULL);
 	for (i = 0; i < vdso64_pages; i++) {
 		struct page *pg = virt_to_page(vdso64_kbase + i*PAGE_SIZE);
-		get_page(pg);
+
 		vdso64_pagelist[i] = pg;
 	}
 	vdso64_pagelist[i++] = virt_to_page(vdso_data);
 	vdso64_pagelist[i] = NULL;
 #endif /* CONFIG_PPC64 */
 
-	get_page(virt_to_page(vdso_data));
-
 	smp_wmb();
 	vdso_ready = 1;
 
-- 
2.25.0


^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox