* [PATCH 1/9] kernel: add a PF_FORCE_COMPAT flag
From: Christoph Hellwig @ 2020-09-18 12:45 UTC (permalink / raw)
To: Alexander Viro
Cc: linux-aio, linux-mips, David Howells, linux-mm, keyrings,
sparclinux, linux-arch, linux-s390, linux-scsi, x86,
Arnd Bergmann, linux-block, io-uring, linux-arm-kernel,
Jens Axboe, linux-parisc, netdev, linux-kernel,
linux-security-module, linux-fsdevel, Andrew Morton, linuxppc-dev
In-Reply-To: <20200918124533.3487701-1-hch@lst.de>
Add a flag to force processing a syscall as a compat syscall. This is
required so that in_compat_syscall() works for I/O submitted by io_uring
helper threads on behalf of compat syscalls.
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
arch/sparc/include/asm/compat.h | 3 ++-
arch/x86/include/asm/compat.h | 2 +-
fs/io_uring.c | 9 +++++++++
include/linux/compat.h | 5 ++++-
include/linux/sched.h | 1 +
5 files changed, 17 insertions(+), 3 deletions(-)
diff --git a/arch/sparc/include/asm/compat.h b/arch/sparc/include/asm/compat.h
index 40a267b3bd5208..fee6c51d36e869 100644
--- a/arch/sparc/include/asm/compat.h
+++ b/arch/sparc/include/asm/compat.h
@@ -211,7 +211,8 @@ static inline int is_compat_task(void)
static inline bool in_compat_syscall(void)
{
/* Vector 0x110 is LINUX_32BIT_SYSCALL_TRAP */
- return pt_regs_trap_type(current_pt_regs()) == 0x110;
+ return pt_regs_trap_type(current_pt_regs()) == 0x110 ||
+ (current->flags & PF_FORCE_COMPAT);
}
#define in_compat_syscall in_compat_syscall
#endif
diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h
index d4edf281fff49d..fbab072d4e5b31 100644
--- a/arch/x86/include/asm/compat.h
+++ b/arch/x86/include/asm/compat.h
@@ -208,7 +208,7 @@ static inline bool in_32bit_syscall(void)
#ifdef CONFIG_COMPAT
static inline bool in_compat_syscall(void)
{
- return in_32bit_syscall();
+ return in_32bit_syscall() || (current->flags & PF_FORCE_COMPAT);
}
#define in_compat_syscall in_compat_syscall /* override the generic impl */
#endif
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 3790c7fe9fee22..5755d557c3f7bc 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -5449,6 +5449,9 @@ static int io_req_defer_prep(struct io_kiocb *req,
if (unlikely(ret))
return ret;
+ if (req->ctx->compat)
+ current->flags |= PF_FORCE_COMPAT;
+
switch (req->opcode) {
case IORING_OP_NOP:
break;
@@ -5546,6 +5549,7 @@ static int io_req_defer_prep(struct io_kiocb *req,
break;
}
+ current->flags &= ~PF_FORCE_COMPAT;
return ret;
}
@@ -5669,6 +5673,9 @@ static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
struct io_ring_ctx *ctx = req->ctx;
int ret;
+ if (ctx->compat)
+ current->flags |= PF_FORCE_COMPAT;
+
switch (req->opcode) {
case IORING_OP_NOP:
ret = io_nop(req, cs);
@@ -5898,6 +5905,8 @@ static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
break;
}
+ current->flags &= ~PF_FORCE_COMPAT;
+
if (ret)
return ret;
diff --git a/include/linux/compat.h b/include/linux/compat.h
index b354ce58966e2d..685066f7ad325f 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -891,7 +891,10 @@ asmlinkage long compat_sys_socketcall(int call, u32 __user *args);
*/
#ifndef in_compat_syscall
-static inline bool in_compat_syscall(void) { return is_compat_task(); }
+static inline bool in_compat_syscall(void)
+{
+ return is_compat_task() || (current->flags & PF_FORCE_COMPAT);
+}
#endif
/**
diff --git a/include/linux/sched.h b/include/linux/sched.h
index afe01e232935fa..c8b183b5655a1e 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1491,6 +1491,7 @@ extern struct pid *cad_pid;
*/
#define PF_IDLE 0x00000002 /* I am an IDLE thread */
#define PF_EXITING 0x00000004 /* Getting shut down */
+#define PF_FORCE_COMPAT 0x00000008 /* acting as compat task */
#define PF_VCPU 0x00000010 /* I'm a virtual CPU */
#define PF_WQ_WORKER 0x00000020 /* I'm a workqueue worker */
#define PF_FORKNOEXEC 0x00000040 /* Forked but didn't exec */
--
2.28.0
^ permalink raw reply related
* [PATCH 5/9] fs: remove various compat readv/writev helpers
From: Christoph Hellwig @ 2020-09-18 12:45 UTC (permalink / raw)
To: Alexander Viro
Cc: linux-aio, linux-mips, David Howells, linux-mm, keyrings,
sparclinux, linux-arch, linux-s390, linux-scsi, x86,
Arnd Bergmann, linux-block, io-uring, linux-arm-kernel,
Jens Axboe, linux-parisc, netdev, linux-kernel,
linux-security-module, linux-fsdevel, Andrew Morton, linuxppc-dev
In-Reply-To: <20200918124533.3487701-1-hch@lst.de>
Now that import_iovec handles compat iovecs as well, all the duplicated
code in the compat readv/writev helpers is not needed. Remove them
and switch the compat syscall handlers to use the native helpers.
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
fs/read_write.c | 179 ++++++++----------------------------------------
1 file changed, 30 insertions(+), 149 deletions(-)
diff --git a/fs/read_write.c b/fs/read_write.c
index 2f961c653ce561..9eb63c53da78f2 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -1211,226 +1211,107 @@ SYSCALL_DEFINE6(pwritev2, unsigned long, fd, const struct iovec __user *, vec,
return do_pwritev(fd, vec, vlen, pos, flags);
}
+/*
+ * Various compat syscalls. Note that they all pretend to take a native
+ * iovec - import_iovec will properly treat those as compat_iovecs based on
+ * in_compat_syscall().
+ */
#ifdef CONFIG_COMPAT
-static size_t compat_readv(struct file *file,
- const struct compat_iovec __user *vec,
- unsigned long vlen, loff_t *pos, rwf_t flags)
-{
- struct iovec iovstack[UIO_FASTIOV];
- struct iovec *iov = iovstack;
- struct iov_iter iter;
- ssize_t ret;
-
- ret = import_iovec(READ, (const struct iovec __user *)vec, vlen,
- UIO_FASTIOV, &iov, &iter);
- if (ret >= 0) {
- ret = do_iter_read(file, &iter, pos, flags);
- kfree(iov);
- }
- if (ret > 0)
- add_rchar(current, ret);
- inc_syscr(current);
- return ret;
-}
-
-static size_t do_compat_readv(compat_ulong_t fd,
- const struct compat_iovec __user *vec,
- compat_ulong_t vlen, rwf_t flags)
-{
- struct fd f = fdget_pos(fd);
- ssize_t ret;
- loff_t pos;
-
- if (!f.file)
- return -EBADF;
- pos = f.file->f_pos;
- ret = compat_readv(f.file, vec, vlen, &pos, flags);
- if (ret >= 0)
- f.file->f_pos = pos;
- fdput_pos(f);
- return ret;
-
-}
-
COMPAT_SYSCALL_DEFINE3(readv, compat_ulong_t, fd,
- const struct compat_iovec __user *,vec,
+ const struct iovec __user *, vec,
compat_ulong_t, vlen)
{
- return do_compat_readv(fd, vec, vlen, 0);
-}
-
-static long do_compat_preadv64(unsigned long fd,
- const struct compat_iovec __user *vec,
- unsigned long vlen, loff_t pos, rwf_t flags)
-{
- struct fd f;
- ssize_t ret;
-
- if (pos < 0)
- return -EINVAL;
- f = fdget(fd);
- if (!f.file)
- return -EBADF;
- ret = -ESPIPE;
- if (f.file->f_mode & FMODE_PREAD)
- ret = compat_readv(f.file, vec, vlen, &pos, flags);
- fdput(f);
- return ret;
+ return do_readv(fd, vec, vlen, 0);
}
#ifdef __ARCH_WANT_COMPAT_SYS_PREADV64
COMPAT_SYSCALL_DEFINE4(preadv64, unsigned long, fd,
- const struct compat_iovec __user *,vec,
+ const struct iovec __user *, vec,
unsigned long, vlen, loff_t, pos)
{
- return do_compat_preadv64(fd, vec, vlen, pos, 0);
+ return do_preadv(fd, vec, vlen, pos, 0);
}
#endif
COMPAT_SYSCALL_DEFINE5(preadv, compat_ulong_t, fd,
- const struct compat_iovec __user *,vec,
+ const struct iovec __user *, vec,
compat_ulong_t, vlen, u32, pos_low, u32, pos_high)
{
loff_t pos = ((loff_t)pos_high << 32) | pos_low;
- return do_compat_preadv64(fd, vec, vlen, pos, 0);
+ return do_preadv(fd, vec, vlen, pos, 0);
}
#ifdef __ARCH_WANT_COMPAT_SYS_PREADV64V2
COMPAT_SYSCALL_DEFINE5(preadv64v2, unsigned long, fd,
- const struct compat_iovec __user *,vec,
+ const struct iovec __user *, vec,
unsigned long, vlen, loff_t, pos, rwf_t, flags)
{
if (pos == -1)
- return do_compat_readv(fd, vec, vlen, flags);
-
- return do_compat_preadv64(fd, vec, vlen, pos, flags);
+ return do_readv(fd, vec, vlen, flags);
+ return do_preadv(fd, vec, vlen, pos, flags);
}
#endif
COMPAT_SYSCALL_DEFINE6(preadv2, compat_ulong_t, fd,
- const struct compat_iovec __user *,vec,
+ const struct iovec __user *, vec,
compat_ulong_t, vlen, u32, pos_low, u32, pos_high,
rwf_t, flags)
{
loff_t pos = ((loff_t)pos_high << 32) | pos_low;
if (pos == -1)
- return do_compat_readv(fd, vec, vlen, flags);
-
- return do_compat_preadv64(fd, vec, vlen, pos, flags);
-}
-
-static size_t compat_writev(struct file *file,
- const struct compat_iovec __user *vec,
- unsigned long vlen, loff_t *pos, rwf_t flags)
-{
- struct iovec iovstack[UIO_FASTIOV];
- struct iovec *iov = iovstack;
- struct iov_iter iter;
- ssize_t ret;
-
- ret = import_iovec(WRITE, (const struct iovec __user *)vec, vlen,
- UIO_FASTIOV, &iov, &iter);
- if (ret >= 0) {
- file_start_write(file);
- ret = do_iter_write(file, &iter, pos, flags);
- file_end_write(file);
- kfree(iov);
- }
- if (ret > 0)
- add_wchar(current, ret);
- inc_syscw(current);
- return ret;
-}
-
-static size_t do_compat_writev(compat_ulong_t fd,
- const struct compat_iovec __user* vec,
- compat_ulong_t vlen, rwf_t flags)
-{
- struct fd f = fdget_pos(fd);
- ssize_t ret;
- loff_t pos;
-
- if (!f.file)
- return -EBADF;
- pos = f.file->f_pos;
- ret = compat_writev(f.file, vec, vlen, &pos, flags);
- if (ret >= 0)
- f.file->f_pos = pos;
- fdput_pos(f);
- return ret;
+ return do_readv(fd, vec, vlen, flags);
+ return do_preadv(fd, vec, vlen, pos, flags);
}
COMPAT_SYSCALL_DEFINE3(writev, compat_ulong_t, fd,
- const struct compat_iovec __user *, vec,
+ const struct iovec __user *, vec,
compat_ulong_t, vlen)
{
- return do_compat_writev(fd, vec, vlen, 0);
-}
-
-static long do_compat_pwritev64(unsigned long fd,
- const struct compat_iovec __user *vec,
- unsigned long vlen, loff_t pos, rwf_t flags)
-{
- struct fd f;
- ssize_t ret;
-
- if (pos < 0)
- return -EINVAL;
- f = fdget(fd);
- if (!f.file)
- return -EBADF;
- ret = -ESPIPE;
- if (f.file->f_mode & FMODE_PWRITE)
- ret = compat_writev(f.file, vec, vlen, &pos, flags);
- fdput(f);
- return ret;
+ return do_writev(fd, vec, vlen, 0);
}
#ifdef __ARCH_WANT_COMPAT_SYS_PWRITEV64
COMPAT_SYSCALL_DEFINE4(pwritev64, unsigned long, fd,
- const struct compat_iovec __user *,vec,
+ const struct iovec __user *, vec,
unsigned long, vlen, loff_t, pos)
{
- return do_compat_pwritev64(fd, vec, vlen, pos, 0);
+ return do_pwritev(fd, vec, vlen, pos, 0);
}
#endif
COMPAT_SYSCALL_DEFINE5(pwritev, compat_ulong_t, fd,
- const struct compat_iovec __user *,vec,
+ const struct iovec __user *,vec,
compat_ulong_t, vlen, u32, pos_low, u32, pos_high)
{
loff_t pos = ((loff_t)pos_high << 32) | pos_low;
- return do_compat_pwritev64(fd, vec, vlen, pos, 0);
+ return do_pwritev(fd, vec, vlen, pos, 0);
}
#ifdef __ARCH_WANT_COMPAT_SYS_PWRITEV64V2
COMPAT_SYSCALL_DEFINE5(pwritev64v2, unsigned long, fd,
- const struct compat_iovec __user *,vec,
+ const struct iovec __user *, vec,
unsigned long, vlen, loff_t, pos, rwf_t, flags)
{
if (pos == -1)
- return do_compat_writev(fd, vec, vlen, flags);
-
- return do_compat_pwritev64(fd, vec, vlen, pos, flags);
+ return do_writev(fd, vec, vlen, flags);
+ return do_pwritev(fd, vec, vlen, pos, flags);
}
#endif
COMPAT_SYSCALL_DEFINE6(pwritev2, compat_ulong_t, fd,
- const struct compat_iovec __user *,vec,
+ const struct iovec __user *,vec,
compat_ulong_t, vlen, u32, pos_low, u32, pos_high, rwf_t, flags)
{
loff_t pos = ((loff_t)pos_high << 32) | pos_low;
if (pos == -1)
- return do_compat_writev(fd, vec, vlen, flags);
-
- return do_compat_pwritev64(fd, vec, vlen, pos, flags);
+ return do_writev(fd, vec, vlen, flags);
+ return do_pwritev(fd, vec, vlen, pos, flags);
}
-
-#endif
+#endif /* CONFIG_COMPAT */
static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
size_t count, loff_t max)
--
2.28.0
^ permalink raw reply related
* [PATCH 6/9] fs: remove the compat readv/writev syscalls
From: Christoph Hellwig @ 2020-09-18 12:45 UTC (permalink / raw)
To: Alexander Viro
Cc: linux-aio, linux-mips, David Howells, linux-mm, keyrings,
sparclinux, linux-arch, linux-s390, linux-scsi, x86,
Arnd Bergmann, linux-block, io-uring, linux-arm-kernel,
Jens Axboe, linux-parisc, netdev, linux-kernel,
linux-security-module, linux-fsdevel, Andrew Morton, linuxppc-dev
In-Reply-To: <20200918124533.3487701-1-hch@lst.de>
Now that import_iovec handles compat iovecs, the native readv and writev
syscalls can be used for the compat case as well.
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
arch/arm64/include/asm/unistd32.h | 4 ++--
arch/mips/kernel/syscalls/syscall_n32.tbl | 4 ++--
arch/mips/kernel/syscalls/syscall_o32.tbl | 4 ++--
arch/parisc/kernel/syscalls/syscall.tbl | 4 ++--
arch/powerpc/kernel/syscalls/syscall.tbl | 4 ++--
arch/s390/kernel/syscalls/syscall.tbl | 4 ++--
arch/sparc/kernel/syscalls/syscall.tbl | 4 ++--
arch/x86/entry/syscall_x32.c | 2 ++
arch/x86/entry/syscalls/syscall_32.tbl | 4 ++--
arch/x86/entry/syscalls/syscall_64.tbl | 4 ++--
fs/read_write.c | 14 --------------
include/linux/compat.h | 4 ----
include/uapi/asm-generic/unistd.h | 4 ++--
tools/include/uapi/asm-generic/unistd.h | 4 ++--
tools/perf/arch/powerpc/entry/syscalls/syscall.tbl | 4 ++--
tools/perf/arch/s390/entry/syscalls/syscall.tbl | 4 ++--
tools/perf/arch/x86/entry/syscalls/syscall_64.tbl | 4 ++--
17 files changed, 30 insertions(+), 46 deletions(-)
diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h
index 734860ac7cf9d5..4a236493dca5b9 100644
--- a/arch/arm64/include/asm/unistd32.h
+++ b/arch/arm64/include/asm/unistd32.h
@@ -301,9 +301,9 @@ __SYSCALL(__NR_flock, sys_flock)
#define __NR_msync 144
__SYSCALL(__NR_msync, sys_msync)
#define __NR_readv 145
-__SYSCALL(__NR_readv, compat_sys_readv)
+__SYSCALL(__NR_readv, sys_readv)
#define __NR_writev 146
-__SYSCALL(__NR_writev, compat_sys_writev)
+__SYSCALL(__NR_writev, sys_writev)
#define __NR_getsid 147
__SYSCALL(__NR_getsid, sys_getsid)
#define __NR_fdatasync 148
diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl
index f9df9edb67a407..c99a92646f8ee9 100644
--- a/arch/mips/kernel/syscalls/syscall_n32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_n32.tbl
@@ -25,8 +25,8 @@
15 n32 ioctl compat_sys_ioctl
16 n32 pread64 sys_pread64
17 n32 pwrite64 sys_pwrite64
-18 n32 readv compat_sys_readv
-19 n32 writev compat_sys_writev
+18 n32 readv sys_readv
+19 n32 writev sys_writev
20 n32 access sys_access
21 n32 pipe sysm_pipe
22 n32 _newselect compat_sys_select
diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl
index 195b43cf27c848..075064d10661bf 100644
--- a/arch/mips/kernel/syscalls/syscall_o32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_o32.tbl
@@ -156,8 +156,8 @@
142 o32 _newselect sys_select compat_sys_select
143 o32 flock sys_flock
144 o32 msync sys_msync
-145 o32 readv sys_readv compat_sys_readv
-146 o32 writev sys_writev compat_sys_writev
+145 o32 readv sys_readv
+146 o32 writev sys_writev
147 o32 cacheflush sys_cacheflush
148 o32 cachectl sys_cachectl
149 o32 sysmips __sys_sysmips
diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl
index def64d221cd4fb..192abde0001d9d 100644
--- a/arch/parisc/kernel/syscalls/syscall.tbl
+++ b/arch/parisc/kernel/syscalls/syscall.tbl
@@ -159,8 +159,8 @@
142 common _newselect sys_select compat_sys_select
143 common flock sys_flock
144 common msync sys_msync
-145 common readv sys_readv compat_sys_readv
-146 common writev sys_writev compat_sys_writev
+145 common readv sys_readv
+146 common writev sys_writev
147 common getsid sys_getsid
148 common fdatasync sys_fdatasync
149 common _sysctl sys_ni_syscall
diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl
index c2d737ff2e7bec..6f1e2ecf0edad9 100644
--- a/arch/powerpc/kernel/syscalls/syscall.tbl
+++ b/arch/powerpc/kernel/syscalls/syscall.tbl
@@ -193,8 +193,8 @@
142 common _newselect sys_select compat_sys_select
143 common flock sys_flock
144 common msync sys_msync
-145 common readv sys_readv compat_sys_readv
-146 common writev sys_writev compat_sys_writev
+145 common readv sys_readv
+146 common writev sys_writev
147 common getsid sys_getsid
148 common fdatasync sys_fdatasync
149 nospu _sysctl sys_ni_syscall
diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl
index 10456bc936fb09..6101cf2e004cb4 100644
--- a/arch/s390/kernel/syscalls/syscall.tbl
+++ b/arch/s390/kernel/syscalls/syscall.tbl
@@ -134,8 +134,8 @@
142 64 select sys_select -
143 common flock sys_flock sys_flock
144 common msync sys_msync sys_msync
-145 common readv sys_readv compat_sys_readv
-146 common writev sys_writev compat_sys_writev
+145 common readv sys_readv sys_readv
+146 common writev sys_writev sys_writev
147 common getsid sys_getsid sys_getsid
148 common fdatasync sys_fdatasync sys_fdatasync
149 common _sysctl - -
diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl
index 4af114e84f2022..a87ddb282ab16f 100644
--- a/arch/sparc/kernel/syscalls/syscall.tbl
+++ b/arch/sparc/kernel/syscalls/syscall.tbl
@@ -149,8 +149,8 @@
117 common getrusage sys_getrusage compat_sys_getrusage
118 common getsockopt sys_getsockopt sys_getsockopt
119 common getcwd sys_getcwd
-120 common readv sys_readv compat_sys_readv
-121 common writev sys_writev compat_sys_writev
+120 common readv sys_readv
+121 common writev sys_writev
122 common settimeofday sys_settimeofday compat_sys_settimeofday
123 32 fchown sys_fchown16
123 64 fchown sys_fchown
diff --git a/arch/x86/entry/syscall_x32.c b/arch/x86/entry/syscall_x32.c
index 1583831f61a9df..aa321444a41f63 100644
--- a/arch/x86/entry/syscall_x32.c
+++ b/arch/x86/entry/syscall_x32.c
@@ -12,6 +12,8 @@
* Reuse the 64-bit entry points for the x32 versions that occupy different
* slots in the syscall table.
*/
+#define __x32_sys_readv __x64_sys_readv
+#define __x32_sys_writev __x64_sys_writev
#define __x32_sys_getsockopt __x64_sys_getsockopt
#define __x32_sys_setsockopt __x64_sys_setsockopt
diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index 9d11028736661b..54ab4beb517f25 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -156,8 +156,8 @@
142 i386 _newselect sys_select compat_sys_select
143 i386 flock sys_flock
144 i386 msync sys_msync
-145 i386 readv sys_readv compat_sys_readv
-146 i386 writev sys_writev compat_sys_writev
+145 i386 readv sys_readv
+146 i386 writev sys_writev
147 i386 getsid sys_getsid
148 i386 fdatasync sys_fdatasync
149 i386 _sysctl sys_ni_syscall
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index f30d6ae9a6883c..b1e59957c5c51c 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -371,8 +371,8 @@
512 x32 rt_sigaction compat_sys_rt_sigaction
513 x32 rt_sigreturn compat_sys_x32_rt_sigreturn
514 x32 ioctl compat_sys_ioctl
-515 x32 readv compat_sys_readv
-516 x32 writev compat_sys_writev
+515 x32 readv sys_readv
+516 x32 writev sys_writev
517 x32 recvfrom compat_sys_recvfrom
518 x32 sendmsg compat_sys_sendmsg
519 x32 recvmsg compat_sys_recvmsg
diff --git a/fs/read_write.c b/fs/read_write.c
index 9eb63c53da78f2..560d1b0bdef7bc 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -1217,13 +1217,6 @@ SYSCALL_DEFINE6(pwritev2, unsigned long, fd, const struct iovec __user *, vec,
* in_compat_syscall().
*/
#ifdef CONFIG_COMPAT
-COMPAT_SYSCALL_DEFINE3(readv, compat_ulong_t, fd,
- const struct iovec __user *, vec,
- compat_ulong_t, vlen)
-{
- return do_readv(fd, vec, vlen, 0);
-}
-
#ifdef __ARCH_WANT_COMPAT_SYS_PREADV64
COMPAT_SYSCALL_DEFINE4(preadv64, unsigned long, fd,
const struct iovec __user *, vec,
@@ -1265,13 +1258,6 @@ COMPAT_SYSCALL_DEFINE6(preadv2, compat_ulong_t, fd,
return do_preadv(fd, vec, vlen, pos, flags);
}
-COMPAT_SYSCALL_DEFINE3(writev, compat_ulong_t, fd,
- const struct iovec __user *, vec,
- compat_ulong_t, vlen)
-{
- return do_writev(fd, vec, vlen, 0);
-}
-
#ifdef __ARCH_WANT_COMPAT_SYS_PWRITEV64
COMPAT_SYSCALL_DEFINE4(pwritev64, unsigned long, fd,
const struct iovec __user *, vec,
diff --git a/include/linux/compat.h b/include/linux/compat.h
index ad6dc56e8828d6..0ff848234df8ba 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -545,10 +545,6 @@ asmlinkage long compat_sys_getdents(unsigned int fd,
/* fs/read_write.c */
asmlinkage long compat_sys_lseek(unsigned int, compat_off_t, unsigned int);
-asmlinkage ssize_t compat_sys_readv(compat_ulong_t fd,
- const struct compat_iovec __user *vec, compat_ulong_t vlen);
-asmlinkage ssize_t compat_sys_writev(compat_ulong_t fd,
- const struct compat_iovec __user *vec, compat_ulong_t vlen);
/* No generic prototype for pread64 and pwrite64 */
asmlinkage ssize_t compat_sys_preadv(compat_ulong_t fd,
const struct compat_iovec __user *vec,
diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
index 995b36c2ea7d8a..211c9eacbda6eb 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -207,9 +207,9 @@ __SYSCALL(__NR_read, sys_read)
#define __NR_write 64
__SYSCALL(__NR_write, sys_write)
#define __NR_readv 65
-__SC_COMP(__NR_readv, sys_readv, compat_sys_readv)
+__SC_COMP(__NR_readv, sys_readv, sys_readv)
#define __NR_writev 66
-__SC_COMP(__NR_writev, sys_writev, compat_sys_writev)
+__SC_COMP(__NR_writev, sys_writev, sys_writev)
#define __NR_pread64 67
__SC_COMP(__NR_pread64, sys_pread64, compat_sys_pread64)
#define __NR_pwrite64 68
diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h
index 995b36c2ea7d8a..211c9eacbda6eb 100644
--- a/tools/include/uapi/asm-generic/unistd.h
+++ b/tools/include/uapi/asm-generic/unistd.h
@@ -207,9 +207,9 @@ __SYSCALL(__NR_read, sys_read)
#define __NR_write 64
__SYSCALL(__NR_write, sys_write)
#define __NR_readv 65
-__SC_COMP(__NR_readv, sys_readv, compat_sys_readv)
+__SC_COMP(__NR_readv, sys_readv, sys_readv)
#define __NR_writev 66
-__SC_COMP(__NR_writev, sys_writev, compat_sys_writev)
+__SC_COMP(__NR_writev, sys_writev, sys_writev)
#define __NR_pread64 67
__SC_COMP(__NR_pread64, sys_pread64, compat_sys_pread64)
#define __NR_pwrite64 68
diff --git a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
index 3ca6fe057a0b1f..46be68029587f9 100644
--- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
@@ -189,8 +189,8 @@
142 common _newselect sys_select compat_sys_select
143 common flock sys_flock
144 common msync sys_msync
-145 common readv sys_readv compat_sys_readv
-146 common writev sys_writev compat_sys_writev
+145 common readv sys_readv
+146 common writev sys_writev
147 common getsid sys_getsid
148 common fdatasync sys_fdatasync
149 nospu _sysctl sys_ni_syscall
diff --git a/tools/perf/arch/s390/entry/syscalls/syscall.tbl b/tools/perf/arch/s390/entry/syscalls/syscall.tbl
index 6a0bbea225db0d..fb5e61ce9d5838 100644
--- a/tools/perf/arch/s390/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/s390/entry/syscalls/syscall.tbl
@@ -134,8 +134,8 @@
142 64 select sys_select -
143 common flock sys_flock sys_flock
144 common msync sys_msync compat_sys_msync
-145 common readv sys_readv compat_sys_readv
-146 common writev sys_writev compat_sys_writev
+145 common readv sys_readv
+146 common writev sys_writev
147 common getsid sys_getsid sys_getsid
148 common fdatasync sys_fdatasync sys_fdatasync
149 common _sysctl - -
diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
index f30d6ae9a6883c..b1e59957c5c51c 100644
--- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
@@ -371,8 +371,8 @@
512 x32 rt_sigaction compat_sys_rt_sigaction
513 x32 rt_sigreturn compat_sys_x32_rt_sigreturn
514 x32 ioctl compat_sys_ioctl
-515 x32 readv compat_sys_readv
-516 x32 writev compat_sys_writev
+515 x32 readv sys_readv
+516 x32 writev sys_writev
517 x32 recvfrom compat_sys_recvfrom
518 x32 sendmsg compat_sys_sendmsg
519 x32 recvmsg compat_sys_recvmsg
--
2.28.0
^ permalink raw reply related
* [PATCH 4/9] fs: handle the compat case in import_iovec
From: Christoph Hellwig @ 2020-09-18 12:45 UTC (permalink / raw)
To: Alexander Viro
Cc: linux-aio, linux-mips, David Howells, linux-mm, keyrings,
sparclinux, linux-arch, linux-s390, linux-scsi, x86,
Arnd Bergmann, linux-block, io-uring, linux-arm-kernel,
Jens Axboe, linux-parisc, netdev, linux-kernel,
linux-security-module, linux-fsdevel, Andrew Morton, linuxppc-dev
In-Reply-To: <20200918124533.3487701-1-hch@lst.de>
Use in compat_syscall to import either native or the compat iovecs, and
remove the now superflous compat_import_iovec.
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
block/scsi_ioctl.c | 12 +---
drivers/scsi/sg.c | 9 +--
fs/aio.c | 38 +++++-------
fs/io_uring.c | 12 +---
fs/read_write.c | 127 +++++++++++++++--------------------------
fs/splice.c | 2 +-
include/linux/compat.h | 6 --
include/linux/fs.h | 7 +--
include/linux/uio.h | 7 ---
lib/iov_iter.c | 30 +---------
mm/process_vm_access.c | 9 +--
net/compat.c | 4 +-
security/keys/compat.c | 5 +-
13 files changed, 83 insertions(+), 185 deletions(-)
diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c
index ef722f04f88a93..e08df86866ee5d 100644
--- a/block/scsi_ioctl.c
+++ b/block/scsi_ioctl.c
@@ -333,16 +333,8 @@ static int sg_io(struct request_queue *q, struct gendisk *bd_disk,
struct iov_iter i;
struct iovec *iov = NULL;
-#ifdef CONFIG_COMPAT
- if (in_compat_syscall())
- ret = compat_import_iovec(rq_data_dir(rq),
- hdr->dxferp, hdr->iovec_count,
- 0, &iov, &i);
- else
-#endif
- ret = import_iovec(rq_data_dir(rq),
- hdr->dxferp, hdr->iovec_count,
- 0, &iov, &i);
+ ret = import_iovec(rq_data_dir(rq), hdr->dxferp,
+ hdr->iovec_count, 0, &iov, &i);
if (ret < 0)
goto out_free_cdb;
diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
index 20472aaaf630a4..bfa8d77322d732 100644
--- a/drivers/scsi/sg.c
+++ b/drivers/scsi/sg.c
@@ -1820,14 +1820,7 @@ sg_start_req(Sg_request *srp, unsigned char *cmd)
struct iovec *iov = NULL;
struct iov_iter i;
-#ifdef CONFIG_COMPAT
- if (in_compat_syscall())
- res = compat_import_iovec(rw, hp->dxferp, iov_count,
- 0, &iov, &i);
- else
-#endif
- res = import_iovec(rw, hp->dxferp, iov_count,
- 0, &iov, &i);
+ res = import_iovec(rw, hp->dxferp, iov_count, 0, &iov, &i);
if (res < 0)
return res;
diff --git a/fs/aio.c b/fs/aio.c
index d5ec303855669d..b377f5c2048e18 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1478,8 +1478,7 @@ static int aio_prep_rw(struct kiocb *req, const struct iocb *iocb)
}
static ssize_t aio_setup_rw(int rw, const struct iocb *iocb,
- struct iovec **iovec, bool vectored, bool compat,
- struct iov_iter *iter)
+ struct iovec **iovec, bool vectored, struct iov_iter *iter)
{
void __user *buf = (void __user *)(uintptr_t)iocb->aio_buf;
size_t len = iocb->aio_nbytes;
@@ -1489,11 +1488,6 @@ static ssize_t aio_setup_rw(int rw, const struct iocb *iocb,
*iovec = NULL;
return ret;
}
-#ifdef CONFIG_COMPAT
- if (compat)
- return compat_import_iovec(rw, buf, len, UIO_FASTIOV, iovec,
- iter);
-#endif
return import_iovec(rw, buf, len, UIO_FASTIOV, iovec, iter);
}
@@ -1517,8 +1511,7 @@ static inline void aio_rw_done(struct kiocb *req, ssize_t ret)
}
}
-static int aio_read(struct kiocb *req, const struct iocb *iocb,
- bool vectored, bool compat)
+static int aio_read(struct kiocb *req, const struct iocb *iocb, bool vectored)
{
struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
struct iov_iter iter;
@@ -1535,7 +1528,7 @@ static int aio_read(struct kiocb *req, const struct iocb *iocb,
if (unlikely(!file->f_op->read_iter))
return -EINVAL;
- ret = aio_setup_rw(READ, iocb, &iovec, vectored, compat, &iter);
+ ret = aio_setup_rw(READ, iocb, &iovec, vectored, &iter);
if (ret < 0)
return ret;
ret = rw_verify_area(READ, file, &req->ki_pos, iov_iter_count(&iter));
@@ -1545,8 +1538,7 @@ static int aio_read(struct kiocb *req, const struct iocb *iocb,
return ret;
}
-static int aio_write(struct kiocb *req, const struct iocb *iocb,
- bool vectored, bool compat)
+static int aio_write(struct kiocb *req, const struct iocb *iocb, bool vectored)
{
struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
struct iov_iter iter;
@@ -1563,7 +1555,7 @@ static int aio_write(struct kiocb *req, const struct iocb *iocb,
if (unlikely(!file->f_op->write_iter))
return -EINVAL;
- ret = aio_setup_rw(WRITE, iocb, &iovec, vectored, compat, &iter);
+ ret = aio_setup_rw(WRITE, iocb, &iovec, vectored, &iter);
if (ret < 0)
return ret;
ret = rw_verify_area(WRITE, file, &req->ki_pos, iov_iter_count(&iter));
@@ -1799,8 +1791,7 @@ static int aio_poll(struct aio_kiocb *aiocb, const struct iocb *iocb)
}
static int __io_submit_one(struct kioctx *ctx, const struct iocb *iocb,
- struct iocb __user *user_iocb, struct aio_kiocb *req,
- bool compat)
+ struct iocb __user *user_iocb, struct aio_kiocb *req)
{
req->ki_filp = fget(iocb->aio_fildes);
if (unlikely(!req->ki_filp))
@@ -1833,13 +1824,13 @@ static int __io_submit_one(struct kioctx *ctx, const struct iocb *iocb,
switch (iocb->aio_lio_opcode) {
case IOCB_CMD_PREAD:
- return aio_read(&req->rw, iocb, false, compat);
+ return aio_read(&req->rw, iocb, false);
case IOCB_CMD_PWRITE:
- return aio_write(&req->rw, iocb, false, compat);
+ return aio_write(&req->rw, iocb, false);
case IOCB_CMD_PREADV:
- return aio_read(&req->rw, iocb, true, compat);
+ return aio_read(&req->rw, iocb, true);
case IOCB_CMD_PWRITEV:
- return aio_write(&req->rw, iocb, true, compat);
+ return aio_write(&req->rw, iocb, true);
case IOCB_CMD_FSYNC:
return aio_fsync(&req->fsync, iocb, false);
case IOCB_CMD_FDSYNC:
@@ -1852,8 +1843,7 @@ static int __io_submit_one(struct kioctx *ctx, const struct iocb *iocb,
}
}
-static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
- bool compat)
+static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb)
{
struct aio_kiocb *req;
struct iocb iocb;
@@ -1882,7 +1872,7 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
if (unlikely(!req))
return -EAGAIN;
- err = __io_submit_one(ctx, &iocb, user_iocb, req, compat);
+ err = __io_submit_one(ctx, &iocb, user_iocb, req);
/* Done with the synchronous reference */
iocb_put(req);
@@ -1941,7 +1931,7 @@ SYSCALL_DEFINE3(io_submit, aio_context_t, ctx_id, long, nr,
break;
}
- ret = io_submit_one(ctx, user_iocb, false);
+ ret = io_submit_one(ctx, user_iocb);
if (ret)
break;
}
@@ -1983,7 +1973,7 @@ COMPAT_SYSCALL_DEFINE3(io_submit, compat_aio_context_t, ctx_id,
break;
}
- ret = io_submit_one(ctx, compat_ptr(user_iocb), true);
+ ret = io_submit_one(ctx, compat_ptr(user_iocb));
if (ret)
break;
}
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 5755d557c3f7bc..dc888f911f04b4 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -2837,12 +2837,6 @@ static ssize_t __io_import_iovec(int rw, struct io_kiocb *req,
return ret;
}
-#ifdef CONFIG_COMPAT
- if (req->ctx->compat)
- return compat_import_iovec(rw, buf, sqe_len, UIO_FASTIOV,
- iovec, iter);
-#endif
-
return import_iovec(rw, buf, sqe_len, UIO_FASTIOV, iovec, iter);
}
@@ -4220,9 +4214,9 @@ static int __io_compat_recvmsg_copy_hdr(struct io_kiocb *req,
sr->len = iomsg->iov[0].iov_len;
iomsg->iov = NULL;
} else {
- ret = compat_import_iovec(READ, uiov, len, UIO_FASTIOV,
- &iomsg->iov,
- &iomsg->msg.msg_iter);
+ ret = import_iovec(READ, (struct iovec __user *)uiov, len,
+ UIO_FASTIOV, &iomsg->iov,
+ &iomsg->msg.msg_iter);
if (ret < 0)
return ret;
}
diff --git a/fs/read_write.c b/fs/read_write.c
index f153116bc5399b..2f961c653ce561 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -752,6 +752,38 @@ static ssize_t do_loop_readv_writev(struct file *filp, struct iov_iter *iter,
return ret;
}
+static int compat_copy_iovecs_from_user(struct iovec *iov,
+ const struct iovec __user *uvector, unsigned long nr_segs)
+{
+ const struct compat_iovec __user *uiov =
+ (const struct compat_iovec __user *)uvector;
+ unsigned long i;
+ int ret = -EFAULT;
+
+ if (!user_access_begin(uvector, nr_segs * sizeof(*uvector)))
+ return -EFAULT;
+
+ for (i = 0; i < nr_segs; i++) {
+ compat_uptr_t buf;
+ compat_ssize_t len;
+
+ unsafe_get_user(len, &uiov[i].iov_len, out);
+ unsafe_get_user(buf, &uiov[i].iov_base, out);
+
+ /* check for compat_size_t not fitting in compat_ssize_t .. */
+ if (len < 0) {
+ ret = -EINVAL;
+ goto out;
+ }
+ iov[i].iov_base = compat_ptr(buf);
+ iov[i].iov_len = len;
+ }
+ ret = 0;
+out:
+ user_access_end();
+ return ret;
+}
+
/**
* rw_copy_check_uvector() - Copy an array of &struct iovec from userspace
* into the kernel and check that it is valid.
@@ -808,6 +840,7 @@ ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector,
ret = -EINVAL;
goto out;
}
+
if (nr_segs > fast_segs) {
iov = kmalloc_array(nr_segs, sizeof(struct iovec), GFP_KERNEL);
if (iov == NULL) {
@@ -815,9 +848,16 @@ ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector,
goto out;
}
}
- if (copy_from_user(iov, uvector, nr_segs*sizeof(*uvector))) {
- ret = -EFAULT;
- goto out;
+
+ if (in_compat_syscall()) {
+ ret = compat_copy_iovecs_from_user(iov, uvector, nr_segs);
+ if (ret)
+ goto out;
+ } else {
+ if (copy_from_user(iov, uvector, nr_segs * sizeof(*uvector))) {
+ ret = -EFAULT;
+ goto out;
+ }
}
/*
@@ -855,81 +895,6 @@ ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector,
return ret;
}
-#ifdef CONFIG_COMPAT
-ssize_t compat_rw_copy_check_uvector(int type,
- const struct compat_iovec __user *uvector, unsigned long nr_segs,
- unsigned long fast_segs, struct iovec *fast_pointer,
- struct iovec **ret_pointer)
-{
- compat_ssize_t tot_len;
- struct iovec *iov = *ret_pointer = fast_pointer;
- ssize_t ret = 0;
- int seg;
-
- /*
- * SuS says "The readv() function *may* fail if the iovcnt argument
- * was less than or equal to 0, or greater than {IOV_MAX}. Linux has
- * traditionally returned zero for zero segments, so...
- */
- if (nr_segs == 0)
- goto out;
-
- ret = -EINVAL;
- if (nr_segs > UIO_MAXIOV)
- goto out;
- if (nr_segs > fast_segs) {
- ret = -ENOMEM;
- iov = kmalloc_array(nr_segs, sizeof(struct iovec), GFP_KERNEL);
- if (iov == NULL)
- goto out;
- }
- *ret_pointer = iov;
-
- ret = -EFAULT;
- if (!access_ok(uvector, nr_segs*sizeof(*uvector)))
- goto out;
-
- /*
- * Single unix specification:
- * We should -EINVAL if an element length is not >= 0 and fitting an
- * ssize_t.
- *
- * In Linux, the total length is limited to MAX_RW_COUNT, there is
- * no overflow possibility.
- */
- tot_len = 0;
- ret = -EINVAL;
- for (seg = 0; seg < nr_segs; seg++) {
- compat_uptr_t buf;
- compat_ssize_t len;
-
- if (__get_user(len, &uvector->iov_len) ||
- __get_user(buf, &uvector->iov_base)) {
- ret = -EFAULT;
- goto out;
- }
- if (len < 0) /* size_t not fitting in compat_ssize_t .. */
- goto out;
- if (type != CHECK_IOVEC_ONLY &&
- !access_ok(compat_ptr(buf), len)) {
- ret = -EFAULT;
- goto out;
- }
- if (len > MAX_RW_COUNT - tot_len)
- len = MAX_RW_COUNT - tot_len;
- tot_len += len;
- iov->iov_base = compat_ptr(buf);
- iov->iov_len = (compat_size_t) len;
- uvector++;
- iov++;
- }
- ret = tot_len;
-
-out:
- return ret;
-}
-#endif
-
static ssize_t do_iter_read(struct file *file, struct iov_iter *iter,
loff_t *pos, rwf_t flags)
{
@@ -1256,7 +1221,8 @@ static size_t compat_readv(struct file *file,
struct iov_iter iter;
ssize_t ret;
- ret = compat_import_iovec(READ, vec, vlen, UIO_FASTIOV, &iov, &iter);
+ ret = import_iovec(READ, (const struct iovec __user *)vec, vlen,
+ UIO_FASTIOV, &iov, &iter);
if (ret >= 0) {
ret = do_iter_read(file, &iter, pos, flags);
kfree(iov);
@@ -1364,7 +1330,8 @@ static size_t compat_writev(struct file *file,
struct iov_iter iter;
ssize_t ret;
- ret = compat_import_iovec(WRITE, vec, vlen, UIO_FASTIOV, &iov, &iter);
+ ret = import_iovec(WRITE, (const struct iovec __user *)vec, vlen,
+ UIO_FASTIOV, &iov, &iter);
if (ret >= 0) {
file_start_write(file);
ret = do_iter_write(file, &iter, pos, flags);
diff --git a/fs/splice.c b/fs/splice.c
index d7c8a7c4db07ff..132d42b9871f9b 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -1387,7 +1387,7 @@ COMPAT_SYSCALL_DEFINE4(vmsplice, int, fd, const struct compat_iovec __user *, io
if (error)
return error;
- error = compat_import_iovec(type, iov32, nr_segs,
+ error = import_iovec(type, (struct iovec __user *)iov32, nr_segs,
ARRAY_SIZE(iovstack), &iov, &iter);
if (error >= 0) {
error = do_vmsplice(f.file, &iter, flags);
diff --git a/include/linux/compat.h b/include/linux/compat.h
index 69968c124b3cad..ad6dc56e8828d6 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -451,12 +451,6 @@ extern long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
struct epoll_event; /* fortunately, this one is fixed-layout */
-extern ssize_t compat_rw_copy_check_uvector(int type,
- const struct compat_iovec __user *uvector,
- unsigned long nr_segs,
- unsigned long fast_segs, struct iovec *fast_pointer,
- struct iovec **ret_pointer);
-
extern void __user *compat_alloc_user_space(unsigned long len);
int compat_restore_altstack(const compat_stack_t __user *uss);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 7519ae003a082c..3cc0ee0de45648 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -179,10 +179,9 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
#define FMODE_BUF_RASYNC ((__force fmode_t)0x40000000)
/*
- * Flag for rw_copy_check_uvector and compat_rw_copy_check_uvector
- * that indicates that they should check the contents of the iovec are
- * valid, but not check the memory that the iovec elements
- * points too.
+ * Flag for rw_copy_check_uvector that indicates that they should check the
+ * contents of the iovec are valid, but not check the memory that the iovec
+ * elements points too.
*/
#define CHECK_IOVEC_ONLY -1
diff --git a/include/linux/uio.h b/include/linux/uio.h
index 3835a8a8e9eae0..2c14e55687fec6 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -270,13 +270,6 @@ ssize_t import_iovec(int type, const struct iovec __user * uvector,
unsigned nr_segs, unsigned fast_segs,
struct iovec **iov, struct iov_iter *i);
-#ifdef CONFIG_COMPAT
-struct compat_iovec;
-ssize_t compat_import_iovec(int type, const struct compat_iovec __user * uvector,
- unsigned nr_segs, unsigned fast_segs,
- struct iovec **iov, struct iov_iter *i);
-#endif
-
int import_single_range(int type, void __user *buf, size_t len,
struct iovec *iov, struct iov_iter *i);
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index 5e40786c8f1232..792f31c1cd96ba 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -10,6 +10,7 @@
#include <net/checksum.h>
#include <linux/scatterlist.h>
#include <linux/instrumented.h>
+#include <linux/compat.h>
#define PIPE_PARANOIA /* for now */
@@ -1678,32 +1679,8 @@ ssize_t import_iovec(int type, const struct iovec __user * uvector,
{
ssize_t n;
struct iovec *p;
- n = rw_copy_check_uvector(type, uvector, nr_segs, fast_segs,
- *iov, &p);
- if (n < 0) {
- if (p != *iov)
- kfree(p);
- *iov = NULL;
- return n;
- }
- iov_iter_init(i, type, p, nr_segs, n);
- *iov = p == *iov ? NULL : p;
- return n;
-}
-EXPORT_SYMBOL(import_iovec);
-#ifdef CONFIG_COMPAT
-#include <linux/compat.h>
-
-ssize_t compat_import_iovec(int type,
- const struct compat_iovec __user * uvector,
- unsigned nr_segs, unsigned fast_segs,
- struct iovec **iov, struct iov_iter *i)
-{
- ssize_t n;
- struct iovec *p;
- n = compat_rw_copy_check_uvector(type, uvector, nr_segs, fast_segs,
- *iov, &p);
+ n = rw_copy_check_uvector(type, uvector, nr_segs, fast_segs, *iov, &p);
if (n < 0) {
if (p != *iov)
kfree(p);
@@ -1714,8 +1691,7 @@ ssize_t compat_import_iovec(int type,
*iov = p == *iov ? NULL : p;
return n;
}
-EXPORT_SYMBOL(compat_import_iovec);
-#endif
+EXPORT_SYMBOL(import_iovec);
int import_single_range(int rw, void __user *buf, size_t len,
struct iovec *iov, struct iov_iter *i)
diff --git a/mm/process_vm_access.c b/mm/process_vm_access.c
index 29c052099affdc..f21feebbd48f39 100644
--- a/mm/process_vm_access.c
+++ b/mm/process_vm_access.c
@@ -329,14 +329,15 @@ compat_process_vm_rw(compat_pid_t pid,
if (flags != 0)
return -EINVAL;
- rc = compat_import_iovec(dir, lvec, liovcnt, UIO_FASTIOV, &iov_l, &iter);
+ rc = import_iovec(dir, (const struct iovec __user *)lvec, liovcnt,
+ UIO_FASTIOV, &iov_l, &iter);
if (rc < 0)
return rc;
if (!iov_iter_count(&iter))
goto free_iovecs;
- rc = compat_rw_copy_check_uvector(CHECK_IOVEC_ONLY, rvec, riovcnt,
- UIO_FASTIOV, iovstack_r,
- &iov_r);
+ rc = rw_copy_check_uvector(CHECK_IOVEC_ONLY,
+ (const struct iovec __user *)rvec, riovcnt,
+ UIO_FASTIOV, iovstack_r, &iov_r);
if (rc <= 0)
goto free_iovecs;
diff --git a/net/compat.c b/net/compat.c
index 95ce707a30a31d..ddd15af3a2837b 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -98,8 +98,8 @@ int get_compat_msghdr(struct msghdr *kmsg,
if (err)
return err;
- err = compat_import_iovec(save_addr ? READ : WRITE, compat_ptr(ptr),
- len, UIO_FASTIOV, iov, &kmsg->msg_iter);
+ err = import_iovec(save_addr ? READ : WRITE, compat_ptr(ptr), len,
+ UIO_FASTIOV, iov, &kmsg->msg_iter);
return err < 0 ? err : 0;
}
diff --git a/security/keys/compat.c b/security/keys/compat.c
index 6ee9d8f6a4a5bb..7ae531db031cf8 100644
--- a/security/keys/compat.c
+++ b/security/keys/compat.c
@@ -33,9 +33,8 @@ static long compat_keyctl_instantiate_key_iov(
if (!_payload_iov)
ioc = 0;
- ret = compat_import_iovec(WRITE, _payload_iov, ioc,
- ARRAY_SIZE(iovstack), &iov,
- &from);
+ ret = import_iovec(WRITE, (const struct iovec __user *)_payload_iov,
+ ioc, ARRAY_SIZE(iovstack), &iov, &from);
if (ret < 0)
return ret;
--
2.28.0
^ permalink raw reply related
* [GIT PULL] Please pull powerpc/linux.git powerpc-5.9-5 tag
From: Michael Ellerman @ 2020-09-18 12:20 UTC (permalink / raw)
To: Linus Torvalds
Cc: ego, aik, aneesh.kumar, linux-kernel, vaibhav, linuxppc-dev
-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA256
Hi Linus,
Please pull some more powerpc fixes for 5.9:
The following changes since commit 4a133eb351ccc275683ad49305d0b04dde903733:
powerpc/32s: Disable VMAP stack which CONFIG_ADB_PMU (2020-08-28 12:03:18 +1000)
are available in the git repository at:
https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git tags/powerpc-5.9-5
for you to fetch changes up to 0460534b532e5518c657c7d6492b9337d975eaa3:
powerpc/papr_scm: Limit the readability of 'perf_stats' sysfs attribute (2020-09-09 14:44:38 +1000)
- ------------------------------------------------------------------
powerpc fixes for 5.9 #5
Opt us out of the DEBUG_VM_PGTABLE support for now as it's causing crashes.
Fix a long standing bug in our DMA mask handling that was hidden until recently,
and which caused problems with some drivers.
Fix a boot failure on systems with large amounts of RAM, and no hugepage support
and using Radix MMU, only seen in the lab.
A few other minor fixes.
Thanks to:
Alexey Kardashevskiy, Aneesh Kumar K.V, Gautham R. Shenoy, Hari Bathini, Ira
Weiny, Nick Desaulniers, Shirisha Ganta, Vaibhav Jain, Vaidyanathan
Srinivasan.
- ------------------------------------------------------------------
Alexey Kardashevskiy (1):
powerpc/dma: Fix dma_map_ops::get_required_mask
Aneesh Kumar K.V (2):
powerpc/book3s64/radix: Fix boot failure with large amount of guest memory
powerpc/mm: Remove DEBUG_VM_PGTABLE support on powerpc
Gautham R. Shenoy (1):
cpuidle: pseries: Fix CEDE latency conversion from tb to us
Michael Ellerman (2):
selftests/powerpc: Skip PROT_SAO test in guests/LPARS
Revert "powerpc/build: vdso linker warning for orphan sections"
Vaibhav Jain (1):
powerpc/papr_scm: Limit the readability of 'perf_stats' sysfs attribute
Documentation/features/debug/debug-vm-pgtable/arch-support.txt | 2 +-
arch/powerpc/Kconfig | 1 -
arch/powerpc/include/asm/book3s/64/mmu.h | 10 +++++-----
arch/powerpc/kernel/dma-iommu.c | 3 ++-
arch/powerpc/kernel/vdso32/Makefile | 2 +-
arch/powerpc/kernel/vdso32/vdso32.lds.S | 1 -
arch/powerpc/kernel/vdso64/Makefile | 2 +-
arch/powerpc/kernel/vdso64/vdso64.lds.S | 3 +--
arch/powerpc/mm/book3s64/radix_pgtable.c | 15 ---------------
arch/powerpc/mm/init_64.c | 11 +++++++++--
arch/powerpc/platforms/pseries/papr_scm.c | 2 +-
drivers/cpuidle/cpuidle-pseries.c | 15 +++++++++++----
tools/testing/selftests/powerpc/mm/prot_sao.c | 9 +++++++--
13 files changed, 39 insertions(+), 37 deletions(-)
-----BEGIN PGP SIGNATURE-----
iQIzBAEBCAAdFiEEJFGtCPCthwEv2Y/bUevqPMjhpYAFAl9kpeIACgkQUevqPMjh
pYC4aQ/9HwhZgP4aLepJ51l+SajCw7GkDco50MorgjJQLgP2t2Yua/bE2VdB4V4E
PFKq0ZFXytFRT6/njIFZVDsvrL5RbEyN5vHq7hwrmR3B+VJybQOzdyxncZUBlP7n
RZAQS/MvMAu+OR2NFG0QLo9zx4FA9QKMEVYbr20Eftw4l613hP6yV+ubxYR/ww/S
JvZw7JlyBRQfvpH8rb2x2sa1CLtPWGrZyUKWOQx8CTdIClgO7oghMGKAz5PyL+li
AyaIT5e9QKJw5qNUI7Mv56oat+dBHz0xRKSEhrYhjU9LfJ7HBCK23C7l3Wzw5OQO
94t3aAaCIa67uPk2TMdblM8aUN75hKxmRHg5GIBfTyhQKlWptb7A4M6BVd9sDm+d
ggoF+LOZfypoM/xgFPAvVtdyacmRHfhZ+OHILPsTL3IKZRK2Lmr6CgJgY12Dzelk
HudpQW58Egq/ZxwHH66UN6JzteYq7H6oKW7qmiJFftWm00Z78uSFlwFv9dX6aj1W
CGVcANquLY5x6WjrYr2HZa5dfU3rnSRNMrTrXz9S6+ctboGl63pkIfWS1dQ4nB/7
9SmPVivCK/gb1Sdv9LogGYTAgPgBUbC5lYzg1NlR3hOXyWre5P+do68RZFzujCtS
EO4Phx+h+duzLXth35dQQ3tkhn2u3S7tuuXq0s4KT8ZD2TRWc8I=
=UdKu
-----END PGP SIGNATURE-----
^ permalink raw reply
* Re: [PATCH v2 1/4] mm: fix exec activate_mm vs TLB shootdown and lazy tlb switching race
From: Michael Ellerman @ 2020-09-18 12:18 UTC (permalink / raw)
To: Nicholas Piggin, peterz
Cc: Jens Axboe, linux-arch, Dave Hansen, Aneesh Kumar K . V,
linux-kernel, Andy Lutomirski, linux-mm @ kvack . org, sparclinux,
Andrew Morton, linuxppc-dev, David S . Miller
In-Reply-To: <1600137586.nypnz3sbcl.astroid@bobo.none>
Nicholas Piggin <npiggin@gmail.com> writes:
> Excerpts from peterz@infradead.org's message of September 14, 2020 8:56 pm:
>> On Mon, Sep 14, 2020 at 02:52:16PM +1000, Nicholas Piggin wrote:
>>> Reading and modifying current->mm and current->active_mm and switching
>>> mm should be done with irqs off, to prevent races seeing an intermediate
>>> state.
...
>>>
>>> This is a bit ugly, but in the interest of fixing the bug and backporting
>>> before all architectures are converted this is a compromise.
>>>
>>> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
>>
>> Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
>>
>> I'm thinking we want this selected on x86 as well. Andy?
>
> Thanks for the ack. The plan was to take it through the powerpc tree,
> but if you'd want x86 to select it, maybe a topic branch?
I've put this series in a topic branch based on v5.9-rc2:
https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git/log/?h=topic/irqs-off-activate-mm
I plan to merge it into the powerpc/next tree for v5.10, but if anyone
else wants to merge it that's fine too.
cheers
^ permalink raw reply
* Re: [PATCH 2/3] powerpc/mce: Add debugfs interface to inject MCE
From: Ganesh @ 2020-09-18 12:07 UTC (permalink / raw)
To: Michael Ellerman, linuxppc-dev; +Cc: mahesh, msuchanek, Kees Cook, npiggin
In-Reply-To: <87a6xnoadk.fsf@mpe.ellerman.id.au>
On 9/18/20 12:10 PM, Michael Ellerman wrote:
> Hi Ganesh,
>
> Ganesh Goudar <ganeshgr@linux.ibm.com> writes:
>> To test machine check handling, add debugfs interface to inject
>> slb multihit errors.
>>
>> To inject slb multihit:
>> #echo 1 > /sys/kernel/debug/powerpc/mce_error_inject/inject_slb_multihit
> Rather than creating a new ad-hoc way to trigger this, can you please
> integrate it into drivers/misc/lkdtm.
>
> There's enough code here that I think you should create
> drivers/misc/lkdtm/powerpc.c and put the code in there. Then add an
> LKDTM entry point for this, maybe called PPC_SLB_MULTIHIT.
>
> Please Cc Kees when you repost.
Sure, Thanks
> cheers
>
>
>> arch/powerpc/Kconfig.debug | 9 ++
>> arch/powerpc/sysdev/Makefile | 2 +
>> arch/powerpc/sysdev/mce_error_inject.c | 148 +++++++++++++++++++++++++
>> 3 files changed, 159 insertions(+)
>> create mode 100644 arch/powerpc/sysdev/mce_error_inject.c
>>
>> diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug
>> index b88900f4832f..61db133f2f0d 100644
>> --- a/arch/powerpc/Kconfig.debug
>> +++ b/arch/powerpc/Kconfig.debug
>> @@ -398,3 +398,12 @@ config KASAN_SHADOW_OFFSET
>> hex
>> depends on KASAN
>> default 0xe0000000
>> +
>> +config MCE_ERROR_INJECT
>> + bool "Enable MCE error injection through debugfs"
>> + depends on DEBUG_FS
>> + default y
>> + help
>> + This option creates an mce_error_inject directory in the
>> + powerpc debugfs directory that allows limited injection of
>> + Machine Check Errors (MCEs).
>> diff --git a/arch/powerpc/sysdev/Makefile b/arch/powerpc/sysdev/Makefile
>> index 026b3f01a991..7fc102222b77 100644
>> --- a/arch/powerpc/sysdev/Makefile
>> +++ b/arch/powerpc/sysdev/Makefile
>> @@ -52,3 +52,5 @@ obj-$(CONFIG_PPC_XICS) += xics/
>> obj-$(CONFIG_PPC_XIVE) += xive/
>>
>> obj-$(CONFIG_GE_FPGA) += ge/
>> +
>> +obj-$(CONFIG_MCE_ERROR_INJECT) += mce_error_inject.o
>> diff --git a/arch/powerpc/sysdev/mce_error_inject.c b/arch/powerpc/sysdev/mce_error_inject.c
>> new file mode 100644
>> index 000000000000..ca4726bfa2d9
>> --- /dev/null
>> +++ b/arch/powerpc/sysdev/mce_error_inject.c
>> @@ -0,0 +1,148 @@
>> +// SPDX-License-Identifier: GPL-2.0
>> +/*
>> + * Machine Check Exception injection code
>> + */
>> +
>> +#include <linux/kernel.h>
>> +#include <linux/slab.h>
>> +#include <linux/vmalloc.h>
>> +#include <linux/fs.h>
>> +#include <linux/debugfs.h>
>> +#include <asm/debugfs.h>
>> +
>> +static inline unsigned long get_slb_index(void)
>> +{
>> + unsigned long index;
>> +
>> + index = get_paca()->stab_rr;
>> +
>> + /*
>> + * simple round-robin replacement of slb starting at SLB_NUM_BOLTED.
>> + */
>> + if (index < (mmu_slb_size - 1))
>> + index++;
>> + else
>> + index = SLB_NUM_BOLTED;
>> + get_paca()->stab_rr = index;
>> + return index;
>> +}
>> +
>> +#define slb_esid_mask(ssize) \
>> + (((ssize) == MMU_SEGSIZE_256M) ? ESID_MASK : ESID_MASK_1T)
>> +
>> +static inline unsigned long mk_esid_data(unsigned long ea, int ssize,
>> + unsigned long slot)
>> +{
>> + return (ea & slb_esid_mask(ssize)) | SLB_ESID_V | slot;
>> +}
>> +
>> +#define slb_vsid_shift(ssize) \
>> + ((ssize) == MMU_SEGSIZE_256M ? SLB_VSID_SHIFT : SLB_VSID_SHIFT_1T)
>> +
>> +static inline unsigned long mk_vsid_data(unsigned long ea, int ssize,
>> + unsigned long flags)
>> +{
>> + return (get_kernel_vsid(ea, ssize) << slb_vsid_shift(ssize)) | flags |
>> + ((unsigned long)ssize << SLB_VSID_SSIZE_SHIFT);
>> +}
>> +
>> +static void insert_slb_entry(char *p, int ssize)
>> +{
>> + unsigned long flags, entry;
>> + struct paca_struct *paca;
>> +
>> + flags = SLB_VSID_KERNEL | mmu_psize_defs[MMU_PAGE_64K].sllp;
>> +
>> + preempt_disable();
>> +
>> + paca = get_paca();
>> +
>> + entry = get_slb_index();
>> + asm volatile("slbmte %0,%1" :
>> + : "r" (mk_vsid_data((unsigned long)p, ssize, flags)),
>> + "r" (mk_esid_data((unsigned long)p, ssize, entry))
>> + : "memory");
>> +
>> + entry = get_slb_index();
>> + asm volatile("slbmte %0,%1" :
>> + : "r" (mk_vsid_data((unsigned long)p, ssize, flags)),
>> + "r" (mk_esid_data((unsigned long)p, ssize, entry))
>> + : "memory");
>> + preempt_enable();
>> + p[0] = '!';
>> +}
>> +
>> +static void inject_vmalloc_slb_multihit(void)
>> +{
>> + char *p;
>> +
>> + p = vmalloc(2048);
>> + if (!p)
>> + return;
>> +
>> + insert_slb_entry(p, MMU_SEGSIZE_1T);
>> + vfree(p);
>> +}
>> +
>> +static void inject_kmalloc_slb_multihit(void)
>> +{
>> + char *p;
>> +
>> + p = kmalloc(2048, GFP_KERNEL);
>> + if (!p)
>> + return;
>> +
>> + insert_slb_entry(p, MMU_SEGSIZE_1T);
>> + kfree(p);
>> +}
>> +
>> +static ssize_t inject_slb_multihit(const char __user *u_buf, size_t count)
>> +{
>> + char buf[32];
>> + size_t buf_size;
>> +
>> + buf_size = min(count, (sizeof(buf) - 1));
>> + if (copy_from_user(buf, u_buf, buf_size))
>> + return -EFAULT;
>> + buf[buf_size] = '\0';
>> +
>> + if (buf[0] != '1')
>> + return -EINVAL;
>> +
>> + inject_vmalloc_slb_multihit();
>> + inject_kmalloc_slb_multihit();
>> + return count;
>> +}
>> +
>> +static ssize_t inject_write(struct file *file, const char __user *buf,
>> + size_t count, loff_t *ppos)
>> +{
>> + static ssize_t (*func)(const char __user *, size_t);
>> +
>> + func = file->f_inode->i_private;
>> + return func(buf, count);
>> +}
>> +
>> +static const struct file_operations inject_fops = {
>> + .write = inject_write,
>> + .llseek = default_llseek,
>> +};
>> +
>> +static int mce_error_inject_setup(void)
>> +{
>> + struct dentry *mce_error_inject_dir;
>> +
>> + mce_error_inject_dir = debugfs_create_dir("mce_error_inject",
>> + powerpc_debugfs_root);
>> +
>> + if (mmu_has_feature(MMU_FTR_HPTE_TABLE)) {
>> + (void)debugfs_create_file("inject_slb_multihit", 0200,
>> + mce_error_inject_dir,
>> + &inject_slb_multihit,
>> + &inject_fops);
>> + }
>> +
>> + return 0;
>> +}
>> +
>> +device_initcall(mce_error_inject_setup);
>> --
>> 2.26.2
^ permalink raw reply
* Re: [PATCH 2/3] powerpc/mce: Add debugfs interface to inject MCE
From: Ganesh @ 2020-09-18 12:06 UTC (permalink / raw)
To: Michal Suchánek; +Cc: linuxppc-dev, npiggin, mahesh
In-Reply-To: <20200917122348.GQ29778@kitsune.suse.cz>
On 9/17/20 5:53 PM, Michal Suchánek wrote:
> Hello,
>
> On Wed, Sep 16, 2020 at 10:52:27PM +0530, Ganesh Goudar wrote:
>> To test machine check handling, add debugfs interface to inject
>> slb multihit errors.
>>
>> To inject slb multihit:
>> #echo 1 > /sys/kernel/debug/powerpc/mce_error_inject/inject_slb_multihit
>>
>> Signed-off-by: Ganesh Goudar <ganeshgr@linux.ibm.com>
>> Signed-off-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
>> ---
>> arch/powerpc/Kconfig.debug | 9 ++
>> arch/powerpc/sysdev/Makefile | 2 +
>> arch/powerpc/sysdev/mce_error_inject.c | 148 +++++++++++++++++++++++++
>> 3 files changed, 159 insertions(+)
>> create mode 100644 arch/powerpc/sysdev/mce_error_inject.c
>>
>> diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug
>> index b88900f4832f..61db133f2f0d 100644
>> --- a/arch/powerpc/Kconfig.debug
>> +++ b/arch/powerpc/Kconfig.debug
>> @@ -398,3 +398,12 @@ config KASAN_SHADOW_OFFSET
>> hex
>> depends on KASAN
>> default 0xe0000000
>> +
>> +config MCE_ERROR_INJECT
>> + bool "Enable MCE error injection through debugfs"
>> + depends on DEBUG_FS
>> + default y
>> + help
>> + This option creates an mce_error_inject directory in the
>> + powerpc debugfs directory that allows limited injection of
>> + Machine Check Errors (MCEs).
>> diff --git a/arch/powerpc/sysdev/Makefile b/arch/powerpc/sysdev/Makefile
>> index 026b3f01a991..7fc102222b77 100644
>> --- a/arch/powerpc/sysdev/Makefile
>> +++ b/arch/powerpc/sysdev/Makefile
>> @@ -52,3 +52,5 @@ obj-$(CONFIG_PPC_XICS) += xics/
>> obj-$(CONFIG_PPC_XIVE) += xive/
>>
>> obj-$(CONFIG_GE_FPGA) += ge/
>> +
>> +obj-$(CONFIG_MCE_ERROR_INJECT) += mce_error_inject.o
>> diff --git a/arch/powerpc/sysdev/mce_error_inject.c b/arch/powerpc/sysdev/mce_error_inject.c
>> new file mode 100644
>> index 000000000000..ca4726bfa2d9
>> --- /dev/null
>> +++ b/arch/powerpc/sysdev/mce_error_inject.c
>> @@ -0,0 +1,148 @@
>> +// SPDX-License-Identifier: GPL-2.0
>> +/*
>> + * Machine Check Exception injection code
>> + */
>> +
>> +#include <linux/kernel.h>
>> +#include <linux/slab.h>
>> +#include <linux/vmalloc.h>
>> +#include <linux/fs.h>
>> +#include <linux/debugfs.h>
>> +#include <asm/debugfs.h>
>> +
>> +static inline unsigned long get_slb_index(void)
>> +{
>> + unsigned long index;
>> +
>> + index = get_paca()->stab_rr;
>> +
>> + /*
>> + * simple round-robin replacement of slb starting at SLB_NUM_BOLTED.
>> + */
>> + if (index < (mmu_slb_size - 1))
>> + index++;
>> + else
>> + index = SLB_NUM_BOLTED;
>> + get_paca()->stab_rr = index;
>> + return index;
>> +}
>> +
>> +#define slb_esid_mask(ssize) \
>> + (((ssize) == MMU_SEGSIZE_256M) ? ESID_MASK : ESID_MASK_1T)
>> +
>> +static inline unsigned long mk_esid_data(unsigned long ea, int ssize,
>> + unsigned long slot)
>> +{
>> + return (ea & slb_esid_mask(ssize)) | SLB_ESID_V | slot;
>> +}
>> +
>> +#define slb_vsid_shift(ssize) \
>> + ((ssize) == MMU_SEGSIZE_256M ? SLB_VSID_SHIFT : SLB_VSID_SHIFT_1T)
>> +
>> +static inline unsigned long mk_vsid_data(unsigned long ea, int ssize,
>> + unsigned long flags)
>> +{
>> + return (get_kernel_vsid(ea, ssize) << slb_vsid_shift(ssize)) | flags |
>> + ((unsigned long)ssize << SLB_VSID_SSIZE_SHIFT);
>> +}
>> +
>> +static void insert_slb_entry(char *p, int ssize)
>> +{
>> + unsigned long flags, entry;
>> + struct paca_struct *paca;
>> +
>> + flags = SLB_VSID_KERNEL | mmu_psize_defs[MMU_PAGE_64K].sllp;
>> +
>> + preempt_disable();
>> +
>> + paca = get_paca();
> This seems unused?
Thanks, ill remove it.
>> +
>> + entry = get_slb_index();
>> + asm volatile("slbmte %0,%1" :
>> + : "r" (mk_vsid_data((unsigned long)p, ssize, flags)),
>> + "r" (mk_esid_data((unsigned long)p, ssize, entry))
>> + : "memory");
>> +
>> + entry = get_slb_index();
>> + asm volatile("slbmte %0,%1" :
>> + : "r" (mk_vsid_data((unsigned long)p, ssize, flags)),
>> + "r" (mk_esid_data((unsigned long)p, ssize, entry))
>> + : "memory");
>> + preempt_enable();
>> + p[0] = '!';
>> +}
>> +
>> +static void inject_vmalloc_slb_multihit(void)
>> +{
>> + char *p;
>> +
>> + p = vmalloc(2048);
>> + if (!p)
>> + return;
>> +
>> + insert_slb_entry(p, MMU_SEGSIZE_1T);
>> + vfree(p);
>> +}
>> +
>> +static void inject_kmalloc_slb_multihit(void)
>> +{
>> + char *p;
>> +
>> + p = kmalloc(2048, GFP_KERNEL);
>> + if (!p)
>> + return;
>> +
>> + insert_slb_entry(p, MMU_SEGSIZE_1T);
>> + kfree(p);
>> +}
>> +
>> +static ssize_t inject_slb_multihit(const char __user *u_buf, size_t count)
>> +{
>> + char buf[32];
>> + size_t buf_size;
>> +
>> + buf_size = min(count, (sizeof(buf) - 1));
>> + if (copy_from_user(buf, u_buf, buf_size))
>> + return -EFAULT;
>> + buf[buf_size] = '\0';
>> +
>> + if (buf[0] != '1')
>> + return -EINVAL;
>> +
>> + inject_vmalloc_slb_multihit();
>> + inject_kmalloc_slb_multihit();
> This is missing the test of multihit in paca which is for some reason
> special.
I will add it, Thanks
> Thanks
>
> Michal
>> + return count;
>> +}
>> +
>> +static ssize_t inject_write(struct file *file, const char __user *buf,
>> + size_t count, loff_t *ppos)
>> +{
>> + static ssize_t (*func)(const char __user *, size_t);
>> +
>> + func = file->f_inode->i_private;
>> + return func(buf, count);
>> +}
>> +
>> +static const struct file_operations inject_fops = {
>> + .write = inject_write,
>> + .llseek = default_llseek,
>> +};
>> +
>> +static int mce_error_inject_setup(void)
>> +{
>> + struct dentry *mce_error_inject_dir;
>> +
>> + mce_error_inject_dir = debugfs_create_dir("mce_error_inject",
>> + powerpc_debugfs_root);
>> +
>> + if (mmu_has_feature(MMU_FTR_HPTE_TABLE)) {
>> + (void)debugfs_create_file("inject_slb_multihit", 0200,
>> + mce_error_inject_dir,
>> + &inject_slb_multihit,
>> + &inject_fops);
>> + }
>> +
>> + return 0;
>> +}
>> +
>> +device_initcall(mce_error_inject_setup);
>> --
>> 2.26.2
>>
^ permalink raw reply
* Re: [PATCH 6/6] powerpc/64: irq replay remove decrementer overflow check
From: Michael Ellerman @ 2020-09-18 12:06 UTC (permalink / raw)
To: Nicholas Piggin, linuxppc-dev; +Cc: Nicholas Piggin
In-Reply-To: <20200915114650.3980244-6-npiggin@gmail.com>
Nicholas Piggin <npiggin@gmail.com> writes:
> This is an ad-hoc way to catch some cases of decrementer overflow. It
> won't catch cases where interrupts were hard disabled before any soft
> masked interrupts fired, for example. And it doesn't catch cases that
> have overflowed an even number of times.
>
> It's not clear what exactly what problem s being solved here. A lost
> timer when we have an IRQ off latency of more than ~4.3 seconds could
> be avoided (so long as it's also less than ~8.6s) but this is already
> a hard lockup order of magnitude event, and the decrementer will wrap
> again and provide a timer interrupt within the same latency magnitdue.
>
> So the test catches some cases of lost decrementers in very exceptional
> (buggy) latency event cases, reducing timer interrupt latency in that
> case by up to 4.3 seconds. And for large decrementer, it's useless. It
> is performed in potentially quite a hot path, reading the TB can be
> a noticable overhead.
>
> Perhaps more importantly it allows the clunky MSR[EE] vs
> PACA_IRQ_HARD_DIS incoherency to be removed.
>
> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
> ---
> arch/powerpc/kernel/irq.c | 50 +--------------------------------------
> 1 file changed, 1 insertion(+), 49 deletions(-)
Seems to be unhappy on qemu ppc64e:
kernel BUG at arch/powerpc/kernel/irq.c:153!
Which is:
notrace unsigned int __check_irq_replay(void)
{
...
/* There should be nothing left ! */
BUG_ON(local_paca->irq_happened != 0);
return 0;
}
Full log below.
cheers
spawn qemu-system-ppc64 -nographic -M ppce500 -cpu e5500 -m 2G -kernel /home/michael/build/adhoc/ci_output/build/corenet64_smp_defconfig@ppc64@korg@10.1.0/uImage -initrd ppc64-novsx-rootfs.cpio.gz -append noreboot
MMU: Supported page sizes
4 KB as direct
4096 KB as direct
16384 KB as direct
65536 KB as direct
262144 KB as direct
1048576 KB as direct
MMU: Book3E HW tablewalk not supported
Linux version 5.9.0-rc2-00187-gf523995cc1ee (linuxppc@e054daee57c9) (powerpc64-linux-gnu-gcc (GCC) 10.1.0, GNU ld (GNU Binutils) 2.34) #1 SMP Fri Sep 18 11:52:25 Australia 2020
Found initrd at 0xc000000005000000:0xc0000000051e9a47
Using QEMU e500 machine description
ioremap() called early from .find_legacy_serial_ports+0x6cc/0x7bc. Use early_ioremap() instead
printk: bootconsole [udbg0] enabled
CPU maps initialized for 1 thread per core
-----------------------------------------------------
phys_mem_size = 0x80000000
dcache_bsize = 0x40
icache_bsize = 0x40
cpu_features = 0x00000003008001b4
possible = 0x00000003009003b6
always = 0x00000003008003b4
cpu_user_features = 0xcc008000 0x08000000
mmu_features = 0x000a0010
firmware_features = 0x0000000000000000
-----------------------------------------------------
qemu_e500_setup_arch()
barrier-nospec: using isync; sync as speculation barrier
Zone ranges:
DMA [mem 0x0000000000000000-0x000000007fffffff]
Normal empty
Movable zone start for each node
Early memory node ranges
node 0: [mem 0x0000000000000000-0x000000007fffffff]
Initmem setup node 0 [mem 0x0000000000000000-0x000000007fffffff]
MMU: Allocated 2112 bytes of context maps for 255 contexts
percpu: Embedded 28 pages/cpu s77400 r0 d37288 u1048576
Built 1 zonelists, mobility grouping on. Total pages: 517120
Kernel command line: noreboot
Dentry cache hash table entries: 262144 (order: 9, 2097152 bytes, linear)
Inode-cache hash table entries: 131072 (order: 8, 1048576 bytes, linear)
mem auto-init: stack:off, heap alloc:off, heap free:off
Memory: 1977432K/2097152K available (12048K kernel code, 2204K rwdata, 3788K rodata, 460K init, 321K bss, 119720K reserved, 0K cma-reserved)
SLUB: HWalign=64, Order=0-3, MinObjects=0, CPUs=1, Nodes=1
rcu: Hierarchical RCU implementation.
rcu: RCU event tracing is enabled.
rcu: RCU restricting CPUs from NR_CPUS=24 to nr_cpu_ids=1.
rcu: RCU calculated value of scheduler-enlistment delay is 25 jiffies.
rcu: Adjusting geometry for rcu_fanout_leaf=16, nr_cpu_ids=1
NR_IRQS: 512, nr_irqs: 512, preallocated irqs: 16
mpic: Setting up MPIC " OpenPIC " version 1.2 at fe0040000, max 1 CPUs
mpic: ISU size: 256, shift: 8, mask: ff
mpic: Initializing for 256 sources
random: get_random_u64 called from .start_kernel+0x498/0x70c with crng_init=0
clocksource: timebase: mask: 0xffffffffffffffff max_cycles: 0x5c4093a7d1, max_idle_ns: 440795210635 ns
clocksource: timebase mult[2800000] shift[24] registered
Console: colour dummy device 80x25
pid_max: default: 32768 minimum: 301
Mount-cache hash table entries: 4096 (order: 3, 32768 bytes, linear)
Mountpoint-cache hash table entries: 4096 (order: 3, 32768 bytes, linear)
e500 family performance monitor hardware support registered
rcu: Hierarchical SRCU implementation.
smp: Bringing up secondary CPUs ...
smp: Brought up 1 node, 1 CPU
devtmpfs: initialized
clocksource: jiffies: mask: 0xffffffff max_cycles: 0xffffffff, max_idle_ns: 7645041785100000 ns
futex hash table entries: 256 (order: 2, 16384 bytes, linear)
------------[ cut here ]------------
kernel BUG at arch/powerpc/kernel/irq.c:153!
Oops: Exception in kernel mode, sig: 5 [#1]
BE PAGE_SIZE=4K SMP NR_CPUS=24 QEMU e500
Modules linked in:
CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.9.0-rc2-00187-gf523995cc1ee #1
NIP: c0000000000039d4 LR: c00000000001a634 CTR: c000000000594d00
REGS: c00000007d0df5e0 TRAP: 0700 Not tainted (5.9.0-rc2-00187-gf523995cc1ee)
MSR: 0000000080021000 <CE,ME> CR: 28000242 XER: 20000000
IRQMASK: 1
GPR00: 0000000000000001 c00000007d0df870 c0000000011e2200 0000000000000000
GPR04: 0000000000000800 0000000000000000 0000000000000001 c00000000116eec0
GPR08: 0000000000000000 0000000000000001 0000000000000000 0000000000000300
GPR12: 0000000028000242 c000000001263000 c000000000002434 0000000000000000
GPR16: 0000000000000000 0000000000000000 0000000000000000 0000000000000000
GPR20: 0000000000000000 0000000000000000 0000000000000000 0000000000000000
GPR24: c00000000110b248 c000000000fd3728 c000000000fc1ef8 c000000000f715f8
GPR28: c0000000010c8700 0000000000000000 0000000000000002 0000000000000001
NIP [c0000000000039d4] .__check_irq_replay+0x24/0x60
LR [c00000000001a634] fast_exception_return+0xe0/0x140
Call Trace:
[c00000007d0df870] [c00000000001a37c] storage_fault_common+0x40/0x44 (unreliable)
--- interrupt: 300 at .__se_sys_futex_time32+0x1fc/0x2c8
LR = .futex_init+0xbc/0x144
[c00000007d0dfb70] [c000000000f8af88] .futex_init+0x88/0x144 (unreliable)
[c00000007d0dfc10] [c000000000001ed8] .do_one_initcall+0x6c/0x28c
[c00000007d0dfcf0] [c000000000f79258] .kernel_init_freeable+0x298/0x314
[c00000007d0dfda0] [c000000000002450] .kernel_init+0x1c/0x138
[c00000007d0dfe20] [c00000000000082c] .ret_from_kernel_thread+0x58/0x60
Instruction dump:
4bffff64 48bb8a49 60000000 892d067b 71280008 40820028 71280004 40820040
71280002 40820028 3149ffff 7d2a4910 <0b090000> 38600000 4e800020 55290776
---[ end trace 276324e35afb2c5e ]---
Kernel panic - not syncing: Attempted to kill init! exitcode=0x00000005
Rebooting in 180 seconds..
System Halted, OK to tu
^ permalink raw reply
* Re: [PATCH 1/3] powerpc/mce: remove nmi_enter/exit from real mode handler
From: Ganesh @ 2020-09-18 12:04 UTC (permalink / raw)
To: Michal Suchánek, npiggin; +Cc: linuxppc-dev, mahesh
In-Reply-To: <20200917122028.GP29778@kitsune.suse.cz>
On 9/17/20 5:50 PM, Michal Suchánek wrote:
> Hello,
>
> On Wed, Sep 16, 2020 at 10:52:26PM +0530, Ganesh Goudar wrote:
>> Use of nmi_enter/exit in real mode handler causes the kernel to panic
>> and reboot on injecting slb mutihit on pseries machine running in hash
>> mmu mode, As these calls try to accesses memory outside RMO region in
>> real mode handler where translation is disabled.
>>
>> Add check to not to use these calls on pseries machine running in hash
>> mmu mode.
>>
>> Fixes: 116ac378bb3f ("powerpc/64s: machine check interrupt update NMI accounting")
>> Signed-off-by: Ganesh Goudar <ganeshgr@linux.ibm.com>
>> ---
>> arch/powerpc/kernel/mce.c | 7 ++++++-
>> 1 file changed, 6 insertions(+), 1 deletion(-)
>>
>> diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
>> index ada59f6c4298..1d42fe0f5f9c 100644
>> --- a/arch/powerpc/kernel/mce.c
>> +++ b/arch/powerpc/kernel/mce.c
>> @@ -591,10 +591,15 @@ EXPORT_SYMBOL_GPL(machine_check_print_event_info);
>> long notrace machine_check_early(struct pt_regs *regs)
>> {
>> long handled = 0;
>> - bool nested = in_nmi();
>> + bool nested;
>> + bool is_pseries_hpt_guest;
>> u8 ftrace_enabled = this_cpu_get_ftrace_enabled();
>>
>> this_cpu_set_ftrace_enabled(0);
>> + is_pseries_hpt_guest = machine_is(pseries) &&
>> + mmu_has_feature(MMU_FTR_HPTE_TABLE);
>> + /* Do not use nmi_enter/exit for pseries hpte guest */
>> + nested = is_pseries_hpt_guest ? true : in_nmi();
> As pointed out already in another comment nesting is supported natively
> since 69ea03b56ed2c7189ccd0b5910ad39f3cad1df21. You can simply do
> nmi_enter and nmi_exit unconditionally - or only based on
> is_pseries_hpt_guest.
ok
> The other question is what is the value of calling nmi_enter here at
> all. It crashes in one case, we simply skip it for that case, and we are
> good. Maybe we could skip it altogether?
Not sure why nmi_enter/exit is needed here, Again, Nick may have a reason.
> Thanks
>
> Michal
^ permalink raw reply
* Re: [PATCH 0/3] powerpc/mce: Fix mce handler and add selftest
From: Ganesh @ 2020-09-18 11:49 UTC (permalink / raw)
To: Michal Suchánek, npiggin; +Cc: linuxppc-dev, mahesh
In-Reply-To: <20200917122920.GR29778@kitsune.suse.cz>
On 9/17/20 5:59 PM, Michal Suchánek wrote:
> Hello,
>
> On Wed, Sep 16, 2020 at 10:52:25PM +0530, Ganesh Goudar wrote:
>> This patch series fixes mce handling for pseries, provides debugfs
>> interface for mce injection and adds selftest to test mce handling
>> on pseries/powernv machines running in hash mmu mode.
>> debugfs interface and sleftest are added only for slb multihit
>> injection, We can add other tests in future if possible.
>>
>> Ganesh Goudar (3):
>> powerpc/mce: remove nmi_enter/exit from real mode handler
>> powerpc/mce: Add debugfs interface to inject MCE
>> selftest/powerpc: Add slb multihit selftest
> Is the below logic sound? It does not agree with what is added here:
>
> void machine_check_exception(struct pt_regs *regs)
> {
> int recover = 0;
>
> /*
> * BOOK3S_64 does not call this handler as a non-maskable interrupt
> * (it uses its own early real-mode handler to handle the MCE proper
> * and then raises irq_work to call this handler when interrupts are
> * enabled).
> *
> * This is silly. The BOOK3S_64 should just call a different function
> * rather than expecting semantics to magically change. Something
> * like 'non_nmi_machine_check_exception()', perhaps?
> */
> const bool nmi = !IS_ENABLED(CONFIG_PPC_BOOK3S_64);
>
> if (nmi) nmi_enter();
>
> Thanks
>
> Michal
Looks like Nick Piggin has the Justification for it, Ill leave Nick to
answer this.
^ permalink raw reply
* Re: [PATCH -next] tty: hvc: fix link error with CONFIG_SERIAL_CORE_CONSOLE=n
From: Greg KH @ 2020-09-18 11:17 UTC (permalink / raw)
To: Yang Yingliang; +Cc: linuxppc-dev, linux-kernel
In-Reply-To: <20200918092030.3855438-1-yangyingliang@huawei.com>
On Fri, Sep 18, 2020 at 05:20:30PM +0800, Yang Yingliang wrote:
> Fix the link error by selecting SERIAL_CORE_CONSOLE.
>
> aarch64-linux-gnu-ld: drivers/tty/hvc/hvc_dcc.o: in function `dcc_early_write':
> hvc_dcc.c:(.text+0x164): undefined reference to `uart_console_write'
>
> Reported-by: Hulk Robot <hulkci@huawei.com>
> Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
> ---
> drivers/tty/hvc/Kconfig | 1 +
> 1 file changed, 1 insertion(+)
>
> diff --git a/drivers/tty/hvc/Kconfig b/drivers/tty/hvc/Kconfig
> index d1b27b0522a3..8d60e0ff67b4 100644
> --- a/drivers/tty/hvc/Kconfig
> +++ b/drivers/tty/hvc/Kconfig
> @@ -81,6 +81,7 @@ config HVC_DCC
> bool "ARM JTAG DCC console"
> depends on ARM || ARM64
> select HVC_DRIVER
> + select SERIAL_CORE_CONSOLE
> help
> This console uses the JTAG DCC on ARM to create a console under the HVC
> driver. This console is used through a JTAG only on ARM. If you don't have
> --
> 2.25.1
>
Same question here, what caused this problem to happen?
thanks,
greg k-h
^ permalink raw reply
* Re: [PATCH v6 0/8] powerpc/watchpoint: Bug fixes plus new feature flag
From: Michael Ellerman @ 2020-09-18 10:50 UTC (permalink / raw)
To: Rogerio Alves, Ravi Bangoria, christophe.leroy
Cc: mikey, jniethe5, pedromfc, linux-kernel, paulus, rogealve,
naveen.n.rao, linuxppc-dev
In-Reply-To: <6927523d-de63-910a-e789-5fab424c7eb9@linux.ibm.com>
Rogerio Alves <rcardoso@linux.ibm.com> writes:
> On 9/2/20 1:29 AM, Ravi Bangoria wrote:
>> Patch #1 fixes issue for quardword instruction on p10 predecessors.
>> Patch #2 fixes issue for vector instructions.
>> Patch #3 fixes a bug about watchpoint not firing when created with
>> ptrace PPC_PTRACE_SETHWDEBUG and CONFIG_HAVE_HW_BREAKPOINT=N.
>> The fix uses HW_BRK_TYPE_PRIV_ALL for ptrace user which, I
>> guess, should be fine because we don't leak any kernel
>> addresses and PRIV_ALL will also help to cover scenarios when
>> kernel accesses user memory.
>> Patch #4,#5 fixes infinite exception bug, again the bug happens only
>> with CONFIG_HAVE_HW_BREAKPOINT=N.
>> Patch #6 fixes two places where we are missing to set hw_len.
>> Patch #7 introduce new feature bit PPC_DEBUG_FEATURE_DATA_BP_ARCH_31
>> which will be set when running on ISA 3.1 compliant machine.
>> Patch #8 finally adds selftest to test scenarios fixed by patch#2,#3
>> and also moves MODE_EXACT tests outside of BP_RANGE condition.
>>
>> Christophe, let me know if this series breaks something for 8xx.
>>
>> v5: https://lore.kernel.org/r/20200825043617.1073634-1-ravi.bangoria@linux.ibm.com
>>
>> v5->v6:
>> - Fix build faulure reported by kernel test robot
>> - patch #5. Use more compact if condition, suggested by Christophe
>>
>>
>> Ravi Bangoria (8):
>> powerpc/watchpoint: Fix quarword instruction handling on p10
>> predecessors
>> powerpc/watchpoint: Fix handling of vector instructions
>> powerpc/watchpoint/ptrace: Fix SETHWDEBUG when
>> CONFIG_HAVE_HW_BREAKPOINT=N
>> powerpc/watchpoint: Move DAWR detection logic outside of
>> hw_breakpoint.c
>> powerpc/watchpoint: Fix exception handling for
>> CONFIG_HAVE_HW_BREAKPOINT=N
>> powerpc/watchpoint: Add hw_len wherever missing
>> powerpc/watchpoint/ptrace: Introduce PPC_DEBUG_FEATURE_DATA_BP_ARCH_31
>> powerpc/watchpoint/selftests: Tests for kernel accessing user memory
>>
>> Documentation/powerpc/ptrace.rst | 1 +
>> arch/powerpc/include/asm/hw_breakpoint.h | 12 ++
>> arch/powerpc/include/uapi/asm/ptrace.h | 1 +
>> arch/powerpc/kernel/Makefile | 3 +-
>> arch/powerpc/kernel/hw_breakpoint.c | 149 +---------------
>> .../kernel/hw_breakpoint_constraints.c | 162 ++++++++++++++++++
>> arch/powerpc/kernel/process.c | 48 ++++++
>> arch/powerpc/kernel/ptrace/ptrace-noadv.c | 9 +-
>> arch/powerpc/xmon/xmon.c | 1 +
>> .../selftests/powerpc/ptrace/ptrace-hwbreak.c | 48 +++++-
>> 10 files changed, 282 insertions(+), 152 deletions(-)
>> create mode 100644 arch/powerpc/kernel/hw_breakpoint_constraints.c
>>
>
> Tested this patch set for:
> - SETHWDEBUG when CONFIG_HAVE_HW_BREAKPOINT=N = OK
> - Fix exception handling for CONFIG_HAVE_HW_BREAKPOINT=N = OK
> - Check for PPC_DEBUG_FEATURE_DATA_BP_ARCH_31 = OK
> - Fix quarword instruction handling on p10 predecessors = OK
> - Fix handling of vector instructions = OK
>
> Also tested for:
> - Set second watchpoint (P10 Mambo) = OK
> - Infinity loop on sc instruction = OK
Thanks.
I wasn't able to pick up your Tested-by tags as I'd already applied the
patches, but thanks for sending them anyway, they will live on in the
mailing list archives for eternity.
cheers
^ permalink raw reply
* [PATCH] ASoC: fsl_sai: Instantiate snd_soc_dai_driver
From: Shengjiu Wang @ 2020-09-18 10:26 UTC (permalink / raw)
To: timur, nicoleotsuka, Xiubo.Lee, festevam, broonie, perex, tiwai,
alsa-devel, lgirdwood
Cc: linuxppc-dev, linux-kernel
Instantiate snd_soc_dai_driver for independent symmetric control.
Otherwise the symmetric setting may be overwritten by other
instance.
Fixes: 08fdf65e37d5 ("ASoC: fsl_sai: Add asynchronous mode support")
Signed-off-by: Shengjiu Wang <shengjiu.wang@nxp.com>
---
sound/soc/fsl/fsl_sai.c | 19 +++++++++++--------
sound/soc/fsl/fsl_sai.h | 1 +
2 files changed, 12 insertions(+), 8 deletions(-)
diff --git a/sound/soc/fsl/fsl_sai.c b/sound/soc/fsl/fsl_sai.c
index 5117c1cd5682..3e5c1eaccd5e 100644
--- a/sound/soc/fsl/fsl_sai.c
+++ b/sound/soc/fsl/fsl_sai.c
@@ -725,7 +725,7 @@ static int fsl_sai_dai_probe(struct snd_soc_dai *cpu_dai)
return 0;
}
-static struct snd_soc_dai_driver fsl_sai_dai = {
+static struct snd_soc_dai_driver fsl_sai_dai_template = {
.probe = fsl_sai_dai_probe,
.playback = {
.stream_name = "CPU-Playback",
@@ -1062,12 +1062,15 @@ static int fsl_sai_probe(struct platform_device *pdev)
return ret;
}
+ memcpy(&sai->cpu_dai_drv, &fsl_sai_dai_template,
+ sizeof(fsl_sai_dai_template));
+
/* Sync Tx with Rx as default by following old DT binding */
sai->synchronous[RX] = true;
sai->synchronous[TX] = false;
- fsl_sai_dai.symmetric_rates = 1;
- fsl_sai_dai.symmetric_channels = 1;
- fsl_sai_dai.symmetric_samplebits = 1;
+ sai->cpu_dai_drv.symmetric_rates = 1;
+ sai->cpu_dai_drv.symmetric_channels = 1;
+ sai->cpu_dai_drv.symmetric_samplebits = 1;
if (of_find_property(np, "fsl,sai-synchronous-rx", NULL) &&
of_find_property(np, "fsl,sai-asynchronous", NULL)) {
@@ -1084,9 +1087,9 @@ static int fsl_sai_probe(struct platform_device *pdev)
/* Discard all settings for asynchronous mode */
sai->synchronous[RX] = false;
sai->synchronous[TX] = false;
- fsl_sai_dai.symmetric_rates = 0;
- fsl_sai_dai.symmetric_channels = 0;
- fsl_sai_dai.symmetric_samplebits = 0;
+ sai->cpu_dai_drv.symmetric_rates = 0;
+ sai->cpu_dai_drv.symmetric_channels = 0;
+ sai->cpu_dai_drv.symmetric_samplebits = 0;
}
if (of_find_property(np, "fsl,sai-mclk-direction-output", NULL) &&
@@ -1128,7 +1131,7 @@ static int fsl_sai_probe(struct platform_device *pdev)
regcache_cache_only(sai->regmap, true);
ret = devm_snd_soc_register_component(&pdev->dev, &fsl_component,
- &fsl_sai_dai, 1);
+ &sai->cpu_dai_drv, 1);
if (ret)
goto err_pm_disable;
diff --git a/sound/soc/fsl/fsl_sai.h b/sound/soc/fsl/fsl_sai.h
index ba7425a9e217..4bbcd0dbe8f1 100644
--- a/sound/soc/fsl/fsl_sai.h
+++ b/sound/soc/fsl/fsl_sai.h
@@ -267,6 +267,7 @@ struct fsl_sai {
unsigned int bclk_ratio;
const struct fsl_sai_soc_data *soc_data;
+ struct snd_soc_dai_driver cpu_dai_drv;
struct snd_dmaengine_dai_dma_data dma_params_rx;
struct snd_dmaengine_dai_dma_data dma_params_tx;
struct fsl_sai_verid verid;
--
2.27.0
^ permalink raw reply related
* [PATCH] ocxl: fix kconfig dependency warning for OCXL
From: Necip Fazil Yildiran @ 2020-09-18 9:41 UTC (permalink / raw)
To: fbarrat
Cc: gregkh, Necip Fazil Yildiran, linux-kernel, jeho, paul,
linuxppc-dev
When OCXL is enabled and HOTPLUG_PCI is disabled, it results in the
following Kbuild warning:
WARNING: unmet direct dependencies detected for HOTPLUG_PCI_POWERNV
Depends on [n]: PCI [=y] && HOTPLUG_PCI [=n] && PPC_POWERNV [=y] && EEH [=y]
Selected by [y]:
- OCXL [=y] && PPC_POWERNV [=y] && PCI [=y] && EEH [=y]
The reason is that OCXL selects HOTPLUG_PCI_POWERNV without depending on
or selecting HOTPLUG_PCI while HOTPLUG_PCI_POWERNV is subordinate to
HOTPLUG_PCI.
HOTPLUG_PCI_POWERNV is a visible symbol with a set of dependencies.
Selecting it will lead to overlooking its other dependencies as well.
Let OCXL depend on HOTPLUG_PCI_POWERNV instead to avoid Kbuild issues.
Fixes: 49ce94b8677c ("ocxl: Add PCI hotplug dependency to Kconfig")
Signed-off-by: Necip Fazil Yildiran <fazilyildiran@gmail.com>
---
drivers/misc/ocxl/Kconfig | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/drivers/misc/ocxl/Kconfig b/drivers/misc/ocxl/Kconfig
index 6551007a066c..947294f6d7f4 100644
--- a/drivers/misc/ocxl/Kconfig
+++ b/drivers/misc/ocxl/Kconfig
@@ -9,9 +9,8 @@ config OCXL_BASE
config OCXL
tristate "OpenCAPI coherent accelerator support"
- depends on PPC_POWERNV && PCI && EEH
+ depends on PPC_POWERNV && PCI && EEH && HOTPLUG_PCI_POWERNV
select OCXL_BASE
- select HOTPLUG_PCI_POWERNV
default m
help
Select this option to enable the ocxl driver for Open
--
2.25.1
^ permalink raw reply related
* [PATCH v2 9/9] powerpc/eeh: Clean up PE addressing
From: Oliver O'Halloran @ 2020-09-18 9:30 UTC (permalink / raw)
To: linuxppc-dev; +Cc: Oliver O'Halloran
In-Reply-To: <20200918093050.37344-1-oohall@gmail.com>
When support for EEH on PowerNV was added a lot of pseries specific code
was made "generic" and some of the quirks of pseries EEH came along for the
ride. One of the stranger quirks is eeh_pe containing two types of PE
address: pe->addr and pe->config_addr. There reason for this appears to be
historical baggage rather than any real requirements.
On pseries EEH PEs are manipulated using RTAS calls. Each EEH RTAS call
takes a "PE configuration address" as an input which is used to identify
which EEH PE is being manipulated by the call. When initialising the EEH
state for a device the first thing we need to do is determine the
configuration address for the PE which contains the device so we can enable
EEH on that PE. This process is outlined in PAPR which is the modern
(i.e post-2003) FW specification for pseries. However, EEH support was
first described in the pSeries RISC Platform Architecture (RPA) and
although they are mostly compatible EEH is one of the areas where they are
not.
The major difference is that RPA doesn't actually have the concept of a PE.
On RPA systems the EEH RTAS calls are done on a per-device basis using the
same config_addr that would be passed to the RTAS functions to access PCI
config space (e.g. ibm,read-pci-config). The config_addr is not identical
since the function and config register offsets of the config_addr must be
set to zero. EEH operations being done on a per-device basis doesn't make a
whole lot of sense when you consider how EEH was implemented on legacy PCI
systems.
For legacy PCI(-X) systems EEH was implemented using special PCI-PCI
bridges which contained logic to detect errors and freeze the secondary
bus when one occurred. This means that the EEH enabled state is shared
among all devices behind that EEH bridge. As a result there's no way to
implement the per-device control required for the semantics specified by
RPA. It can be made to work if we assume that a separate EEH bridge exists
for each EEH capable PCI slot and there are no bridges behind those slots.
However, RPA also specifies the ibm,configure-bridge RTAS call for
re-initalising bridges behind EEH capable slots after they are reset due
to an EEH event so that is probably not a valid assumption. This
incoherence was fixed in later PAPR, which succeeded RPA. Unfortunately,
since Linux EEH support seems to have been implemented based on the RPA
spec some of the legacy assumptions were carried over (probably for POWER4
compatibility).
The fix made in PAPR was the introduction of the "PE" concept and
redefining the EEH RTAS calls (set-eeh-option, reset-slot, etc) to operate
on a per-PE basis so all devices behind an EEH bride would share the same
EEH state. The "config_addr" argument to the EEH RTAS calls became the
"PE_config_addr" and the OS was required to use the
ibm,get-config-addr-info RTAS call to find the correct PE address for the
device. When support for the new interfaces was added to Linux it was
implemented using something like:
At probe time:
pdn->eeh_config_addr = rtas_config_addr(pdn);
pdn->eeh_pe_config_addr = rtas_get_config_addr_info(pdn);
When performing an RTAS call:
config_addr = pdn->eeh_config_addr;
if (pdn->eeh_pe_config_addr)
config_addr = pdn->eeh_pe_config_addr;
rtas_call(..., config_addr, ...);
In other words, if the ibm,get-config-addr-info RTAS call is implemented
and returned a valid result we'd use that as the argument to the EEH
RTAS calls. If not, Linux would fall back to using the device's
config_addr. Over time these addresses have moved around going from pci_dn
to eeh_dev and finally into eeh_pe. Today the users look like this:
config_addr = pe->config_addr;
if (pe->addr)
config_addr = pe->addr;
rtas_call(..., config_addr, ...);
However, considering the EEH core always operates on a per-PE basis and
even on pseries the only per-device operation is the initial call to
ibm,set-eeh-option I'm not sure if any of this actually works on an RPA
system today. It doesn't make much sense to have the fallback address in
a generic structure either since the bulk of the code which reference it
is in pseries anyway.
The EEH core makes a token effort to support looking up a PE using the
config_addr by having two arguments to eeh_pe_get(). However, a survey of
all the callers to eeh_pe_get() shows that all bar one have the config_addr
argument hard-coded to zero.The only caller that doesn't is in
eeh_pe_tree_insert() which has:
if (!eeh_has_flag(EEH_VALID_PE_ZERO) && !edev->pe_config_addr)
return -EINVAL;
pe = eeh_pe_get(hose, edev->pe_config_addr, edev->bdfn);
The third argument (config_addr) is only used if the second (pe->addr)
argument is invalid. The preceding check ensures that the call to
eeh_pe_get() will never happen if edev->pe_config_addr is invalid so there
is no situation where eeh_pe_get() will search for a PE based on the 3rd
argument. The check also means that we'll never insert a PE into the tree
where pe_config_addr is zero since EEH_VALID_PE_ZERO is never set on
pseries. All the users of the fallback address on pseries never actually
use the fallback and all the only caller that supplies something for the
config_addr argument to eeh_pe_get() never use it either. It's all dead
code.
This patch removes the fallback address from eeh_pe since nothing uses it.
Specificly, we do this by:
1) Removing pe->config_addr
2) Removing the EEH_VALID_PE_ZERO flag
3) Removing the fallback address argument to eeh_pe_get().
4) Removing all the checks for pe->addr being zero in the pseries EEH code.
This leaves us with PE's only being identified by what's in their pe->addr
field and the EEH core relying on the platform to ensure that eeh_dev's are
only inserted into the EEH tree if they're actually inside a PE.
No functional changes, I hope.
Signed-off-by: Oliver O'Halloran <oohall@gmail.com>
---
v2: re-wrote commit message
---
arch/powerpc/include/asm/eeh.h | 4 +-
arch/powerpc/kernel/eeh.c | 2 +-
arch/powerpc/kernel/eeh_pe.c | 46 +++-----------------
arch/powerpc/platforms/powernv/eeh-powernv.c | 16 ++-----
arch/powerpc/platforms/pseries/eeh_pseries.c | 42 +++---------------
5 files changed, 17 insertions(+), 93 deletions(-)
diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index 85030c05e67e..dd6a4ac6c713 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -27,7 +27,6 @@ struct pci_dn;
#define EEH_FORCE_DISABLED 0x02 /* EEH disabled */
#define EEH_PROBE_MODE_DEV 0x04 /* From PCI device */
#define EEH_PROBE_MODE_DEVTREE 0x08 /* From device tree */
-#define EEH_VALID_PE_ZERO 0x10 /* PE#0 is valid */
#define EEH_ENABLE_IO_FOR_LOG 0x20 /* Enable IO for log */
#define EEH_EARLY_DUMP_LOG 0x40 /* Dump log immediately */
@@ -280,8 +279,7 @@ int eeh_phb_pe_create(struct pci_controller *phb);
int eeh_wait_state(struct eeh_pe *pe, int max_wait);
struct eeh_pe *eeh_phb_pe_get(struct pci_controller *phb);
struct eeh_pe *eeh_pe_next(struct eeh_pe *pe, struct eeh_pe *root);
-struct eeh_pe *eeh_pe_get(struct pci_controller *phb,
- int pe_no, int config_addr);
+struct eeh_pe *eeh_pe_get(struct pci_controller *phb, int pe_no);
int eeh_pe_tree_insert(struct eeh_dev *edev, struct eeh_pe *new_pe_parent);
int eeh_pe_tree_remove(struct eeh_dev *edev);
void eeh_pe_update_time_stamp(struct eeh_pe *pe);
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index c9e25cfce8f0..87de8b798b2d 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -1657,7 +1657,7 @@ static ssize_t eeh_force_recover_write(struct file *filp,
return -ENODEV;
/* Retrieve PE */
- pe = eeh_pe_get(hose, pe_no, 0);
+ pe = eeh_pe_get(hose, pe_no);
if (!pe)
return -ENODEV;
diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c
index d2aaaa73fdd5..61b7d4019051 100644
--- a/arch/powerpc/kernel/eeh_pe.c
+++ b/arch/powerpc/kernel/eeh_pe.c
@@ -251,43 +251,21 @@ void eeh_pe_dev_traverse(struct eeh_pe *root,
/**
* __eeh_pe_get - Check the PE address
- * @data: EEH PE
- * @flag: EEH device
*
* For one particular PE, it can be identified by PE address
* or tranditional BDF address. BDF address is composed of
* Bus/Device/Function number. The extra data referred by flag
* indicates which type of address should be used.
*/
-struct eeh_pe_get_flag {
- int pe_no;
- int config_addr;
-};
-
static void *__eeh_pe_get(struct eeh_pe *pe, void *flag)
{
- struct eeh_pe_get_flag *tmp = (struct eeh_pe_get_flag *) flag;
+ int *target_pe = flag;
- /* Unexpected PHB PE */
+ /* PHB PEs are special and should be ignored */
if (pe->type & EEH_PE_PHB)
return NULL;
- /*
- * We prefer PE address. For most cases, we should
- * have non-zero PE address
- */
- if (eeh_has_flag(EEH_VALID_PE_ZERO)) {
- if (tmp->pe_no == pe->addr)
- return pe;
- } else {
- if (tmp->pe_no &&
- (tmp->pe_no == pe->addr))
- return pe;
- }
-
- /* Try BDF address */
- if (tmp->config_addr &&
- (tmp->config_addr == pe->config_addr))
+ if (*target_pe == pe->addr)
return pe;
return NULL;
@@ -297,7 +275,6 @@ static void *__eeh_pe_get(struct eeh_pe *pe, void *flag)
* eeh_pe_get - Search PE based on the given address
* @phb: PCI controller
* @pe_no: PE number
- * @config_addr: Config address
*
* Search the corresponding PE based on the specified address which
* is included in the eeh device. The function is used to check if
@@ -306,16 +283,11 @@ static void *__eeh_pe_get(struct eeh_pe *pe, void *flag)
* which is composed of PCI bus/device/function number, or unified
* PE address.
*/
-struct eeh_pe *eeh_pe_get(struct pci_controller *phb,
- int pe_no, int config_addr)
+struct eeh_pe *eeh_pe_get(struct pci_controller *phb, int pe_no)
{
struct eeh_pe *root = eeh_phb_pe_get(phb);
- struct eeh_pe_get_flag tmp = { pe_no, config_addr };
- struct eeh_pe *pe;
-
- pe = eeh_pe_traverse(root, __eeh_pe_get, &tmp);
- return pe;
+ return eeh_pe_traverse(root, __eeh_pe_get, &pe_no);
}
/**
@@ -336,19 +308,13 @@ int eeh_pe_tree_insert(struct eeh_dev *edev, struct eeh_pe *new_pe_parent)
struct pci_controller *hose = edev->controller;
struct eeh_pe *pe, *parent;
- /* Check if the PE number is valid */
- if (!eeh_has_flag(EEH_VALID_PE_ZERO) && !edev->pe_config_addr) {
- eeh_edev_err(edev, "PE#0 is invalid for this PHB!\n");
- return -EINVAL;
- }
-
/*
* Search the PE has been existing or not according
* to the PE address. If that has been existing, the
* PE should be composed of PCI bus and its subordinate
* components.
*/
- pe = eeh_pe_get(hose, edev->pe_config_addr, edev->bdfn);
+ pe = eeh_pe_get(hose, edev->pe_config_addr);
if (pe) {
if (pe->type & EEH_PE_INVALID) {
list_add_tail(&edev->entry, &pe->edevs);
diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c
index 6d95e774bded..e0f6ad2d2986 100644
--- a/arch/powerpc/platforms/powernv/eeh-powernv.c
+++ b/arch/powerpc/platforms/powernv/eeh-powernv.c
@@ -87,7 +87,7 @@ static ssize_t pnv_eeh_ei_write(struct file *filp,
return -EINVAL;
/* Retrieve PE */
- pe = eeh_pe_get(hose, pe_no, 0);
+ pe = eeh_pe_get(hose, pe_no);
if (!pe)
return -ENODEV;
@@ -306,7 +306,7 @@ static struct eeh_pe *pnv_eeh_get_upstream_pe(struct pci_dev *pdev)
if (parent) {
struct pnv_ioda_pe *ioda_pe = pnv_ioda_get_pe(parent);
- return eeh_pe_get(phb->hose, ioda_pe->pe_number, 0);
+ return eeh_pe_get(phb->hose, ioda_pe->pe_number);
}
return NULL;
@@ -1358,7 +1358,7 @@ static int pnv_eeh_get_pe(struct pci_controller *hose,
}
/* Find the PE according to PE# */
- dev_pe = eeh_pe_get(hose, pe_no, 0);
+ dev_pe = eeh_pe_get(hose, pe_no);
if (!dev_pe)
return -EEXIST;
@@ -1675,16 +1675,6 @@ static int __init eeh_powernv_init(void)
if (phb->diag_data_size > max_diag_size)
max_diag_size = phb->diag_data_size;
- /*
- * PE#0 should be regarded as valid by EEH core
- * if it's not the reserved one. Currently, we
- * have the reserved PE#255 and PE#127 for PHB3
- * and P7IOC separately. So we should regard
- * PE#0 as valid for PHB3 and P7IOC.
- */
- if (phb->ioda.reserved_pe_idx != 0)
- eeh_add_flag(EEH_VALID_PE_ZERO);
-
break;
}
diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c
index e42c026392aa..2ebfe9d8bf8c 100644
--- a/arch/powerpc/platforms/pseries/eeh_pseries.c
+++ b/arch/powerpc/platforms/pseries/eeh_pseries.c
@@ -511,7 +511,6 @@ EXPORT_SYMBOL_GPL(pseries_eeh_init_edev_recursive);
static int pseries_eeh_set_option(struct eeh_pe *pe, int option)
{
int ret = 0;
- int config_addr;
/*
* When we're enabling or disabling EEH functioality on
@@ -524,9 +523,6 @@ static int pseries_eeh_set_option(struct eeh_pe *pe, int option)
case EEH_OPT_ENABLE:
case EEH_OPT_THAW_MMIO:
case EEH_OPT_THAW_DMA:
- config_addr = pe->config_addr;
- if (pe->addr)
- config_addr = pe->addr;
break;
case EEH_OPT_FREEZE_PE:
/* Not support */
@@ -538,7 +534,7 @@ static int pseries_eeh_set_option(struct eeh_pe *pe, int option)
}
ret = rtas_call(ibm_set_eeh_option, 4, 1, NULL,
- config_addr, BUID_HI(pe->phb->buid),
+ pe->addr, BUID_HI(pe->phb->buid),
BUID_LO(pe->phb->buid), option);
return ret;
@@ -559,25 +555,19 @@ static int pseries_eeh_set_option(struct eeh_pe *pe, int option)
*/
static int pseries_eeh_get_state(struct eeh_pe *pe, int *delay)
{
- int config_addr;
int ret;
int rets[4];
int result;
- /* Figure out PE config address if possible */
- config_addr = pe->config_addr;
- if (pe->addr)
- config_addr = pe->addr;
-
if (ibm_read_slot_reset_state2 != RTAS_UNKNOWN_SERVICE) {
ret = rtas_call(ibm_read_slot_reset_state2, 3, 4, rets,
- config_addr, BUID_HI(pe->phb->buid),
+ pe->addr, BUID_HI(pe->phb->buid),
BUID_LO(pe->phb->buid));
} else if (ibm_read_slot_reset_state != RTAS_UNKNOWN_SERVICE) {
/* Fake PE unavailable info */
rets[2] = 0;
ret = rtas_call(ibm_read_slot_reset_state, 3, 3, rets,
- config_addr, BUID_HI(pe->phb->buid),
+ pe->addr, BUID_HI(pe->phb->buid),
BUID_LO(pe->phb->buid));
} else {
return EEH_STATE_NOT_SUPPORT;
@@ -631,14 +621,7 @@ static int pseries_eeh_get_state(struct eeh_pe *pe, int *delay)
*/
static int pseries_eeh_reset(struct eeh_pe *pe, int option)
{
- int config_addr;
-
- /* Figure out PE address */
- config_addr = pe->config_addr;
- if (pe->addr)
- config_addr = pe->addr;
-
- return pseries_eeh_phb_reset(pe->phb, config_addr, option);
+ return pseries_eeh_phb_reset(pe->phb, pe->addr, option);
}
/**
@@ -654,19 +637,13 @@ static int pseries_eeh_reset(struct eeh_pe *pe, int option)
*/
static int pseries_eeh_get_log(struct eeh_pe *pe, int severity, char *drv_log, unsigned long len)
{
- int config_addr;
unsigned long flags;
int ret;
spin_lock_irqsave(&slot_errbuf_lock, flags);
memset(slot_errbuf, 0, eeh_error_buf_size);
- /* Figure out the PE address */
- config_addr = pe->config_addr;
- if (pe->addr)
- config_addr = pe->addr;
-
- ret = rtas_call(ibm_slot_error_detail, 8, 1, NULL, config_addr,
+ ret = rtas_call(ibm_slot_error_detail, 8, 1, NULL, pe->addr,
BUID_HI(pe->phb->buid), BUID_LO(pe->phb->buid),
virt_to_phys(drv_log), len,
virt_to_phys(slot_errbuf), eeh_error_buf_size,
@@ -685,14 +662,7 @@ static int pseries_eeh_get_log(struct eeh_pe *pe, int severity, char *drv_log, u
*/
static int pseries_eeh_configure_bridge(struct eeh_pe *pe)
{
- int config_addr;
-
- /* Figure out the PE address */
- config_addr = pe->config_addr;
- if (pe->addr)
- config_addr = pe->addr;
-
- return pseries_eeh_phb_configure_bridge(pe->phb, config_addr);
+ return pseries_eeh_phb_configure_bridge(pe->phb, pe->addr);
}
/**
--
2.26.2
^ permalink raw reply related
* [PATCH v2 8/9] powerpc/pseries/eeh: Allow zero to be a valid PE configuration address
From: Oliver O'Halloran @ 2020-09-18 9:30 UTC (permalink / raw)
To: linuxppc-dev; +Cc: Oliver O'Halloran
In-Reply-To: <20200918093050.37344-1-oohall@gmail.com>
There's no real reason why zero can't be a valid PE configuration address.
Under qemu each sPAPR PHB (i.e. EEH supporting) has the passed-though
devices on bus zero, so the PE address of bus <dddd>:00 should be zero.
However, all previous versions of Linux will reject that, so Qemu at least
goes out of it's way to avoid it. The Qemu implementation of
ibm,get-config-addr-info2 RTAS has the following comment:
> /*
> * We always have PE address of form "00BB0001". "BB"
> * represents the bus number of PE's primary bus.
> */
So qemu puts a one into the register portion of the PE's config_addr to
avoid it being zero. The whole is pretty silly considering that RTAS will
return a negative error code if it can't map the device's config_addr to a
PE.
This patch fixes Linux to treat zero as a valid PE address. This shouldn't
have any real effects due to the Qemu hack mentioned above. And the fact
that Linux EEH has worked historically on PowerVM means they never pass
through devices on bus zero so we would never see the problem there either.
Signed-off-by: Oliver O'Halloran <oohall@gmail.com>
---
arch/powerpc/platforms/pseries/eeh_pseries.c | 38 +++++++++++---------
1 file changed, 22 insertions(+), 16 deletions(-)
diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c
index c2ecc0db2f94..e42c026392aa 100644
--- a/arch/powerpc/platforms/pseries/eeh_pseries.c
+++ b/arch/powerpc/platforms/pseries/eeh_pseries.c
@@ -87,21 +87,20 @@ void pseries_pcibios_bus_add_device(struct pci_dev *pdev)
* pseries_eeh_get_pe_config_addr - Find the pe_config_addr for a device
* @pdn: pci_dn of the input device
*
- * Retrieve the assocated config address. Actually, there're 2 RTAS
- * function calls dedicated for the purpose. We need implement
- * it through the new function and then the old one. Besides,
- * you should make sure the config address is figured out from
- * FDT node before calling the function.
+ * The EEH RTAS calls use a tuple consisting of: (buid_hi, buid_lo,
+ * pe_config_addr) as a handle to a given PE. This function finds the
+ * pe_config_addr based on the device's config addr.
*
- * It's notable that zero'ed return value means invalid PE config
- * address.
+ * Keep in mind that the pe_config_addr *might* be numerically identical to the
+ * device's config addr, but the two are conceptually distinct.
+ *
+ * Returns the pe_config_addr, or a negative error code.
*/
static int pseries_eeh_get_pe_config_addr(struct pci_dn *pdn)
{
int config_addr = rtas_config_addr(pdn->busno, pdn->devfn, 0);
struct pci_controller *phb = pdn->phb;
- int ret = 0;
- int rets[3];
+ int ret, rets[3];
if (ibm_get_config_addr_info2 != RTAS_UNKNOWN_SERVICE) {
/*
@@ -112,16 +111,16 @@ static int pseries_eeh_get_pe_config_addr(struct pci_dn *pdn)
config_addr, BUID_HI(phb->buid),
BUID_LO(phb->buid), 1);
if (ret || (rets[0] == 0))
- return 0;
+ return -ENOENT;
- /* Retrieve the associated PE config address */
+ /* Retrieve the associated PE config address with function 0 */
ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets,
config_addr, BUID_HI(phb->buid),
BUID_LO(phb->buid), 0);
if (ret) {
pr_warn("%s: Failed to get address for PHB#%x-PE#%x\n",
__func__, phb->global_number, config_addr);
- return 0;
+ return -ENXIO;
}
return rets[0];
@@ -134,13 +133,20 @@ static int pseries_eeh_get_pe_config_addr(struct pci_dn *pdn)
if (ret) {
pr_warn("%s: Failed to get address for PHB#%x-PE#%x\n",
__func__, phb->global_number, config_addr);
- return 0;
+ return -ENXIO;
}
return rets[0];
}
- return ret;
+ /*
+ * PAPR does describe a process for finding the pe_config_addr that was
+ * used before the ibm,get-config-addr-info calls were added. However,
+ * I haven't found *any* systems that don't have that RTAS call
+ * implemented. If you happen to find one that needs the old DT based
+ * process, patches are welcome!
+ */
+ return -ENOENT;
}
/**
@@ -419,7 +425,7 @@ void pseries_eeh_init_edev(struct pci_dn *pdn)
/* first up, find the pe_config_addr for the PE containing the device */
addr = pseries_eeh_get_pe_config_addr(pdn);
- if (addr == 0) {
+ if (addr < 0) {
eeh_edev_dbg(edev, "Unable to find pe_config_addr\n");
goto err;
}
@@ -901,7 +907,7 @@ static int __init eeh_pseries_init(void)
config_addr = pseries_eeh_get_pe_config_addr(pdn);
/* invalid PE config addr */
- if (config_addr == 0)
+ if (config_addr < 0)
continue;
pseries_eeh_phb_reset(phb, config_addr, EEH_RESET_FUNDAMENTAL);
--
2.26.2
^ permalink raw reply related
* [PATCH v2 7/9] powerpc/pseries/eeh: Rework device EEH PE determination
From: Oliver O'Halloran @ 2020-09-18 9:30 UTC (permalink / raw)
To: linuxppc-dev; +Cc: Oliver O'Halloran
In-Reply-To: <20200918093050.37344-1-oohall@gmail.com>
The process Linux uses for determining if a device supports EEH or not
appears to be at odds with what PAPR says the OS should be doing. The
current flow is something like:
1. Assume pe_config_addr is equal the the device's config_addr.
2. Attempt to enable EEH on that PE
3. Verify EEH was enabled (POWER4 bug workaround)
4. Try find the pe_config_addr using the ibm,get-config-addr-info2 RTAS
call.
5. If that fails walk the pci_dn tree upwards trying to find a parent
device with EEH support. If we find one then add the device to that PE.
The first major problem with this process is that we need the PE config
address in step 2) since its needs to be passed to the ibm,set-eeh-option
RTAS call when enabling EEH for th PE. We hack around this requirement in
by making the assumption in 1) and delay finding the actual PE address
until 4). This is fine if:
a) The PCI device is the 0th function, and
b) The device is on the PE's root bus.
Granted, the current sequence does appear to work on most systems even when
these conditions are false. At a guess PowerVM's RTAS has workarounds to
accommodate Linux's quirks or the RTAS call to enable EEH is treated as
no-op on most platforms since EEH is usually enabled by default. However,
what is currently implemented is a bit sketch and is downright confusing
since it doesn't match up with what what PAPR suggests we should be doing.
This patch re-works how we handle EEH init so that we find the PE config
address using the ibm,get-config-addr-info2 RTAS call first, then use the
found address to finish the EEH init process. It also drops the Power4
workaround since as of commit 471d7ff8b51b ("powerpc/64s: Remove POWER4
support") the kernel does not support running on a Power4 CPU so there's
no need to support the Power4 platform's quirks either. With the patch
applied the sequence is now:
1. Find the pe_config_addr from the device using the RTAS call.
2. Enable the PE.
3. Insert the edev into the tree and create an eeh_pe if needed.
The other change made here is ignoring unsupported devices entirely.
Currently the device's BARs are saved to the eeh_dev even if the device is
not part of an EEH PE. Not being part of a PE means that an EEH recovery
pass will never see that device so the saving the BARs is pointless.
Signed-off-by: Oliver O'Halloran <oohall@gmail.com>
---
arch/powerpc/platforms/pseries/eeh_pseries.c | 57 ++++++++------------
1 file changed, 22 insertions(+), 35 deletions(-)
diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c
index 10303de3d8d5..c2ecc0db2f94 100644
--- a/arch/powerpc/platforms/pseries/eeh_pseries.c
+++ b/arch/powerpc/platforms/pseries/eeh_pseries.c
@@ -357,10 +357,10 @@ static struct eeh_pe *pseries_eeh_pe_get_parent(struct eeh_dev *edev)
*/
void pseries_eeh_init_edev(struct pci_dn *pdn)
{
+ struct eeh_pe pe, *parent;
struct eeh_dev *edev;
- struct eeh_pe pe;
+ int addr;
u32 pcie_flags;
- int enable = 0;
int ret;
if (WARN_ON_ONCE(!eeh_has_flag(EEH_PROBE_MODE_DEVTREE)))
@@ -417,51 +417,38 @@ void pseries_eeh_init_edev(struct pci_dn *pdn)
}
}
- /* Initialize the fake PE */
+ /* first up, find the pe_config_addr for the PE containing the device */
+ addr = pseries_eeh_get_pe_config_addr(pdn);
+ if (addr == 0) {
+ eeh_edev_dbg(edev, "Unable to find pe_config_addr\n");
+ goto err;
+ }
+
+ /* Try enable EEH on the fake PE */
memset(&pe, 0, sizeof(struct eeh_pe));
pe.phb = pdn->phb;
- pe.config_addr = (pdn->busno << 16) | (pdn->devfn << 8);
+ pe.addr = addr;
- /* Enable EEH on the device */
eeh_edev_dbg(edev, "Enabling EEH on device\n");
ret = eeh_ops->set_option(&pe, EEH_OPT_ENABLE);
if (ret) {
eeh_edev_dbg(edev, "EEH failed to enable on device (code %d)\n", ret);
- } else {
- struct eeh_pe *parent;
+ goto err;
+ }
- /* Retrieve PE address */
- edev->pe_config_addr = pseries_eeh_get_pe_config_addr(pdn);
- pe.addr = edev->pe_config_addr;
+ edev->pe_config_addr = addr;
- /* Some older systems (Power4) allow the ibm,set-eeh-option
- * call to succeed even on nodes where EEH is not supported.
- * Verify support explicitly.
- */
- ret = eeh_ops->get_state(&pe, NULL);
- if (ret > 0 && ret != EEH_STATE_NOT_SUPPORT)
- enable = 1;
+ eeh_add_flag(EEH_ENABLED);
- /*
- * This device doesn't support EEH, but it may have an
- * EEH parent. In this case any error on the device will
- * freeze the PE of it's upstream bridge, so added it to
- * the upstream PE.
- */
- parent = pseries_eeh_pe_get_parent(edev);
- if (parent && !enable)
- edev->pe_config_addr = parent->addr;
+ parent = pseries_eeh_pe_get_parent(edev);
+ eeh_pe_tree_insert(edev, parent);
+ eeh_save_bars(edev);
+ eeh_edev_dbg(edev, "EEH enabled for device");
- if (enable || parent) {
- eeh_add_flag(EEH_ENABLED);
- eeh_pe_tree_insert(edev, parent);
- }
- eeh_edev_dbg(edev, "EEH is %s on device (code %d)\n",
- (enable ? "enabled" : "unsupported"), ret);
- }
+ return;
- /* Save memory bars */
- eeh_save_bars(edev);
+err:
+ eeh_edev_dbg(edev, "EEH is unsupported on device (code = %d)\n", ret);
}
static struct eeh_dev *pseries_eeh_probe(struct pci_dev *pdev)
--
2.26.2
^ permalink raw reply related
* [PATCH v2 6/9] powerpc/pseries/eeh: Clean up pe_config_addr lookups
From: Oliver O'Halloran @ 2020-09-18 9:30 UTC (permalink / raw)
To: linuxppc-dev; +Cc: Oliver O'Halloran
In-Reply-To: <20200918093050.37344-1-oohall@gmail.com>
De-duplicate, and fix up the comments, and make the prototype just take a
pci_dn since the job of the function is to return the pe_config_addr of the
PE which contains a given device.
Signed-off-by: Oliver O'Halloran <oohall@gmail.com>
---
arch/powerpc/platforms/pseries/eeh_pseries.c | 80 +++-----------------
1 file changed, 11 insertions(+), 69 deletions(-)
diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c
index b1561961c7ff..10303de3d8d5 100644
--- a/arch/powerpc/platforms/pseries/eeh_pseries.c
+++ b/arch/powerpc/platforms/pseries/eeh_pseries.c
@@ -33,8 +33,6 @@
#include <asm/ppc-pci.h>
#include <asm/rtas.h>
-static int pseries_eeh_get_pe_addr(struct pci_dn *pdn);
-
/* RTAS tokens */
static int ibm_set_eeh_option;
static int ibm_set_slot_reset;
@@ -86,7 +84,8 @@ void pseries_pcibios_bus_add_device(struct pci_dev *pdev)
/**
- * pseries_eeh_get_config_addr - Retrieve config address
+ * pseries_eeh_get_pe_config_addr - Find the pe_config_addr for a device
+ * @pdn: pci_dn of the input device
*
* Retrieve the assocated config address. Actually, there're 2 RTAS
* function calls dedicated for the purpose. We need implement
@@ -97,16 +96,17 @@ void pseries_pcibios_bus_add_device(struct pci_dev *pdev)
* It's notable that zero'ed return value means invalid PE config
* address.
*/
-static int pseries_eeh_get_config_addr(struct pci_controller *phb, int config_addr)
+static int pseries_eeh_get_pe_config_addr(struct pci_dn *pdn)
{
+ int config_addr = rtas_config_addr(pdn->busno, pdn->devfn, 0);
+ struct pci_controller *phb = pdn->phb;
int ret = 0;
int rets[3];
if (ibm_get_config_addr_info2 != RTAS_UNKNOWN_SERVICE) {
/*
- * First of all, we need to make sure there has one PE
- * associated with the device. Otherwise, PE address is
- * meaningless.
+ * First of all, use function 1 to determine if this device is
+ * part of a PE or not. ret[0] being zero indicates it's not.
*/
ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets,
config_addr, BUID_HI(phb->buid),
@@ -431,7 +431,7 @@ void pseries_eeh_init_edev(struct pci_dn *pdn)
struct eeh_pe *parent;
/* Retrieve PE address */
- edev->pe_config_addr = pseries_eeh_get_pe_addr(pdn);
+ edev->pe_config_addr = pseries_eeh_get_pe_config_addr(pdn);
pe.addr = edev->pe_config_addr;
/* Some older systems (Power4) allow the ibm,set-eeh-option
@@ -551,64 +551,6 @@ static int pseries_eeh_set_option(struct eeh_pe *pe, int option)
return ret;
}
-/**
- * pseries_eeh_get_pe_addr - Retrieve PE address
- * @pe: EEH PE
- *
- * Retrieve the assocated PE address. Actually, there're 2 RTAS
- * function calls dedicated for the purpose. We need implement
- * it through the new function and then the old one. Besides,
- * you should make sure the config address is figured out from
- * FDT node before calling the function.
- *
- * It's notable that zero'ed return value means invalid PE config
- * address.
- */
-static int pseries_eeh_get_pe_addr(struct pci_dn *pdn)
-{
- int config_addr = rtas_config_addr(pdn->busno, pdn->devfn, 0);
- unsigned long buid = pdn->phb->buid;
- int ret = 0;
- int rets[3];
-
- if (ibm_get_config_addr_info2 != RTAS_UNKNOWN_SERVICE) {
- /*
- * First of all, we need to make sure there has one PE
- * associated with the device. Otherwise, PE address is
- * meaningless.
- */
- ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets,
- config_addr, BUID_HI(buid), BUID_LO(buid), 1);
- if (ret || (rets[0] == 0))
- return 0;
-
- /* Retrieve the associated PE config address */
- ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets,
- config_addr, BUID_HI(buid), BUID_LO(buid), 0);
- if (ret) {
- pr_warn("%s: Failed to get address for PHB#%x-PE#%x\n",
- __func__, pdn->phb->global_number, config_addr);
- return 0;
- }
-
- return rets[0];
- }
-
- if (ibm_get_config_addr_info != RTAS_UNKNOWN_SERVICE) {
- ret = rtas_call(ibm_get_config_addr_info, 4, 2, rets,
- config_addr, BUID_HI(buid), BUID_LO(buid), 0);
- if (ret) {
- pr_warn("%s: Failed to get address for PHB#%x-PE#%x\n",
- __func__, pdn->phb->global_number, config_addr);
- return 0;
- }
-
- return rets[0];
- }
-
- return ret;
-}
-
/**
* pseries_eeh_get_state - Retrieve PE state
* @pe: EEH PE
@@ -911,7 +853,7 @@ static int __init eeh_pseries_init(void)
{
struct pci_controller *phb;
struct pci_dn *pdn;
- int ret, addr, config_addr;
+ int ret, config_addr;
/* figure out EEH RTAS function call tokens */
ibm_set_eeh_option = rtas_token("ibm,set-eeh-option");
@@ -969,8 +911,8 @@ static int __init eeh_pseries_init(void)
pr_info("Issue PHB reset ...\n");
list_for_each_entry(phb, &hose_list, list_node) {
pdn = list_first_entry(&PCI_DN(phb->dn)->child_list, struct pci_dn, list);
- addr = (pdn->busno << 16) | (pdn->devfn << 8);
- config_addr = pseries_eeh_get_config_addr(phb, addr);
+ config_addr = pseries_eeh_get_pe_config_addr(pdn);
+
/* invalid PE config addr */
if (config_addr == 0)
continue;
--
2.26.2
^ permalink raw reply related
* [PATCH v2 5/9] powerpc/eeh: Move EEH initialisation to an arch initcall
From: Oliver O'Halloran @ 2020-09-18 9:30 UTC (permalink / raw)
To: linuxppc-dev; +Cc: Oliver O'Halloran
In-Reply-To: <20200918093050.37344-1-oohall@gmail.com>
The initialisation of EEH mostly happens in a core_initcall_sync initcall,
followed by registering a bus notifier later on in an arch_initcall.
Anything involving initcall dependecies is mostly incomprehensible unless
you've spent a while staring at code so here's the full sequence:
ppc_md.setup_arch <-- pci_controllers are created here
...time passes...
core_initcall <-- pci_dns are created from DT nodes
core_initcall_sync <-- platforms call eeh_init()
postcore_initcall <-- PCI bus type is registered
postcore_initcall_sync
arch_initcall <-- EEH pci_bus notifier registered
subsys_initcall <-- PHBs are scanned here
There's no real requirement to do the EEH setup at the core_initcall_sync
level. It just needs to be done after pci_dn's are created and before we
start scanning PHBs. Simplify the flow a bit by moving the platform EEH
inititalisation to an arch_initcall so we can fold the bus notifier
registration into eeh_init().
Signed-off-by: Oliver O'Halloran <oohall@gmail.com>
---
arch/powerpc/kernel/eeh.c | 64 ++++++++++----------
arch/powerpc/platforms/powernv/eeh-powernv.c | 2 +-
arch/powerpc/platforms/pseries/eeh_pseries.c | 2 +-
3 files changed, 34 insertions(+), 34 deletions(-)
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index 98faf139e676..c9e25cfce8f0 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -940,6 +940,30 @@ static struct notifier_block eeh_reboot_nb = {
.notifier_call = eeh_reboot_notifier,
};
+static int eeh_device_notifier(struct notifier_block *nb,
+ unsigned long action, void *data)
+{
+ struct device *dev = data;
+
+ switch (action) {
+ /*
+ * Note: It's not possible to perform EEH device addition (i.e.
+ * {pseries,pnv}_pcibios_bus_add_device()) here because it depends on
+ * the device's resources, which have not yet been set up.
+ */
+ case BUS_NOTIFY_DEL_DEVICE:
+ eeh_remove_device(to_pci_dev(dev));
+ break;
+ default:
+ break;
+ }
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block eeh_device_nb = {
+ .notifier_call = eeh_device_notifier,
+};
+
/**
* eeh_init - System wide EEH initialization
*
@@ -960,7 +984,14 @@ int eeh_init(struct eeh_ops *ops)
/* Register reboot notifier */
ret = register_reboot_notifier(&eeh_reboot_nb);
if (ret) {
- pr_warn("%s: Failed to register notifier (%d)\n",
+ pr_warn("%s: Failed to register reboot notifier (%d)\n",
+ __func__, ret);
+ return ret;
+ }
+
+ ret = bus_register_notifier(&pci_bus_type, &eeh_device_nb);
+ if (ret) {
+ pr_warn("%s: Failed to register bus notifier (%d)\n",
__func__, ret);
return ret;
}
@@ -975,37 +1006,6 @@ int eeh_init(struct eeh_ops *ops)
return eeh_event_init();
}
-static int eeh_device_notifier(struct notifier_block *nb,
- unsigned long action, void *data)
-{
- struct device *dev = data;
-
- switch (action) {
- /*
- * Note: It's not possible to perform EEH device addition (i.e.
- * {pseries,pnv}_pcibios_bus_add_device()) here because it depends on
- * the device's resources, which have not yet been set up.
- */
- case BUS_NOTIFY_DEL_DEVICE:
- eeh_remove_device(to_pci_dev(dev));
- break;
- default:
- break;
- }
- return NOTIFY_DONE;
-}
-
-static struct notifier_block eeh_device_nb = {
- .notifier_call = eeh_device_notifier,
-};
-
-static __init int eeh_set_bus_notifier(void)
-{
- bus_register_notifier(&pci_bus_type, &eeh_device_nb);
- return 0;
-}
-arch_initcall(eeh_set_bus_notifier);
-
/**
* eeh_probe_device() - Perform EEH initialization for the indicated pci device
* @dev: pci device for which to set up EEH
diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c
index 8b0fb6c8b8d9..6d95e774bded 100644
--- a/arch/powerpc/platforms/powernv/eeh-powernv.c
+++ b/arch/powerpc/platforms/powernv/eeh-powernv.c
@@ -1703,4 +1703,4 @@ static int __init eeh_powernv_init(void)
return ret;
}
-machine_core_initcall_sync(powernv, eeh_powernv_init);
+machine_arch_initcall(powernv, eeh_powernv_init);
diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c
index fd3444428632..b1561961c7ff 100644
--- a/arch/powerpc/platforms/pseries/eeh_pseries.c
+++ b/arch/powerpc/platforms/pseries/eeh_pseries.c
@@ -989,4 +989,4 @@ static int __init eeh_pseries_init(void)
ret);
return ret;
}
-machine_core_initcall_sync(pseries, eeh_pseries_init);
+machine_arch_initcall(pseries, eeh_pseries_init);
--
2.26.2
^ permalink raw reply related
* [PATCH v2 4/9] powerpc/eeh: Delete eeh_ops->init
From: Oliver O'Halloran @ 2020-09-18 9:30 UTC (permalink / raw)
To: linuxppc-dev; +Cc: Oliver O'Halloran
In-Reply-To: <20200918093050.37344-1-oohall@gmail.com>
No longer used since the platforms perform their EEH initialisation before
calling eeh_init().
Signed-off-by: Oliver O'Halloran <oohall@gmail.com>
---
arch/powerpc/include/asm/eeh.h | 1 -
arch/powerpc/kernel/eeh.c | 8 --------
2 files changed, 9 deletions(-)
diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index 765bcf63edea..85030c05e67e 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -216,7 +216,6 @@ enum {
struct eeh_ops {
char *name;
- int (*init)(void);
struct eeh_dev *(*probe)(struct pci_dev *pdev);
int (*set_option)(struct eeh_pe *pe, int option);
int (*get_state)(struct eeh_pe *pe, int *delay);
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index 28a0ea5d9faa..98faf139e676 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -965,14 +965,6 @@ int eeh_init(struct eeh_ops *ops)
return ret;
}
- if (eeh_ops->init)
- ret = eeh_ops->init();
- if (ret) {
- pr_warn("%s: platform EEH init failed (%d)\n",
- __func__, ret);
- return ret;
- }
-
/* Initialize PHB PEs */
list_for_each_entry_safe(hose, tmp, &hose_list, list_node)
eeh_phb_pe_create(hose);
--
2.26.2
^ permalink raw reply related
* [PATCH v2 3/9] powerpc/pseries: Stop using eeh_ops->init()
From: Oliver O'Halloran @ 2020-09-18 9:30 UTC (permalink / raw)
To: linuxppc-dev; +Cc: Oliver O'Halloran
In-Reply-To: <20200918093050.37344-1-oohall@gmail.com>
Fold pseries_eeh_init() into eeh_pseries_init() rather than having
eeh_init() call it via eeh_ops->init(). It's simpler and it'll let us
delete eeh_ops.init.
Signed-off-by: Oliver O'Halloran <oohall@gmail.com>
---
arch/powerpc/platforms/pseries/eeh_pseries.c | 155 +++++++++----------
1 file changed, 71 insertions(+), 84 deletions(-)
diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c
index 3cc569e8b6d4..fd3444428632 100644
--- a/arch/powerpc/platforms/pseries/eeh_pseries.c
+++ b/arch/powerpc/platforms/pseries/eeh_pseries.c
@@ -239,88 +239,6 @@ static unsigned char slot_errbuf[RTAS_ERROR_LOG_MAX];
static DEFINE_SPINLOCK(slot_errbuf_lock);
static int eeh_error_buf_size;
-/**
- * pseries_eeh_init - EEH platform dependent initialization
- *
- * EEH platform dependent initialization on pseries.
- */
-static int pseries_eeh_init(void)
-{
- struct pci_controller *phb;
- struct pci_dn *pdn;
- int addr, config_addr;
-
- /* figure out EEH RTAS function call tokens */
- ibm_set_eeh_option = rtas_token("ibm,set-eeh-option");
- ibm_set_slot_reset = rtas_token("ibm,set-slot-reset");
- ibm_read_slot_reset_state2 = rtas_token("ibm,read-slot-reset-state2");
- ibm_read_slot_reset_state = rtas_token("ibm,read-slot-reset-state");
- ibm_slot_error_detail = rtas_token("ibm,slot-error-detail");
- ibm_get_config_addr_info2 = rtas_token("ibm,get-config-addr-info2");
- ibm_get_config_addr_info = rtas_token("ibm,get-config-addr-info");
- ibm_configure_pe = rtas_token("ibm,configure-pe");
-
- /*
- * ibm,configure-pe and ibm,configure-bridge have the same semantics,
- * however ibm,configure-pe can be faster. If we can't find
- * ibm,configure-pe then fall back to using ibm,configure-bridge.
- */
- if (ibm_configure_pe == RTAS_UNKNOWN_SERVICE)
- ibm_configure_pe = rtas_token("ibm,configure-bridge");
-
- /*
- * Necessary sanity check. We needn't check "get-config-addr-info"
- * and its variant since the old firmware probably support address
- * of domain/bus/slot/function for EEH RTAS operations.
- */
- if (ibm_set_eeh_option == RTAS_UNKNOWN_SERVICE ||
- ibm_set_slot_reset == RTAS_UNKNOWN_SERVICE ||
- (ibm_read_slot_reset_state2 == RTAS_UNKNOWN_SERVICE &&
- ibm_read_slot_reset_state == RTAS_UNKNOWN_SERVICE) ||
- ibm_slot_error_detail == RTAS_UNKNOWN_SERVICE ||
- ibm_configure_pe == RTAS_UNKNOWN_SERVICE) {
- pr_info("EEH functionality not supported\n");
- return -EINVAL;
- }
-
- /* Initialize error log lock and size */
- spin_lock_init(&slot_errbuf_lock);
- eeh_error_buf_size = rtas_token("rtas-error-log-max");
- if (eeh_error_buf_size == RTAS_UNKNOWN_SERVICE) {
- pr_info("%s: unknown EEH error log size\n",
- __func__);
- eeh_error_buf_size = 1024;
- } else if (eeh_error_buf_size > RTAS_ERROR_LOG_MAX) {
- pr_info("%s: EEH error log size %d exceeds the maximal %d\n",
- __func__, eeh_error_buf_size, RTAS_ERROR_LOG_MAX);
- eeh_error_buf_size = RTAS_ERROR_LOG_MAX;
- }
-
- /* Set EEH probe mode */
- eeh_add_flag(EEH_PROBE_MODE_DEVTREE | EEH_ENABLE_IO_FOR_LOG);
-
- /* Set EEH machine dependent code */
- ppc_md.pcibios_bus_add_device = pseries_pcibios_bus_add_device;
-
- if (is_kdump_kernel() || reset_devices) {
- pr_info("Issue PHB reset ...\n");
- list_for_each_entry(phb, &hose_list, list_node) {
- pdn = list_first_entry(&PCI_DN(phb->dn)->child_list, struct pci_dn, list);
- addr = (pdn->busno << 16) | (pdn->devfn << 8);
- config_addr = pseries_eeh_get_config_addr(phb, addr);
- /* invalid PE config addr */
- if (config_addr == 0)
- continue;
-
- pseries_eeh_phb_reset(phb, config_addr, EEH_RESET_FUNDAMENTAL);
- pseries_eeh_phb_reset(phb, config_addr, EEH_RESET_DEACTIVATE);
- pseries_eeh_phb_configure_bridge(phb, config_addr);
- }
- }
-
- return 0;
-}
-
static int pseries_eeh_cap_start(struct pci_dn *pdn)
{
u32 status;
@@ -967,7 +885,6 @@ static int pseries_notify_resume(struct eeh_dev *edev)
static struct eeh_ops pseries_eeh_ops = {
.name = "pseries",
- .init = pseries_eeh_init,
.probe = pseries_eeh_probe,
.set_option = pseries_eeh_set_option,
.get_state = pseries_eeh_get_state,
@@ -992,7 +909,77 @@ static struct eeh_ops pseries_eeh_ops = {
*/
static int __init eeh_pseries_init(void)
{
- int ret;
+ struct pci_controller *phb;
+ struct pci_dn *pdn;
+ int ret, addr, config_addr;
+
+ /* figure out EEH RTAS function call tokens */
+ ibm_set_eeh_option = rtas_token("ibm,set-eeh-option");
+ ibm_set_slot_reset = rtas_token("ibm,set-slot-reset");
+ ibm_read_slot_reset_state2 = rtas_token("ibm,read-slot-reset-state2");
+ ibm_read_slot_reset_state = rtas_token("ibm,read-slot-reset-state");
+ ibm_slot_error_detail = rtas_token("ibm,slot-error-detail");
+ ibm_get_config_addr_info2 = rtas_token("ibm,get-config-addr-info2");
+ ibm_get_config_addr_info = rtas_token("ibm,get-config-addr-info");
+ ibm_configure_pe = rtas_token("ibm,configure-pe");
+
+ /*
+ * ibm,configure-pe and ibm,configure-bridge have the same semantics,
+ * however ibm,configure-pe can be faster. If we can't find
+ * ibm,configure-pe then fall back to using ibm,configure-bridge.
+ */
+ if (ibm_configure_pe == RTAS_UNKNOWN_SERVICE)
+ ibm_configure_pe = rtas_token("ibm,configure-bridge");
+
+ /*
+ * Necessary sanity check. We needn't check "get-config-addr-info"
+ * and its variant since the old firmware probably support address
+ * of domain/bus/slot/function for EEH RTAS operations.
+ */
+ if (ibm_set_eeh_option == RTAS_UNKNOWN_SERVICE ||
+ ibm_set_slot_reset == RTAS_UNKNOWN_SERVICE ||
+ (ibm_read_slot_reset_state2 == RTAS_UNKNOWN_SERVICE &&
+ ibm_read_slot_reset_state == RTAS_UNKNOWN_SERVICE) ||
+ ibm_slot_error_detail == RTAS_UNKNOWN_SERVICE ||
+ ibm_configure_pe == RTAS_UNKNOWN_SERVICE) {
+ pr_info("EEH functionality not supported\n");
+ return -EINVAL;
+ }
+
+ /* Initialize error log lock and size */
+ spin_lock_init(&slot_errbuf_lock);
+ eeh_error_buf_size = rtas_token("rtas-error-log-max");
+ if (eeh_error_buf_size == RTAS_UNKNOWN_SERVICE) {
+ pr_info("%s: unknown EEH error log size\n",
+ __func__);
+ eeh_error_buf_size = 1024;
+ } else if (eeh_error_buf_size > RTAS_ERROR_LOG_MAX) {
+ pr_info("%s: EEH error log size %d exceeds the maximal %d\n",
+ __func__, eeh_error_buf_size, RTAS_ERROR_LOG_MAX);
+ eeh_error_buf_size = RTAS_ERROR_LOG_MAX;
+ }
+
+ /* Set EEH probe mode */
+ eeh_add_flag(EEH_PROBE_MODE_DEVTREE | EEH_ENABLE_IO_FOR_LOG);
+
+ /* Set EEH machine dependent code */
+ ppc_md.pcibios_bus_add_device = pseries_pcibios_bus_add_device;
+
+ if (is_kdump_kernel() || reset_devices) {
+ pr_info("Issue PHB reset ...\n");
+ list_for_each_entry(phb, &hose_list, list_node) {
+ pdn = list_first_entry(&PCI_DN(phb->dn)->child_list, struct pci_dn, list);
+ addr = (pdn->busno << 16) | (pdn->devfn << 8);
+ config_addr = pseries_eeh_get_config_addr(phb, addr);
+ /* invalid PE config addr */
+ if (config_addr == 0)
+ continue;
+
+ pseries_eeh_phb_reset(phb, config_addr, EEH_RESET_FUNDAMENTAL);
+ pseries_eeh_phb_reset(phb, config_addr, EEH_RESET_DEACTIVATE);
+ pseries_eeh_phb_configure_bridge(phb, config_addr);
+ }
+ }
ret = eeh_init(&pseries_eeh_ops);
if (!ret)
--
2.26.2
^ permalink raw reply related
* [PATCH v2 2/9] powerpc/powernv: Stop using eeh_ops->init()
From: Oliver O'Halloran @ 2020-09-18 9:30 UTC (permalink / raw)
To: linuxppc-dev; +Cc: Oliver O'Halloran
In-Reply-To: <20200918093050.37344-1-oohall@gmail.com>
Fold pnv_eeh_init() into eeh_powernv_init() rather than having eeh_init()
call it via eeh_ops->init(). It's simpler and it'll let us delete
eeh_ops.init.
Signed-off-by: Oliver O'Halloran <oohall@gmail.com>
---
arch/powerpc/platforms/powernv/eeh-powernv.c | 94 ++++++++++----------
1 file changed, 45 insertions(+), 49 deletions(-)
diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c
index 3eb0f2439da8..8b0fb6c8b8d9 100644
--- a/arch/powerpc/platforms/powernv/eeh-powernv.c
+++ b/arch/powerpc/platforms/powernv/eeh-powernv.c
@@ -44,54 +44,6 @@ void pnv_pcibios_bus_add_device(struct pci_dev *pdev)
eeh_probe_device(pdev);
}
-static int pnv_eeh_init(void)
-{
- struct pci_controller *hose;
- struct pnv_phb *phb;
- int max_diag_size = PNV_PCI_DIAG_BUF_SIZE;
-
- if (!firmware_has_feature(FW_FEATURE_OPAL)) {
- pr_warn("%s: OPAL is required !\n",
- __func__);
- return -EINVAL;
- }
-
- /* Set probe mode */
- eeh_add_flag(EEH_PROBE_MODE_DEV);
-
- /*
- * P7IOC blocks PCI config access to frozen PE, but PHB3
- * doesn't do that. So we have to selectively enable I/O
- * prior to collecting error log.
- */
- list_for_each_entry(hose, &hose_list, list_node) {
- phb = hose->private_data;
-
- if (phb->model == PNV_PHB_MODEL_P7IOC)
- eeh_add_flag(EEH_ENABLE_IO_FOR_LOG);
-
- if (phb->diag_data_size > max_diag_size)
- max_diag_size = phb->diag_data_size;
-
- /*
- * PE#0 should be regarded as valid by EEH core
- * if it's not the reserved one. Currently, we
- * have the reserved PE#255 and PE#127 for PHB3
- * and P7IOC separately. So we should regard
- * PE#0 as valid for PHB3 and P7IOC.
- */
- if (phb->ioda.reserved_pe_idx != 0)
- eeh_add_flag(EEH_VALID_PE_ZERO);
-
- break;
- }
-
- eeh_set_pe_aux_size(max_diag_size);
- ppc_md.pcibios_bus_add_device = pnv_pcibios_bus_add_device;
-
- return 0;
-}
-
static irqreturn_t pnv_eeh_event(int irq, void *data)
{
/*
@@ -1674,7 +1626,6 @@ static int pnv_eeh_restore_config(struct eeh_dev *edev)
static struct eeh_ops pnv_eeh_ops = {
.name = "powernv",
- .init = pnv_eeh_init,
.probe = pnv_eeh_probe,
.set_option = pnv_eeh_set_option,
.get_state = pnv_eeh_get_state,
@@ -1697,8 +1648,53 @@ static struct eeh_ops pnv_eeh_ops = {
*/
static int __init eeh_powernv_init(void)
{
+ int max_diag_size = PNV_PCI_DIAG_BUF_SIZE;
+ struct pci_controller *hose;
+ struct pnv_phb *phb;
int ret = -EINVAL;
+ if (!firmware_has_feature(FW_FEATURE_OPAL)) {
+ pr_warn("%s: OPAL is required !\n", __func__);
+ return -EINVAL;
+ }
+
+ /* Set probe mode */
+ eeh_add_flag(EEH_PROBE_MODE_DEV);
+
+ /*
+ * P7IOC blocks PCI config access to frozen PE, but PHB3
+ * doesn't do that. So we have to selectively enable I/O
+ * prior to collecting error log.
+ */
+ list_for_each_entry(hose, &hose_list, list_node) {
+ phb = hose->private_data;
+
+ if (phb->model == PNV_PHB_MODEL_P7IOC)
+ eeh_add_flag(EEH_ENABLE_IO_FOR_LOG);
+
+ if (phb->diag_data_size > max_diag_size)
+ max_diag_size = phb->diag_data_size;
+
+ /*
+ * PE#0 should be regarded as valid by EEH core
+ * if it's not the reserved one. Currently, we
+ * have the reserved PE#255 and PE#127 for PHB3
+ * and P7IOC separately. So we should regard
+ * PE#0 as valid for PHB3 and P7IOC.
+ */
+ if (phb->ioda.reserved_pe_idx != 0)
+ eeh_add_flag(EEH_VALID_PE_ZERO);
+
+ break;
+ }
+
+ /*
+ * eeh_init() allocates the eeh_pe and its aux data buf so the
+ * size needs to be set before calling eeh_init().
+ */
+ eeh_set_pe_aux_size(max_diag_size);
+ ppc_md.pcibios_bus_add_device = pnv_pcibios_bus_add_device;
+
ret = eeh_init(&pnv_eeh_ops);
if (!ret)
pr_info("EEH: PowerNV platform initialized\n");
--
2.26.2
^ permalink raw reply related
* [PATCH v2 1/9] powerpc/eeh: Rework EEH initialisation
From: Oliver O'Halloran @ 2020-09-18 9:30 UTC (permalink / raw)
To: linuxppc-dev; +Cc: Oliver O'Halloran
Drop the EEH register / unregister ops thing and have the platform pass the
ops structure into eeh_init() directly. This takes one initcall out of the
EEH setup path and it means we're only doing EEH setup on the platforms
which actually support it. It's also less code and generally easier to
follow.
No functional changes.
Signed-off-by: Oliver O'Halloran <oohall@gmail.com>
---
arch/powerpc/include/asm/eeh.h | 3 +-
arch/powerpc/kernel/eeh.c | 87 ++++----------------
arch/powerpc/platforms/powernv/eeh-powernv.c | 4 +-
arch/powerpc/platforms/pseries/eeh_pseries.c | 5 +-
4 files changed, 21 insertions(+), 78 deletions(-)
diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index d5f369bcd130..765bcf63edea 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -295,8 +295,7 @@ const char *eeh_pe_loc_get(struct eeh_pe *pe);
struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe);
void eeh_show_enabled(void);
-int __init eeh_ops_register(struct eeh_ops *ops);
-int __exit eeh_ops_unregister(const char *name);
+int __init eeh_init(struct eeh_ops *ops);
int eeh_check_failure(const volatile void __iomem *token);
int eeh_dev_check_failure(struct eeh_dev *edev);
void eeh_addr_cache_init(void);
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index 94682382fc8c..28a0ea5d9faa 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -929,56 +929,6 @@ void eeh_save_bars(struct eeh_dev *edev)
edev->config_space[1] |= PCI_COMMAND_MASTER;
}
-/**
- * eeh_ops_register - Register platform dependent EEH operations
- * @ops: platform dependent EEH operations
- *
- * Register the platform dependent EEH operation callback
- * functions. The platform should call this function before
- * any other EEH operations.
- */
-int __init eeh_ops_register(struct eeh_ops *ops)
-{
- if (!ops->name) {
- pr_warn("%s: Invalid EEH ops name for %p\n",
- __func__, ops);
- return -EINVAL;
- }
-
- if (eeh_ops && eeh_ops != ops) {
- pr_warn("%s: EEH ops of platform %s already existing (%s)\n",
- __func__, eeh_ops->name, ops->name);
- return -EEXIST;
- }
-
- eeh_ops = ops;
-
- return 0;
-}
-
-/**
- * eeh_ops_unregister - Unreigster platform dependent EEH operations
- * @name: name of EEH platform operations
- *
- * Unregister the platform dependent EEH operation callback
- * functions.
- */
-int __exit eeh_ops_unregister(const char *name)
-{
- if (!name || !strlen(name)) {
- pr_warn("%s: Invalid EEH ops name\n",
- __func__);
- return -EINVAL;
- }
-
- if (eeh_ops && !strcmp(eeh_ops->name, name)) {
- eeh_ops = NULL;
- return 0;
- }
-
- return -EEXIST;
-}
-
static int eeh_reboot_notifier(struct notifier_block *nb,
unsigned long action, void *unused)
{
@@ -991,25 +941,22 @@ static struct notifier_block eeh_reboot_nb = {
};
/**
- * eeh_init - EEH initialization
- *
- * Initialize EEH by trying to enable it for all of the adapters in the system.
- * As a side effect we can determine here if eeh is supported at all.
- * Note that we leave EEH on so failed config cycles won't cause a machine
- * check. If a user turns off EEH for a particular adapter they are really
- * telling Linux to ignore errors. Some hardware (e.g. POWER5) won't
- * grant access to a slot if EEH isn't enabled, and so we always enable
- * EEH for all slots/all devices.
+ * eeh_init - System wide EEH initialization
*
- * The eeh-force-off option disables EEH checking globally, for all slots.
- * Even if force-off is set, the EEH hardware is still enabled, so that
- * newer systems can boot.
+ * It's the platform's job to call this from an arch_initcall().
*/
-static int eeh_init(void)
+int eeh_init(struct eeh_ops *ops)
{
struct pci_controller *hose, *tmp;
int ret = 0;
+ /* the platform should only initialise EEH once */
+ if (WARN_ON(eeh_ops))
+ return -EEXIST;
+ if (WARN_ON(!ops))
+ return -ENOENT;
+ eeh_ops = ops;
+
/* Register reboot notifier */
ret = register_reboot_notifier(&eeh_reboot_nb);
if (ret) {
@@ -1018,13 +965,13 @@ static int eeh_init(void)
return ret;
}
- /* call platform initialization function */
- if (!eeh_ops) {
- pr_warn("%s: Platform EEH operation not found\n",
- __func__);
- return -EEXIST;
- } else if ((ret = eeh_ops->init()))
+ if (eeh_ops->init)
+ ret = eeh_ops->init();
+ if (ret) {
+ pr_warn("%s: platform EEH init failed (%d)\n",
+ __func__, ret);
return ret;
+ }
/* Initialize PHB PEs */
list_for_each_entry_safe(hose, tmp, &hose_list, list_node)
@@ -1036,8 +983,6 @@ static int eeh_init(void)
return eeh_event_init();
}
-core_initcall_sync(eeh_init);
-
static int eeh_device_notifier(struct notifier_block *nb,
unsigned long action, void *data)
{
diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c
index 0cabe4e632e3..3eb0f2439da8 100644
--- a/arch/powerpc/platforms/powernv/eeh-powernv.c
+++ b/arch/powerpc/platforms/powernv/eeh-powernv.c
@@ -1699,7 +1699,7 @@ static int __init eeh_powernv_init(void)
{
int ret = -EINVAL;
- ret = eeh_ops_register(&pnv_eeh_ops);
+ ret = eeh_init(&pnv_eeh_ops);
if (!ret)
pr_info("EEH: PowerNV platform initialized\n");
else
@@ -1707,4 +1707,4 @@ static int __init eeh_powernv_init(void)
return ret;
}
-machine_early_initcall(powernv, eeh_powernv_init);
+machine_core_initcall_sync(powernv, eeh_powernv_init);
diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c
index cb2d9a970b7b..3cc569e8b6d4 100644
--- a/arch/powerpc/platforms/pseries/eeh_pseries.c
+++ b/arch/powerpc/platforms/pseries/eeh_pseries.c
@@ -994,13 +994,12 @@ static int __init eeh_pseries_init(void)
{
int ret;
- ret = eeh_ops_register(&pseries_eeh_ops);
+ ret = eeh_init(&pseries_eeh_ops);
if (!ret)
pr_info("EEH: pSeries platform initialized\n");
else
pr_info("EEH: pSeries platform initialization failure (%d)\n",
ret);
-
return ret;
}
-machine_early_initcall(pseries, eeh_pseries_init);
+machine_core_initcall_sync(pseries, eeh_pseries_init);
--
2.26.2
^ permalink raw reply related
* [PATCH -next] tty: hvc: fix link error with CONFIG_SERIAL_CORE_CONSOLE=n
From: Yang Yingliang @ 2020-09-18 9:20 UTC (permalink / raw)
To: linuxppc-dev, linux-kernel; +Cc: gregkh
Fix the link error by selecting SERIAL_CORE_CONSOLE.
aarch64-linux-gnu-ld: drivers/tty/hvc/hvc_dcc.o: in function `dcc_early_write':
hvc_dcc.c:(.text+0x164): undefined reference to `uart_console_write'
Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
---
drivers/tty/hvc/Kconfig | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/tty/hvc/Kconfig b/drivers/tty/hvc/Kconfig
index d1b27b0522a3..8d60e0ff67b4 100644
--- a/drivers/tty/hvc/Kconfig
+++ b/drivers/tty/hvc/Kconfig
@@ -81,6 +81,7 @@ config HVC_DCC
bool "ARM JTAG DCC console"
depends on ARM || ARM64
select HVC_DRIVER
+ select SERIAL_CORE_CONSOLE
help
This console uses the JTAG DCC on ARM to create a console under the HVC
driver. This console is used through a JTAG only on ARM. If you don't have
--
2.25.1
^ permalink raw reply related
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox