LinuxPPC-Dev Archive on lore.kernel.org

LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed

* [PATCH vdsotest] Add support for clock_gettime64() on powerpc32
From: Christophe Leroy @ 2020-05-09 16:18 UTC (permalink / raw)
  To: nathanl, arnd; +Cc: linuxppc-dev

libc test is commented out because at the time being
very few libc if any supports clock_gettime64()

syscall number is hardcoded when it doesn't exists in unistd.h

Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
Based on master from https://github.com/nathanlynch/vdsotest.git

 src/clock-boottime.c           |   4 +
 src/clock-monotonic-coarse.c   |   4 +
 src/clock-monotonic-raw.c      |   4 +
 src/clock-monotonic.c          |   4 +
 src/clock-realtime-coarse.c    |   4 +
 src/clock-realtime.c           |   4 +
 src/clock-tai.c                |   4 +
 src/clock_gettime64_template.c | 401 +++++++++++++++++++++++++++++++++
 8 files changed, 429 insertions(+)
 create mode 100644 src/clock_gettime64_template.c

diff --git a/src/clock-boottime.c b/src/clock-boottime.c
index 9fb1ac48501d..07ef31d08614 100644
--- a/src/clock-boottime.c
+++ b/src/clock-boottime.c
@@ -3,3 +3,7 @@
 
 #include "clock_gettime_template.c"
 #include "clock_getres_template.c"
+
+#if defined(__powerpc__) && !defined(__powerpc64__)
+#include "clock_gettime64_template.c"
+#endif
diff --git a/src/clock-monotonic-coarse.c b/src/clock-monotonic-coarse.c
index ca1df58691ca..da064188756f 100644
--- a/src/clock-monotonic-coarse.c
+++ b/src/clock-monotonic-coarse.c
@@ -3,3 +3,7 @@
 
 #include "clock_gettime_template.c"
 #include "clock_getres_template.c"
+
+#if defined(__powerpc__) && !defined(__powerpc64__)
+#include "clock_gettime64_template.c"
+#endif
diff --git a/src/clock-monotonic-raw.c b/src/clock-monotonic-raw.c
index 5dbb1842e698..55373b94ecfd 100644
--- a/src/clock-monotonic-raw.c
+++ b/src/clock-monotonic-raw.c
@@ -3,3 +3,7 @@
 
 #include "clock_gettime_template.c"
 #include "clock_getres_template.c"
+
+#if defined(__powerpc__) && !defined(__powerpc64__)
+#include "clock_gettime64_template.c"
+#endif
diff --git a/src/clock-monotonic.c b/src/clock-monotonic.c
index 44318ae1e1c2..a900d24598a1 100644
--- a/src/clock-monotonic.c
+++ b/src/clock-monotonic.c
@@ -3,3 +3,7 @@
 
 #include "clock_gettime_template.c"
 #include "clock_getres_template.c"
+
+#if defined(__powerpc__) && !defined(__powerpc64__)
+#include "clock_gettime64_template.c"
+#endif
diff --git a/src/clock-realtime-coarse.c b/src/clock-realtime-coarse.c
index 8f33f9a2d30b..8f2e6242bf0d 100644
--- a/src/clock-realtime-coarse.c
+++ b/src/clock-realtime-coarse.c
@@ -3,3 +3,7 @@
 
 #include "clock_gettime_template.c"
 #include "clock_getres_template.c"
+
+#if defined(__powerpc__) && !defined(__powerpc64__)
+#include "clock_gettime64_template.c"
+#endif
diff --git a/src/clock-realtime.c b/src/clock-realtime.c
index 079fd801e654..ab76329a6676 100644
--- a/src/clock-realtime.c
+++ b/src/clock-realtime.c
@@ -3,3 +3,7 @@
 
 #include "clock_gettime_template.c"
 #include "clock_getres_template.c"
+
+#if defined(__powerpc__) && !defined(__powerpc64__)
+#include "clock_gettime64_template.c"
+#endif
diff --git a/src/clock-tai.c b/src/clock-tai.c
index ad0448adeba5..cb2511039ddc 100644
--- a/src/clock-tai.c
+++ b/src/clock-tai.c
@@ -7,3 +7,7 @@
 
 #include "clock_gettime_template.c"
 #include "clock_getres_template.c"
+
+#if defined(__powerpc__) && !defined(__powerpc64__)
+#include "clock_gettime64_template.c"
+#endif
diff --git a/src/clock_gettime64_template.c b/src/clock_gettime64_template.c
new file mode 100644
index 000000000000..4752845148d6
--- /dev/null
+++ b/src/clock_gettime64_template.c
@@ -0,0 +1,401 @@
+/*
+ * Copyright 2014 Mentor Graphics Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+#include <errno.h>
+#include <error.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+#include <sys/syscall.h>
+#include <sys/time.h>
+#include <unistd.h>
+
+#include "compiler.h"
+#include "vdsotest.h"
+
+struct timespec64 {
+	long long tv_sec;
+	long tv_nsec;
+};
+
+#ifndef SYS_clock_gettime64
+#define SYS_clock_gettime64 403
+#endif
+
+static int (*clock_gettime64_vdso)(clockid_t id, struct timespec64 *ts);
+
+static bool vdso_has_clock_gettime64(void)
+{
+	return clock_gettime64_vdso != NULL;
+}
+
+static int clock_gettime64_syscall_wrapper(clockid_t id, struct timespec64 *ts)
+{
+	return syscall(SYS_clock_gettime64, id, ts);
+}
+
+static void clock_gettime64_syscall_nofail(clockid_t id, struct timespec64 *ts)
+{
+	int err;
+
+	err = clock_gettime64_syscall_wrapper(id, ts);
+	if (err)
+		error(EXIT_FAILURE, errno, "SYS_clock_gettime64");
+}
+
+static int clock_gettime64_vdso_wrapper(clockid_t id, struct timespec64 *ts)
+{
+	return DO_VDSO_CALL(clock_gettime64_vdso, int, 2, id, ts);
+}
+
+static void clock_gettime64_vdso_nofail(clockid_t id, struct timespec64 *ts)
+{
+	int err;
+
+	err = clock_gettime64_vdso_wrapper(id, ts);
+	if (err)
+		error(EXIT_FAILURE, errno, "clock_gettime");
+}
+
+static bool timespecs64_ordered(const struct timespec64 *first,
+				const struct timespec64 *second)
+{
+	if (first->tv_sec < second->tv_sec)
+		return true;
+
+	if (first->tv_sec == second->tv_sec)
+		return first->tv_nsec <= second->tv_nsec;
+
+	return false;
+}
+
+static bool timespec64_normalized(const struct timespec64 *ts)
+{
+	if (ts->tv_sec < 0)
+		return false;
+	if (ts->tv_nsec < 0)
+		return false;
+	if (ts->tv_nsec >= NSEC_PER_SEC)
+		return false;
+	return true;
+}
+
+static void clock_gettime64_verify(struct ctx *ctx)
+{
+	struct timespec64 now;
+
+	clock_gettime64_syscall_nofail(CLOCK_ID, &now);
+
+	ctx_start_timer(ctx);
+
+	while (!test_should_stop(ctx)) {
+		struct timespec64 prev;
+
+		if (!vdso_has_clock_gettime64())
+			goto skip_vdso;
+
+		prev = now;
+
+		clock_gettime64_vdso_nofail(CLOCK_ID, &now);
+
+		if (!timespec64_normalized(&now)) {
+			log_failure(ctx, "timestamp obtained from libc/vDSO "
+				    "not normalized:\n"
+				    "\t[%ld, %ld]\n",
+				    (long int)now.tv_sec, (long int)now.tv_nsec);
+		}
+
+		if (!timespecs64_ordered(&prev, &now)) {
+			log_failure(ctx, "timestamp obtained from libc/vDSO "
+				    "predates timestamp\n"
+				    "previously obtained from kernel:\n"
+				    "\t[%ld, %ld] (kernel)\n"
+				    "\t[%ld, %ld] (vDSO)\n",
+				    (long int)prev.tv_sec, (long int)prev.tv_nsec,
+				    (long int)now.tv_sec, (long int)now.tv_nsec);
+		}
+
+	skip_vdso:
+		prev = now;
+
+		clock_gettime64_syscall_nofail(CLOCK_ID, &now);
+
+		if (!timespec64_normalized(&now)) {
+			log_failure(ctx, "timestamp obtained from kernel "
+				    "not normalized:\n"
+				    "\t[%ld, %ld]\n",
+				    (long int)now.tv_sec, (long int)now.tv_nsec);
+		}
+
+		if (!timespecs64_ordered(&prev, &now)) {
+			log_failure(ctx, "timestamp obtained from kernel "
+				    "predates timestamp\n"
+				    "previously obtained from libc/vDSO:\n"
+				    "\t[%ld, %ld] (vDSO)\n"
+				    "\t[%ld, %ld] (kernel)\n",
+				    (long int)prev.tv_sec, (long int)prev.tv_nsec,
+				    (long int)now.tv_sec, (long int)now.tv_nsec);
+		}
+
+	}
+
+	ctx_cleanup_timer(ctx);
+}
+
+static void clock_gettime64_bench(struct ctx *ctx, struct bench_results *res)
+{
+	struct timespec64 ts;
+
+	if (vdso_has_clock_gettime64()) {
+		BENCH(ctx, clock_gettime64_vdso_wrapper(CLOCK_ID, &ts),
+		      &res->vdso_interval);
+	}
+
+/*	BENCH(ctx, clock_gettime64(CLOCK_ID, &ts),
+	      &res->libc_interval);
+*/
+	BENCH(ctx, clock_gettime64_syscall_wrapper(CLOCK_ID, &ts),
+	      &res->sys_interval);
+}
+
+static void sys_clock_gettime64_simple(void *arg, struct syscall_result *res)
+{
+	int err;
+
+	syscall_prepare();
+	err = clock_gettime64_syscall_wrapper(CLOCK_ID, arg);
+	record_syscall_result(res, err, errno);
+}
+
+static void sys_clock_gettime64_prot(void *arg, struct syscall_result *res)
+{
+	void *buf;
+	int err;
+
+	buf = alloc_page((int)(unsigned long)arg);
+	syscall_prepare();
+	err = clock_gettime64_syscall_wrapper(CLOCK_ID, buf);
+	record_syscall_result(res, err, errno);
+	free_page(buf);
+}
+
+static void vdso_clock_gettime64_simple(void *arg, struct syscall_result *res)
+{
+	int err;
+
+	syscall_prepare();
+	err = clock_gettime64_vdso_wrapper(CLOCK_ID, arg);
+	record_syscall_result(res, err, errno);
+}
+
+static void vdso_clock_gettime64_prot(void *arg, struct syscall_result *res)
+{
+	void *buf;
+	int err;
+
+	buf = alloc_page((int)(unsigned long)arg);
+	syscall_prepare();
+	err = clock_gettime64_vdso_wrapper(CLOCK_ID, buf);
+	record_syscall_result(res, err, errno);
+	free_page(buf);
+}
+
+static void clock_gettime64_bogus_id(void *arg, struct syscall_result *res)
+{
+	struct timespec64 ts;
+	int err;
+
+	syscall_prepare();
+	err = arg ? clock_gettime64_syscall_wrapper((clockid_t)-1, &ts) :
+		clock_gettime64_vdso_wrapper((clockid_t)-1, &ts);
+
+	record_syscall_result(res, err, errno);
+}
+
+static void clock_gettime64_bogus_id_null(void *arg, struct syscall_result *res)
+{
+	int err;
+
+	syscall_prepare();
+	err = arg ? clock_gettime64_syscall_wrapper((clockid_t)-1, NULL) :
+		clock_gettime64_vdso_wrapper((clockid_t)-1, NULL);
+
+	record_syscall_result(res, err, errno);
+}
+
+static const struct child_params sys_clock_gettime64_abi_params[] = {
+
+	/* Kernel sanity checks */
+
+	{
+		.desc = "passing NULL to clock_gettime (syscall)",
+		.func = sys_clock_gettime64_simple,
+		.arg = NULL,
+		.expected_ret = -1,
+		.expected_errno = EFAULT,
+	},
+	{
+		.desc = "passing UINTPTR_MAX to clock_gettime (syscall)",
+		.func = sys_clock_gettime64_simple,
+		.arg = (void *)ADDR_SPACE_END,
+		.expected_ret = -1,
+		.expected_errno = EFAULT,
+	},
+	{
+		.desc = "passing PROT_NONE page to clock_gettime (syscall)",
+		.func = sys_clock_gettime64_prot,
+		.arg = (void *)PROT_NONE,
+		.expected_ret = -1,
+		.expected_errno = EFAULT,
+	},
+	{
+		.desc = "passing PROT_READ page to clock_gettime (syscall)",
+		.func = sys_clock_gettime64_prot,
+		.arg = (void *)PROT_READ,
+		.expected_ret = -1,
+		.expected_errno = EFAULT,
+	},
+	{
+		/* This will be duplicated across the different clock
+		 * id modules.  Oh well.
+		 */
+		.desc = "passing bogus clock id to clock_gettime (syscall)",
+		.func = clock_gettime64_bogus_id,
+		.arg = (void *)true, /* force syscall */
+		.expected_ret = -1,
+		.expected_errno = EINVAL,
+	},
+	{
+		/* This one too. */
+		.desc = "passing bogus clock id and NULL to clock_gettime (syscall)",
+		.func = clock_gettime64_bogus_id_null,
+		.arg = (void *)true, /* force syscall */
+		.expected_ret = -1,
+		.expected_errno = EINVAL,
+	},
+};
+
+static const struct child_params vdso_clock_gettime64_abi_params[] = {
+	/* The below will be serviced by a vDSO, if present. */
+
+	{
+		.desc = "passing NULL to clock_gettime (VDSO)",
+		.func = vdso_clock_gettime64_simple,
+		.arg = NULL,
+		.expected_ret = -1,
+		.expected_errno = EFAULT,
+		.signal_set = {
+			.mask = SIGNO_TO_BIT(SIGSEGV),
+		},
+	},
+	{
+		.desc = "passing UINTPTR_MAX to clock_gettime (VDSO)",
+		.func = vdso_clock_gettime64_simple,
+		.arg = (void *)ADDR_SPACE_END,
+		.expected_ret = -1,
+		.expected_errno = EFAULT,
+		.signal_set = {
+			.mask = SIGNO_TO_BIT(SIGSEGV),
+		},
+	},
+	{
+		.desc = "passing PROT_NONE page to clock_gettime (VDSO)",
+		.func = vdso_clock_gettime64_prot,
+		.arg = (void *)PROT_NONE,
+		.expected_ret = -1,
+		.expected_errno = EFAULT,
+		.signal_set = {
+			.mask = SIGNO_TO_BIT(SIGSEGV),
+		},
+	},
+	{
+		.desc = "passing PROT_READ page to clock_gettime (VDSO)",
+		.func = vdso_clock_gettime64_prot,
+		.arg = (void *)PROT_READ,
+		.expected_ret = -1,
+		.expected_errno = EFAULT,
+		.signal_set = {
+			.mask = SIGNO_TO_BIT(SIGSEGV),
+		},
+	},
+	{
+		/* This will be duplicated across the different clock
+		 * id modules.  Oh well.
+		 */
+		.desc = "passing bogus clock id to clock_gettime (VDSO)",
+		.func = clock_gettime64_bogus_id,
+		.arg = (void *)false, /* use vdso */
+		.expected_ret = -1,
+		.expected_errno = EINVAL,
+	},
+	{
+		/* This one too. */
+		.desc = "passing bogus clock id and NULL to clock_gettime (VDSO)",
+		.func = clock_gettime64_bogus_id_null,
+		.arg = (void *)false, /* use vdso */
+		.expected_ret = -1,
+		.expected_errno = EINVAL,
+	},
+};
+
+static void clock_gettime64_abi(struct ctx *ctx)
+{
+	unsigned int i;
+
+	for (i = 0; i < ARRAY_SIZE(sys_clock_gettime64_abi_params); i++)
+		run_as_child(ctx, &sys_clock_gettime64_abi_params[i]);
+
+	if (vdso_has_clock_gettime64()) {
+		for (i = 0; i < ARRAY_SIZE(vdso_clock_gettime64_abi_params); i++)
+			run_as_child(ctx, &vdso_clock_gettime64_abi_params[i]);
+	}
+}
+
+static void clock_gettime64_notes(struct ctx *ctx)
+{
+	if (!vdso_has_clock_gettime64())
+		printf("Note: vDSO version of clock_gettime64 not found\n");
+}
+
+static const char *clock_gettime64_vdso_names[] = {
+	"__kernel_clock_gettime64",
+	"__vdso_clock_gettime64",
+	NULL,
+};
+
+static void clock_gettime64_bind(void *sym)
+{
+	clock_gettime64_vdso = sym;
+}
+
+static const struct test_suite clock_gettime64_ts = {
+	.name = "clock-gettime64-" TS_SFX,
+	.bench = clock_gettime64_bench,
+	.verify = clock_gettime64_verify,
+	.abi = clock_gettime64_abi,
+	.notes = clock_gettime64_notes,
+	.vdso_names = clock_gettime64_vdso_names,
+	.bind = clock_gettime64_bind,
+};
+
+static void __constructor clock_gettime64_init(void)
+{
+	register_testsuite(&clock_gettime64_ts);
+}

base-commit: 7e4796a0695bdff3daca22630761264f5dff4680
-- 
2.25.0


^ permalink raw reply related

* [PATCH fixes] powerpc/40x: Make more space for system call exception
From: Christophe Leroy @ 2020-05-09 17:05 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman
  Cc: linuxppc-dev, linux-kernel

When CONFIG_VIRT_CPU_ACCOUNTING is selected, system call exception
handler doesn't fit below 0xd00 and build fails.

As exception 0xd00 doesn't exist and is never generated by 40x,
comment it out in order to get more space for system call exception.

Reported-by: kbuild test robot <lkp@intel.com>
Fixes: 9e27086292aa ("powerpc/32: Warn and return ENOSYS on syscalls from kernel")
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
 arch/powerpc/kernel/head_40x.S | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S
index 9bb663977e84..2cec543c38f0 100644
--- a/arch/powerpc/kernel/head_40x.S
+++ b/arch/powerpc/kernel/head_40x.S
@@ -344,8 +344,9 @@ _ENTRY(saved_ksp_limit)
 /* 0x0C00 - System Call Exception */
 	START_EXCEPTION(0x0C00,	SystemCall)
 	SYSCALL_ENTRY	0xc00
+/*	Trap_0D is commented out to get more space for system call exception */
 
-	EXCEPTION(0x0D00, Trap_0D, unknown_exception, EXC_XFER_STD)
+/*	EXCEPTION(0x0D00, Trap_0D, unknown_exception, EXC_XFER_STD) */
 	EXCEPTION(0x0E00, Trap_0E, unknown_exception, EXC_XFER_STD)
 	EXCEPTION(0x0F00, Trap_0F, unknown_exception, EXC_XFER_STD)
 
-- 
2.25.0


^ permalink raw reply related

* Re: [PATCH v8 8/8] powerpc/vdso: Provide __kernel_clock_gettime64() on vdso32
From: Christophe Leroy @ 2020-05-09 18:48 UTC (permalink / raw)
  To: Arnd Bergmann
  Cc: Nathan Lynch, linux-arch, linux-kernel@vger.kernel.org,
	Paul Mackerras, Andy Lutomirski, Thomas Gleixner,
	Vincenzo Frascino, linuxppc-dev
In-Reply-To: <d3f303f1-8b2c-0c54-5380-0b9a370a4eb3@csgroup.eu>



Le 09/05/2020 à 17:54, Christophe Leroy a écrit :
> 
> 
> Le 28/04/2020 à 18:05, Arnd Bergmann a écrit :
>> On Tue, Apr 28, 2020 at 3:16 PM Christophe Leroy
>> <christophe.leroy@c-s.fr> wrote:
>>>
>>> Provides __kernel_clock_gettime64() on vdso32. This is the
>>> 64 bits version of __kernel_clock_gettime() which is
>>> y2038 compliant.
>>>
>>> Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
>>
>> Looks good to me
>>
>> Reviewed-by: Arnd Bergmann <arnd@arndb.de>
>>
>> There was a bug on ARM for the corresponding function, so far it is 
>> unclear
>> if this was a problem related to particular hardware, the 32-bit 
>> kernel code,
>> or the common implementation of clock_gettime64 in the vdso library,
>> see https://github.com/richfelker/musl-cross-make/issues/96
>>
>> Just to be sure that powerpc is not affected by the same issue, can you
>> confirm that repeatedly calling clock_gettime64 on powerpc32, alternating
>> between vdso and syscall, results in monotically increasing times?
>>
> 
> I think that's one of the things vdsotest checks, so yes that's ok I think.
> 

Here is the full result with vdsotest:

gettimeofday: syscall: 3715 nsec/call
gettimeofday:    libc: 794 nsec/call
gettimeofday:    vdso: 947 nsec/call
getcpu: syscall: 1614 nsec/call
getcpu:    libc: 484 nsec/call
getcpu:    vdso: 184 nsec/call
clock-gettime64-realtime-coarse: syscall: 3152 nsec/call
clock-gettime64-realtime-coarse:    libc: not tested
clock-gettime64-realtime-coarse:    vdso: 653 nsec/call
clock-getres-realtime-coarse: syscall: 2963 nsec/call
clock-getres-realtime-coarse:    libc: 745 nsec/call
clock-getres-realtime-coarse:    vdso: 553 nsec/call
clock-gettime-realtime-coarse: syscall: 5120 nsec/call
clock-gettime-realtime-coarse:    libc: 731 nsec/call
clock-gettime-realtime-coarse:    vdso: 577 nsec/call
clock-gettime64-realtime: syscall: 3719 nsec/call
clock-gettime64-realtime:    libc: not tested
clock-gettime64-realtime:    vdso: 976 nsec/call
clock-getres-realtime: syscall: 2496 nsec/call
clock-getres-realtime:    libc: 745 nsec/call
clock-getres-realtime:    vdso: 546 nsec/call
clock-gettime-realtime: syscall: 4800 nsec/call
clock-gettime-realtime:    libc: 1080 nsec/call
clock-gettime-realtime:    vdso: 1798 nsec/call
clock-gettime64-boottime: syscall: 4132 nsec/call
clock-gettime64-boottime:    libc: not tested
clock-gettime64-boottime:    vdso: 975 nsec/call
clock-getres-boottime: syscall: 2497 nsec/call
clock-getres-boottime:    libc: 730 nsec/call
clock-getres-boottime:    vdso: 546 nsec/call
clock-gettime-boottime: syscall: 3728 nsec/call
clock-gettime-boottime:    libc: 1079 nsec/call
clock-gettime-boottime:    vdso: 941 nsec/call
clock-gettime64-tai: syscall: 4148 nsec/call
clock-gettime64-tai:    libc: not tested
clock-gettime64-tai:    vdso: 955 nsec/call
clock-getres-tai: syscall: 2494 nsec/call
clock-getres-tai:    libc: 730 nsec/call
clock-getres-tai:    vdso: 545 nsec/call
clock-gettime-tai: syscall: 3729 nsec/call
clock-gettime-tai:    libc: 1079 nsec/call
clock-gettime-tai:    vdso: 927 nsec/call
clock-gettime64-monotonic-raw: syscall: 3677 nsec/call
clock-gettime64-monotonic-raw:    libc: not tested
clock-gettime64-monotonic-raw:    vdso: 1032 nsec/call
clock-getres-monotonic-raw: syscall: 2494 nsec/call
clock-getres-monotonic-raw:    libc: 745 nsec/call
clock-getres-monotonic-raw:    vdso: 545 nsec/call
clock-gettime-monotonic-raw: syscall: 3276 nsec/call
clock-gettime-monotonic-raw:    libc: 1140 nsec/call
clock-gettime-monotonic-raw:    vdso: 1002 nsec/call
clock-gettime64-monotonic-coarse: syscall: 4099 nsec/call
clock-gettime64-monotonic-coarse:    libc: not tested
clock-gettime64-monotonic-coarse:    vdso: 653 nsec/call
clock-getres-monotonic-coarse: syscall: 2962 nsec/call
clock-getres-monotonic-coarse:    libc: 745 nsec/call
clock-getres-monotonic-coarse:    vdso: 545 nsec/call
clock-gettime-monotonic-coarse: syscall: 4297 nsec/call
clock-gettime-monotonic-coarse:    libc: 730 nsec/call
clock-gettime-monotonic-coarse:    vdso: 592 nsec/call
clock-gettime64-monotonic: syscall: 3863 nsec/call
clock-gettime64-monotonic:    libc: not tested
clock-gettime64-monotonic:    vdso: 975 nsec/call
clock-getres-monotonic: syscall: 2494 nsec/call
clock-getres-monotonic:    libc: 745 nsec/call
clock-getres-monotonic:    vdso: 545 nsec/call
clock-gettime-monotonic: syscall: 3465 nsec/call
clock-gettime-monotonic:    libc: 1079 nsec/call
clock-gettime-monotonic:    vdso: 927 nsec/call

Christophe

^ permalink raw reply

* [PATCH v2 4/9] drivers/ps3: Remove duplicate error messages
From: Geoff Levand @ 2020-05-09 18:58 UTC (permalink / raw)
  To: Michael Ellerman
  Cc: linuxppc-dev, Geert Uytterhoeven, Markus Elfring,
	Emmanuel Nicolet
In-Reply-To: <cover.1589049250.git.geoff@infradead.org>

From: Markus Elfring <elfring@users.sourceforge.net>

Remove duplicate memory allocation failure error messages.

Signed-off-by: Markus Elfring <elfring@users.sourceforge.net>
Signed-off-by: Geoff Levand <geoff@infradead.org>
---
 drivers/ps3/ps3-lpm.c   | 2 --
 drivers/ps3/ps3-vuart.c | 1 -
 2 files changed, 3 deletions(-)

diff --git a/drivers/ps3/ps3-lpm.c b/drivers/ps3/ps3-lpm.c
index 83c45659bc9d..fcc346296e3a 100644
--- a/drivers/ps3/ps3-lpm.c
+++ b/drivers/ps3/ps3-lpm.c
@@ -1111,8 +1111,6 @@ int ps3_lpm_open(enum ps3_lpm_tb_type tb_type, void *tb_cache,
 		lpm_priv->tb_cache_internal = kzalloc(
 			lpm_priv->tb_cache_size + 127, GFP_KERNEL);
 		if (!lpm_priv->tb_cache_internal) {
-			dev_err(sbd_core(), "%s:%u: alloc internal tb_cache "
-				"failed\n", __func__, __LINE__);
 			result = -ENOMEM;
 			goto fail_malloc;
 		}
diff --git a/drivers/ps3/ps3-vuart.c b/drivers/ps3/ps3-vuart.c
index ddaa5ea5801a..3b5878edc6c2 100644
--- a/drivers/ps3/ps3-vuart.c
+++ b/drivers/ps3/ps3-vuart.c
@@ -917,7 +917,6 @@ static int ps3_vuart_bus_interrupt_get(void)
 	vuart_bus_priv.bmp = kzalloc(sizeof(struct ports_bmp), GFP_KERNEL);
 
 	if (!vuart_bus_priv.bmp) {
-		pr_debug("%s:%d: kzalloc failed.\n", __func__, __LINE__);
 		result = -ENOMEM;
 		goto fail_bmp_malloc;
 	}
-- 
2.20.1



^ permalink raw reply related

* [PATCH v2 5/9] net/ps3_gelic_net: Remove duplicate error message
From: Geoff Levand @ 2020-05-09 18:58 UTC (permalink / raw)
  To: Michael Ellerman, David S. Miller
  Cc: linuxppc-dev, Geert Uytterhoeven, Markus Elfring,
	Emmanuel Nicolet
In-Reply-To: <cover.1589049250.git.geoff@infradead.org>

From: Markus Elfring <elfring@users.sourceforge.net>

Remove an extra message for a memory allocation failure in
function gelic_descr_prepare_rx().

Signed-off-by: Markus Elfring <elfring@users.sourceforge.net>
Signed-off-by: Geoff Levand <geoff@infradead.org>
---
 drivers/net/ethernet/toshiba/ps3_gelic_net.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/net/ethernet/toshiba/ps3_gelic_net.c b/drivers/net/ethernet/toshiba/ps3_gelic_net.c
index 070dd6fa9401..8522f3898e0d 100644
--- a/drivers/net/ethernet/toshiba/ps3_gelic_net.c
+++ b/drivers/net/ethernet/toshiba/ps3_gelic_net.c
@@ -382,8 +382,6 @@ static int gelic_descr_prepare_rx(struct gelic_card *card,
 	descr->skb = dev_alloc_skb(bufsize + GELIC_NET_RXBUF_ALIGN - 1);
 	if (!descr->skb) {
 		descr->buf_addr = 0; /* tell DMAC don't touch memory */
-		dev_info(ctodev(card),
-			 "%s:allocate skb failed !!\n", __func__);
 		return -ENOMEM;
 	}
 	descr->buf_size = cpu_to_be32(bufsize);
-- 
2.20.1



^ permalink raw reply related

* [PATCH v2 6/9] ps3disk: use the default segment boundary
From: Geoff Levand @ 2020-05-09 18:58 UTC (permalink / raw)
  To: Michael Ellerman, Jens Axboe
  Cc: linuxppc-dev, Geert Uytterhoeven, Markus Elfring,
	Emmanuel Nicolet
In-Reply-To: <cover.1589049250.git.geoff@infradead.org>

From: Emmanuel Nicolet <emmanuel.nicolet@gmail.com>

Hi,
since commit dcebd755926b ("block: use bio_for_each_bvec() to compute
multi-page bvec count"), the kernel will bug_on on the PS3 because
bio_split() is called with sectors == 0:

kernel BUG at block/bio.c:1853!
Oops: Exception in kernel mode, sig: 5 [#1]
BE PAGE_SIZE=4K MMU=Hash PREEMPT SMP NR_CPUS=8 NUMA PS3
Modules linked in: firewire_sbp2 rtc_ps3(+) soundcore ps3_gelic(+) \
ps3rom(+) firewire_core ps3vram(+) usb_common crc_itu_t
CPU: 0 PID: 97 Comm: blkid Not tainted 5.3.0-rc4 #1
NIP:  c00000000027d0d0 LR: c00000000027d0b0 CTR: 0000000000000000
REGS: c00000000135ae90 TRAP: 0700   Not tainted  (5.3.0-rc4)
MSR:  8000000000028032 <SF,EE,IR,DR,RI>  CR: 44008240  XER: 20000000
IRQMASK: 0
GPR00: c000000000289368 c00000000135b120 c00000000084a500 c000000004ff8300
GPR04: 0000000000000c00 c000000004c905e0 c000000004c905e0 000000000000ffff
GPR08: 0000000000000000 0000000000000001 0000000000000000 000000000000ffff
GPR12: 0000000000000000 c0000000008ef000 000000000000003e 0000000000080001
GPR16: 0000000000000100 000000000000ffff 0000000000000000 0000000000000004
GPR20: c00000000062fd7e 0000000000000001 000000000000ffff 0000000000000080
GPR24: c000000000781788 c00000000135b350 0000000000000080 c000000004c905e0
GPR28: c00000000135b348 c000000004ff8300 0000000000000000 c000000004c90000
NIP [c00000000027d0d0] .bio_split+0x28/0xac
LR [c00000000027d0b0] .bio_split+0x8/0xac
Call Trace:
[c00000000135b120] [c00000000027d130] .bio_split+0x88/0xac (unreliable)
[c00000000135b1b0] [c000000000289368] .__blk_queue_split+0x11c/0x53c
[c00000000135b2d0] [c00000000028f614] .blk_mq_make_request+0x80/0x7d4
[c00000000135b3d0] [c000000000283a8c] .generic_make_request+0x118/0x294
[c00000000135b4b0] [c000000000283d34] .submit_bio+0x12c/0x174
[c00000000135b580] [c000000000205a44] .mpage_bio_submit+0x3c/0x4c
[c00000000135b600] [c000000000206184] .mpage_readpages+0xa4/0x184
[c00000000135b750] [c0000000001ff8fc] .blkdev_readpages+0x24/0x38
[c00000000135b7c0] [c0000000001589f0] .read_pages+0x6c/0x1a8
[c00000000135b8b0] [c000000000158c74] .__do_page_cache_readahead+0x118/0x184
[c00000000135b9b0] [c0000000001591a8] .force_page_cache_readahead+0xe4/0xe8
[c00000000135ba50] [c00000000014fc24] .generic_file_read_iter+0x1d8/0x830
[c00000000135bb50] [c0000000001ffadc] .blkdev_read_iter+0x40/0x5c
[c00000000135bbc0] [c0000000001b9e00] .new_sync_read+0x144/0x1a0
[c00000000135bcd0] [c0000000001bc454] .vfs_read+0xa0/0x124
[c00000000135bd70] [c0000000001bc7a4] .ksys_read+0x70/0xd8
[c00000000135be20] [c00000000000a524] system_call+0x5c/0x70
Instruction dump:
7fe3fb78 482e30dc 7c0802a6 482e3085 7c9e2378 f821ff71 7ca42b78 7d3e00d0
7c7d1b78 79290fe0 7cc53378 69290001 <0b090000> 81230028 7bca0020 7929ba62
[ end trace 313fec760f30aa1f ]---

The problem originates from setting the segment boundary of the request
queue to -1UL. This makes get_max_segment_size() return zero when offset
is zero, whatever the max segment size. The test with BLK_SEG_BOUNDARY_MASK
fails and 'mask - (mask & offset) + 1' overflows to zero in the return
statement.

Not setting the segment boundary and using the default value
(BLK_SEG_BOUNDARY_MASK) fixes the problem.
Maybe BLK_SEG_BOUNDARY_MASK should be set to -1UL? It's currently set to
only 0xFFFFFFFFUL. I don't know if that would break anything.

Signed-off-by: Emmanuel Nicolet <emmanuel.nicolet@gmail.com>
Signed-off-by: Geoff Levand <geoff@infradead.org>
---
 drivers/block/ps3disk.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/block/ps3disk.c b/drivers/block/ps3disk.c
index c5c6487a19d5..7b55811c2a81 100644
--- a/drivers/block/ps3disk.c
+++ b/drivers/block/ps3disk.c
@@ -454,7 +454,6 @@ static int ps3disk_probe(struct ps3_system_bus_device *_dev)
 	queue->queuedata = dev;
 
 	blk_queue_max_hw_sectors(queue, dev->bounce_size >> 9);
-	blk_queue_segment_boundary(queue, -1UL);
 	blk_queue_dma_alignment(queue, dev->blk_size-1);
 	blk_queue_logical_block_size(queue, dev->blk_size);
 
-- 
2.20.1



^ permalink raw reply related

* [PATCH v2 3/9] powerpc/head_check: Avoid broken pipe
From: Geoff Levand @ 2020-05-09 18:58 UTC (permalink / raw)
  To: Michael Ellerman
  Cc: linuxppc-dev, Geert Uytterhoeven, Markus Elfring,
	Emmanuel Nicolet
In-Reply-To: <cover.1589049250.git.geoff@infradead.org>

Remove the '-m4' option to grep to allow grep to process all of nm's
output.  This avoids the nm warning:

  nm terminated with signal 13 [Broken pipe]

Signed-off-by: Geoff Levand <geoff@infradead.org>
---
 arch/powerpc/tools/head_check.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/tools/head_check.sh b/arch/powerpc/tools/head_check.sh
index 37061fb9b58e..e32d3162e5ed 100644
--- a/arch/powerpc/tools/head_check.sh
+++ b/arch/powerpc/tools/head_check.sh
@@ -46,7 +46,7 @@ nm="$1"
 vmlinux="$2"
 
 # gcc-4.6-era toolchain make _stext an A (absolute) symbol rather than T
-$nm "$vmlinux" | grep -e " [TA] _stext$" -e " t start_first_256B$" -e " a text_start$" -e " t start_text$" -m4 > .tmp_symbols.txt
+$nm "$vmlinux" | grep -e " [TA] _stext$" -e " t start_first_256B$" -e " a text_start$" -e " t start_text$" > .tmp_symbols.txt
 
 
 vma=$(cat .tmp_symbols.txt | grep -e " [TA] _stext$" | cut -d' ' -f1)
-- 
2.20.1



^ permalink raw reply related

* [PATCH v2 2/9] powerpc/wrapper: Output linker map file
From: Geoff Levand @ 2020-05-09 18:58 UTC (permalink / raw)
  To: Michael Ellerman
  Cc: linuxppc-dev, Geert Uytterhoeven, Markus Elfring,
	Emmanuel Nicolet
In-Reply-To: <cover.1589049250.git.geoff@infradead.org>

To aid debugging wrapper troubles, output a linker map file
'wrapper.map' when the build is verbose.

Signed-off-by: Geoff Levand <geoff@infradead.org>
---
 arch/powerpc/boot/wrapper | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/boot/wrapper b/arch/powerpc/boot/wrapper
index ed6266367bc0..35ace40d9fc2 100755
--- a/arch/powerpc/boot/wrapper
+++ b/arch/powerpc/boot/wrapper
@@ -29,6 +29,7 @@ set -e
 # Allow for verbose output
 if [ "$V" = 1 ]; then
     set -x
+    map="-Map wrapper.map"
 fi
 
 # defaults
@@ -500,7 +501,7 @@ if [ "$platform" != "miboot" ]; then
         text_start="-Ttext $link_address"
     fi
 #link everything
-    ${CROSS}ld -m $format -T $lds $text_start $pie $nodl -o "$ofile" \
+    ${CROSS}ld -m $format -T $lds $text_start $pie $nodl -o "$ofile" $map \
 	$platformo $tmp $object/wrapper.a
     rm $tmp
 fi
-- 
2.20.1



^ permalink raw reply related

* [PATCH v2 8/9] powerpc/ps3: Fix kexec shutdown hang
From: Geoff Levand @ 2020-05-09 18:58 UTC (permalink / raw)
  To: Michael Ellerman
  Cc: linuxppc-dev, Geert Uytterhoeven, Markus Elfring,
	Emmanuel Nicolet
In-Reply-To: <cover.1589049250.git.geoff@infradead.org>

The ps3_mm_region_destroy() and ps3_mm_vas_destroy() routines
are called very late in the shutdown via kexec's mmu_cleanup_all
routine.  By the time mmu_cleanup_all runs it is too late to use
udbg_printf, and calling it will cause PS3 systems to hang.

Remove all debugging statements from ps3_mm_region_destroy() and
ps3_mm_vas_destroy() and replace any error reporting with calls
to lv1_panic.

With this change builds with 'DEBUG' defined will not cause kexec
reboots to hang, and builds with 'DEBUG' defined or not will end
in lv1_panic if an error is encountered.

Signed-off-by: Geoff Levand <geoff@infradead.org>
---
 arch/powerpc/platforms/ps3/mm.c | 22 ++++++++++++----------
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/arch/powerpc/platforms/ps3/mm.c b/arch/powerpc/platforms/ps3/mm.c
index 423be34f0f5f..f42fe4e86ce5 100644
--- a/arch/powerpc/platforms/ps3/mm.c
+++ b/arch/powerpc/platforms/ps3/mm.c
@@ -200,13 +200,14 @@ void ps3_mm_vas_destroy(void)
 {
 	int result;
 
-	DBG("%s:%d: map.vas_id    = %llu\n", __func__, __LINE__, map.vas_id);
-
 	if (map.vas_id) {
 		result = lv1_select_virtual_address_space(0);
-		BUG_ON(result);
-		result = lv1_destruct_virtual_address_space(map.vas_id);
-		BUG_ON(result);
+		result += lv1_destruct_virtual_address_space(map.vas_id);
+
+		if (result) {
+			lv1_panic(0);
+		}
+
 		map.vas_id = 0;
 	}
 }
@@ -304,19 +305,20 @@ static void ps3_mm_region_destroy(struct mem_region *r)
 	int result;
 
 	if (!r->destroy) {
-		pr_info("%s:%d: Not destroying high region: %llxh %llxh\n",
-			__func__, __LINE__, r->base, r->size);
 		return;
 	}
 
-	DBG("%s:%d: r->base = %llxh\n", __func__, __LINE__, r->base);
-
 	if (r->base) {
 		result = lv1_release_memory(r->base);
-		BUG_ON(result);
+
+		if (result) {
+			lv1_panic(0);
+		}
+
 		r->size = r->base = r->offset = 0;
 		map.total = map.rm.size;
 	}
+
 	ps3_mm_set_repository_highmem(NULL);
 }
 
-- 
2.20.1



^ permalink raw reply related

* [PATCH v2 9/9] hvc_console: Allow backends to set I/O buffer size
From: Geoff Levand @ 2020-05-09 18:58 UTC (permalink / raw)
  To: Michael Ellerman
  Cc: linuxppc-dev, Geert Uytterhoeven, Markus Elfring,
	Emmanuel Nicolet
In-Reply-To: <cover.1589049250.git.geoff@infradead.org>

To allow HVC backends to set the I/O buffer sizes to values
that are most efficient for the backend, change the macro
definitions where the buffer sizes are set to be conditional
on whether or not the macros are already defined.  Also,
rename the macros from N_OUTBUF to HVC_N_OUBUF and from
N_INBUF to HVC_N_INBUF.

Typical usage in the backend source file would be:

Signed-off-by: Geoff Levand <geoff@infradead.org>
---
 drivers/tty/hvc/hvc_console.c | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/drivers/tty/hvc/hvc_console.c b/drivers/tty/hvc/hvc_console.c
index 436cc51c92c3..2928bad057fc 100644
--- a/drivers/tty/hvc/hvc_console.c
+++ b/drivers/tty/hvc/hvc_console.c
@@ -42,12 +42,15 @@
 #define HVC_CLOSE_WAIT (HZ/100) /* 1/10 of a second */
 
 /*
- * These sizes are most efficient for vio, because they are the
- * native transfer size. We could make them selectable in the
- * future to better deal with backends that want other buffer sizes.
+ * These default sizes are most efficient for vio, because they are
+ * the native transfer size.
  */
-#define N_OUTBUF	16
-#define N_INBUF		16
+#if !defined(HVC_N_OUTBUF)
+# define HVC_N_OUTBUF	16
+#endif
+#if !defined(HVC_N_INBUF)
+# define HVC_N_INBUF	16
+#endif
 
 #define __ALIGNED__ __attribute__((__aligned__(sizeof(long))))
 
@@ -151,7 +154,7 @@ static uint32_t vtermnos[MAX_NR_HVC_CONSOLES] =
 static void hvc_console_print(struct console *co, const char *b,
 			      unsigned count)
 {
-	char c[N_OUTBUF] __ALIGNED__;
+	char c[HVC_N_OUTBUF] __ALIGNED__;
 	unsigned i = 0, n = 0;
 	int r, donecr = 0, index = co->index;
 
@@ -640,7 +643,7 @@ static int __hvc_poll(struct hvc_struct *hp, bool may_sleep)
 {
 	struct tty_struct *tty;
 	int i, n, count, poll_mask = 0;
-	char buf[N_INBUF] __ALIGNED__;
+	char buf[HVC_N_INBUF] __ALIGNED__;
 	unsigned long flags;
 	int read_total = 0;
 	int written_total = 0;
@@ -681,7 +684,7 @@ static int __hvc_poll(struct hvc_struct *hp, bool may_sleep)
 
  read_again:
 	/* Read data if any */
-	count = tty_buffer_request_room(&hp->port, N_INBUF);
+	count = tty_buffer_request_room(&hp->port, HVC_N_INBUF);
 
 	/* If flip is full, just reschedule a later read */
 	if (count == 0) {
-- 
2.20.1


^ permalink raw reply related

* [PATCH v2 1/9] powerpc/head_check: Automatic verbosity
From: Geoff Levand @ 2020-05-09 18:58 UTC (permalink / raw)
  To: Michael Ellerman
  Cc: linuxppc-dev, Geert Uytterhoeven, Markus Elfring,
	Emmanuel Nicolet
In-Reply-To: <cover.1589049250.git.geoff@infradead.org>

To aid debugging build problems turn on shell tracing for the
head_check script when the build is verbose.

Signed-off-by: Geoff Levand <geoff@infradead.org>
---
 arch/powerpc/tools/head_check.sh | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/tools/head_check.sh b/arch/powerpc/tools/head_check.sh
index ad9e57209aa4..37061fb9b58e 100644
--- a/arch/powerpc/tools/head_check.sh
+++ b/arch/powerpc/tools/head_check.sh
@@ -31,8 +31,10 @@
 # level entry code (boot, interrupt vectors, etc) until r2 is set up. This
 # could cause the kernel to die in early boot.
 
-# Turn this on if you want more debug output:
-# set -x
+# Allow for verbose output
+if [ "$V" = "1" ]; then
+	set -x
+fi
 
 if [ $# -lt 2 ]; then
 	echo "$0 [path to nm] [path to vmlinux]" 1>&2
-- 
2.20.1



^ permalink raw reply related

* [PATCH v2 0/9] powerpc + ps3 patches
From: Geoff Levand @ 2020-05-09 18:58 UTC (permalink / raw)
  To: Michael Ellerman, David S. Miller, Jens Axboe
  Cc: linuxppc-dev, Geert Uytterhoeven, Markus Elfring,
	Emmanuel Nicolet

Hi Michael,

This is a combined V2 of the two patch sets I sent out on March 27th,
'PS3 patches for v5.7' and 'powerpc: Minor updates to improve build debugging'.

I've dropped these two patches that were in my 'PS3 patches for v5.7' set:

      powerpc/ps3: Add lv1_panic
      powerpc/ps3: Add udbg_panic

and replaced them with a new patch:

      powerpc/ps3: Fix kexec shutdown hang

There is new patch I've added in this set:

      hvc_console: Allow backends to set I/O buffer size

which I've been using in my debugging.  There is nothing using this feature in
the upstream kernel, and I don't plan to submit anything that would use it, so
use your discretion as to merge it.

-Geoff

The following changes since commit 0e698dfa282211e414076f9dc7e83c1c288314fd:

  Linux 5.7-rc4 (2020-05-03 14:56:04 -0700)

are available in the Git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/geoff/ps3-linux.git for-merge-powerpc-v2

for you to fetch changes up to 6f6294df663a53f47bb28abcbb1ef756c6a59922:

  hvc_console: Allow backends to set I/O buffer size (2020-05-09 11:24:42 -0700)

----------------------------------------------------------------
Emmanuel Nicolet (1):
      ps3disk: use the default segment boundary

Geoff Levand (6):
      powerpc/head_check: Automatic verbosity
      powerpc/wrapper: Output linker map file
      powerpc/head_check: Avoid broken pipe
      powerpc/ps3: Add check for otheros image size
      powerpc/ps3: Fix kexec shutdown hang
      hvc_console: Allow backends to set I/O buffer size

Markus Elfring (2):
      drivers/ps3: Remove duplicate error messages
      net/ps3_gelic_net: Remove duplicate error message

 arch/powerpc/boot/wrapper                    | 20 +++++++++++++++++---
 arch/powerpc/platforms/ps3/mm.c              | 22 ++++++++++++----------
 arch/powerpc/tools/head_check.sh             |  8 +++++---
 drivers/block/ps3disk.c                      |  1 -
 drivers/net/ethernet/toshiba/ps3_gelic_net.c |  2 --
 drivers/ps3/ps3-lpm.c                        |  2 --
 drivers/ps3/ps3-vuart.c                      |  1 -
 drivers/tty/hvc/hvc_console.c                | 19 +++++++++++--------
 8 files changed, 45 insertions(+), 30 deletions(-)

-- 
2.20.1


^ permalink raw reply

* [PATCH v2 7/9] powerpc/ps3: Add check for otheros image size
From: Geoff Levand @ 2020-05-09 18:58 UTC (permalink / raw)
  To: Michael Ellerman
  Cc: linuxppc-dev, Geert Uytterhoeven, Markus Elfring,
	Emmanuel Nicolet
In-Reply-To: <cover.1589049250.git.geoff@infradead.org>

The ps3's otheros flash loader has a size limit of 16 MiB for the
uncompressed image.  If that limit will be reached output the
flash image file as 'otheros-too-big.bld'.

Signed-off-by: Geoff Levand <geoff@infradead.org>
---
 arch/powerpc/boot/wrapper | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/boot/wrapper b/arch/powerpc/boot/wrapper
index 35ace40d9fc2..ab1e3ddc79f3 100755
--- a/arch/powerpc/boot/wrapper
+++ b/arch/powerpc/boot/wrapper
@@ -571,7 +571,20 @@ ps3)
         count=$overlay_size bs=1
 
     odir="$(dirname "$ofile.bin")"
-    rm -f "$odir/otheros.bld"
-    gzip -n --force -9 --stdout "$ofile.bin" > "$odir/otheros.bld"
+
+    # The ps3's flash loader has a size limit of 16 MiB for the uncompressed
+    # image.  If a compressed image that exceeded this limit is written to
+    # flash the loader will decompress that image until the 16 MiB limit is
+    # reached, then enter the system reset vector of the partially decompressed
+    # image.  No warning is issued.
+    rm -f "$odir"/{otheros,otheros-too-big}.bld
+    size=$(${CROSS}nm --no-sort --radix=d "$ofile" | egrep ' _end$' | cut -d' ' -f1)
+    bld="otheros.bld"
+    if [ $size -gt $((0x1000000)) ]; then
+        bld="otheros-too-big.bld"
+        echo "  INFO: Uncompressed kernel is too large to program into PS3 flash memory;" \
+        "size=0x$(printf "%x\n" $size), limit=0x1000000."
+    fi
+    gzip -n --force -9 --stdout "$ofile.bin" > "$odir/$bld"
     ;;
 esac
-- 
2.20.1



^ permalink raw reply related

* Re: ioremap() called early from pnv_pci_init_ioda_phb()
From: Oliver O'Halloran @ 2020-05-09 22:59 UTC (permalink / raw)
  To: Christophe Leroy; +Cc: Qian Cai, linuxppc-dev, LKML
In-Reply-To: <a4ae5c50-f317-4224-a5f2-6e1030e62d2b@csgroup.eu>

On Sun, May 10, 2020 at 1:51 AM Christophe Leroy
<christophe.leroy@csgroup.eu> wrote:
>
>
>
> Le 08/05/2020 à 19:41, Qian Cai a écrit :
> >
> >
> >> On May 8, 2020, at 10:39 AM, Qian Cai <cai@lca.pw> wrote:
> >>
> >> Booting POWER9 PowerNV has this message,
> >>
> >> "ioremap() called early from pnv_pci_init_ioda_phb+0x420/0xdfc. Use early_ioremap() instead”
> >>
> >> but use the patch below will result in leaks because it will never call early_iounmap() anywhere. However, it looks me it was by design that phb->regs mapping would be there forever where it would be used in pnv_ioda_get_inval_reg(), so is just that check_early_ioremap_leak() initcall too strong?
> >>
> >> --- a/arch/powerpc/platforms/powernv/pci-ioda.c
> >> +++ b/arch/powerpc/platforms/powernv/pci-ioda.c
> >> @@ -36,6 +36,7 @@
> >> #include <asm/firmware.h>
> >> #include <asm/pnv-pci.h>
> >> #include <asm/mmzone.h>
> >> +#include <asm/early_ioremap.h>
> >>
> >> #include <misc/cxl-base.h>
> >>
> >> @@ -3827,7 +3828,7 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
> >>         /* Get registers */
> >>         if (!of_address_to_resource(np, 0, &r)) {
> >>                 phb->regs_phys = r.start;
> >> -               phb->regs = ioremap(r.start, resource_size(&r));
> >> +               phb->regs = early_ioremap(r.start, resource_size(&r));
> >>                 if (phb->regs == NULL)
> >>                         pr_err("  Failed to map registers !\n”);
> >
> > This will also trigger a panic with debugfs reads, so isn’t that this commit bogus at least for powerpc64?
> >
> > d538aadc2718 (“powerpc/ioremap: warn on early use of ioremap()")
>
> No d538aadc2718 is not bogus. That's the point, we want to remove all
> early usages of ioremap() in order to remove the hack with the
> ioremap_bot stuff and all, and stick to the generic ioremap logic.
>
> In order to do so, all early use of ioremap() has to be converted to
> early_ioremap() or to fixmap or anything else that allows to do ioremaps
> before the slab is ready.
>
> early_ioremap() is for temporary mappings necessary at boottime. For
> long lasting mappings, another method is to be used.
>
> Now, the point is that other architectures like for instance x86 don't
> seem to have to use early_ioremap() much. Powerpc is for instance doing
> early mappings for PCI. Seems like x86 initialises PCI once slab is
> ready. Can't powerpc do the same ?

Patches welcome.

^ permalink raw reply

* Re: [PATCH] tty: hvc: Fix data abort due to race in hvc_open
From: rananta @ 2020-05-10  1:30 UTC (permalink / raw)
  To: Greg KH; +Cc: andrew, linuxppc-dev, linux-kernel, jslaby
In-Reply-To: <20200506094851.GA2787548@kroah.com>

On 2020-05-06 02:48, Greg KH wrote:
> On Mon, Apr 27, 2020 at 08:26:01PM -0700, Raghavendra Rao Ananta wrote:
>> Potentially, hvc_open() can be called in parallel when two tasks calls
>> open() on /dev/hvcX. In such a scenario, if the 
>> hp->ops->notifier_add()
>> callback in the function fails, where it sets the tty->driver_data to
>> NULL, the parallel hvc_open() can see this NULL and cause a memory 
>> abort.
>> Hence, serialize hvc_open and check if tty->private_data is NULL 
>> before
>> proceeding ahead.
>> 
>> The issue can be easily reproduced by launching two tasks 
>> simultaneously
>> that does nothing but open() and close() on /dev/hvcX.
>> For example:
>> $ ./simple_open_close /dev/hvc0 & ./simple_open_close /dev/hvc0 &
>> 
>> Signed-off-by: Raghavendra Rao Ananta <rananta@codeaurora.org>
>> ---
>>  drivers/tty/hvc/hvc_console.c | 16 ++++++++++++++--
>>  1 file changed, 14 insertions(+), 2 deletions(-)
>> 
>> diff --git a/drivers/tty/hvc/hvc_console.c 
>> b/drivers/tty/hvc/hvc_console.c
>> index 436cc51c92c3..ebe26fe5ac09 100644
>> --- a/drivers/tty/hvc/hvc_console.c
>> +++ b/drivers/tty/hvc/hvc_console.c
>> @@ -75,6 +75,8 @@ static LIST_HEAD(hvc_structs);
>>   */
>>  static DEFINE_MUTEX(hvc_structs_mutex);
>> 
>> +/* Mutex to serialize hvc_open */
>> +static DEFINE_MUTEX(hvc_open_mutex);
>>  /*
>>   * This value is used to assign a tty->index value to a hvc_struct 
>> based
>>   * upon order of exposure via hvc_probe(), when we can not match it 
>> to
>> @@ -346,16 +348,24 @@ static int hvc_install(struct tty_driver 
>> *driver, struct tty_struct *tty)
>>   */
>>  static int hvc_open(struct tty_struct *tty, struct file * filp)
>>  {
>> -	struct hvc_struct *hp = tty->driver_data;
>> +	struct hvc_struct *hp;
>>  	unsigned long flags;
>>  	int rc = 0;
>> 
>> +	mutex_lock(&hvc_open_mutex);
>> +
>> +	hp = tty->driver_data;
>> +	if (!hp) {
>> +		rc = -EIO;
>> +		goto out;
>> +	}
>> +
>>  	spin_lock_irqsave(&hp->port.lock, flags);
>>  	/* Check and then increment for fast path open. */
>>  	if (hp->port.count++ > 0) {
>>  		spin_unlock_irqrestore(&hp->port.lock, flags);
>>  		hvc_kick();
>> -		return 0;
>> +		goto out;
>>  	} /* else count == 0 */
>>  	spin_unlock_irqrestore(&hp->port.lock, flags);
> 
> Wait, why isn't this driver just calling tty_port_open() instead of
> trying to open-code all of this?
> 
> Keeping a single mutext for open will not protect it from close, it 
> will
> just slow things down a bit.  There should already be a tty lock held 
> by
> the tty core for open() to keep it from racing things, right?
The tty lock should have been held, but not likely across ->install() 
and
->open() callbacks, thus resulting in a race between hvc_install() and 
hvc_open(),
where hvc_install() sets a data and the hvc_open() clears it. hvc_open() 
doesn't
check if the data was set to NULL and proceeds.
> 
> Try just removing all of this logic and replacing it with a call to
> tty_port_open() and see if that fixes this issue.
> 
> As "proof" of this, I don't see other serial drivers needing a single
> mutex for their open calls, do you?
> 
> thanks,
> 
> greg k-h

Thank you.
Raghavendra

^ permalink raw reply

* [Bug 207359] MegaRAID SAS 9361 controller hang/reset
From: bugzilla-daemon @ 2020-05-10  3:02 UTC (permalink / raw)
  To: linuxppc-dev
In-Reply-To: <bug-207359-206035@https.bugzilla.kernel.org/>

https://bugzilla.kernel.org/show_bug.cgi?id=207359

--- Comment #3 from Cameron (cam@neo-zeon.de) ---
Created attachment 289041
  --> https://bugzilla.kernel.org/attachment.cgi?id=289041&action=edit
5.6.11 megaraid POWER hang

Still happens with 5.6.11. There seems to be potentially a bit more output this
time, and I've included output from shutting down too in case it's useful.

-- 
You are receiving this mail because:
You are watching the assignee of the bug.

^ permalink raw reply

* [PATCH] powerpc/powernv/pci: fix a RCU-list lock
From: Qian Cai @ 2020-05-10  5:13 UTC (permalink / raw)
  To: mpe; +Cc: paulmck, aik, linux-kernel, paulus, Qian Cai, linuxppc-dev

It is unsafe to traverse tbl->it_group_list without the RCU read lock.

 WARNING: suspicious RCU usage
 5.7.0-rc4-next-20200508 #1 Not tainted
 -----------------------------
 arch/powerpc/platforms/powernv/pci-ioda-tce.c:355 RCU-list traversed in non-reader section!!

 other info that might help us debug this:

 rcu_scheduler_active = 2, debug_locks = 1
 3 locks held by qemu-kvm/4305:
  #0: c000000bc3fe6988 (&container->group_lock){++++}-{3:3}, at: vfio_fops_unl_ioctl+0x108/0x410 [vfio]
  #1: c00800000fcc7400 (&vfio.iommu_drivers_lock){+.+.}-{3:3}, at: vfio_fops_unl_ioctl+0x148/0x410 [vfio]
  #2: c000000bc3fe4d68 (&container->lock){+.+.}-{3:3}, at: tce_iommu_attach_group+0x3c/0x4f0 [vfio_iommu_spapr_tce]

 stack backtrace:
 CPU: 4 PID: 4305 Comm: qemu-kvm Not tainted 5.7.0-rc4-next-20200508 #1
 Call Trace:
 [c0000010f29afa60] [c0000000007154c8] dump_stack+0xfc/0x174 (unreliable)
 [c0000010f29afab0] [c0000000001d8ff0] lockdep_rcu_suspicious+0x140/0x164
 [c0000010f29afb30] [c0000000000dae2c] pnv_pci_unlink_table_and_group+0x11c/0x200
 [c0000010f29afb70] [c0000000000d4a34] pnv_pci_ioda2_unset_window+0xc4/0x190
 [c0000010f29afbf0] [c0000000000d4b4c] pnv_ioda2_take_ownership+0x4c/0xd0
 [c0000010f29afc30] [c00800000fd60ee0] tce_iommu_attach_group+0x2c8/0x4f0 [vfio_iommu_spapr_tce]
 [c0000010f29afcd0] [c00800000fcc11a0] vfio_fops_unl_ioctl+0x238/0x410 [vfio]
 [c0000010f29afd50] [c0000000005430a8] ksys_ioctl+0xd8/0x130
 [c0000010f29afda0] [c000000000543128] sys_ioctl+0x28/0x40
 [c0000010f29afdc0] [c000000000038af4] system_call_exception+0x114/0x1e0
 [c0000010f29afe20] [c00000000000c8f0] system_call_common+0xf0/0x278

Signed-off-by: Qian Cai <cai@lca.pw>
---
 arch/powerpc/platforms/powernv/pci-ioda-tce.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/arch/powerpc/platforms/powernv/pci-ioda-tce.c b/arch/powerpc/platforms/powernv/pci-ioda-tce.c
index 5dc6847d5f4c..6be9cf292b4e 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda-tce.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda-tce.c
@@ -352,6 +352,8 @@ void pnv_pci_unlink_table_and_group(struct iommu_table *tbl,
 
 	/* Remove link to a group from table's list of attached groups */
 	found = false;
+
+	rcu_read_lock();
 	list_for_each_entry_rcu(tgl, &tbl->it_group_list, next) {
 		if (tgl->table_group == table_group) {
 			list_del_rcu(&tgl->next);
@@ -360,6 +362,8 @@ void pnv_pci_unlink_table_and_group(struct iommu_table *tbl,
 			break;
 		}
 	}
+	rcu_read_unlock();
+
 	if (WARN_ON(!found))
 		return;
 
-- 
2.21.0 (Apple Git-122.2)


^ permalink raw reply related

* [PATCH] powerpc/mm/book3s64/iommu: fix some RCU-list locks
From: Qian Cai @ 2020-05-10  5:15 UTC (permalink / raw)
  To: mpe; +Cc: paulmck, aik, linux-kernel, paulus, Qian Cai, linuxppc-dev

It is safe to traverse mm->context.iommu_group_mem_list with either
mem_list_mutex or the RCU read lock held. Silence a few RCU-list false
positive warnings and fix a few missing RCU read locks.

 arch/powerpc/mm/book3s64/iommu_api.c:330 RCU-list traversed in non-reader section!!

 other info that might help us debug this:

 rcu_scheduler_active = 2, debug_locks = 1
 2 locks held by qemu-kvm/4305:
  #0: c000000bc3fe4d68 (&container->lock){+.+.}-{3:3}, at: tce_iommu_ioctl.part.9+0xc7c/0x1870 [vfio_iommu_spapr_tce]
  #1: c000000001501910 (mem_list_mutex){+.+.}-{3:3}, at: mm_iommu_get+0x50/0x190

 ====
 arch/powerpc/mm/book3s64/iommu_api.c:132 RCU-list traversed in non-reader section!!

 other info that might help us debug this:

 rcu_scheduler_active = 2, debug_locks = 1
 2 locks held by qemu-kvm/4305:
  #0: c000000bc3fe4d68 (&container->lock){+.+.}-{3:3}, at: tce_iommu_ioctl.part.9+0xc7c/0x1870 [vfio_iommu_spapr_tce]
  #1: c000000001501910 (mem_list_mutex){+.+.}-{3:3}, at: mm_iommu_do_alloc+0x120/0x5f0

 ====
 arch/powerpc/mm/book3s64/iommu_api.c:292 RCU-list traversed in non-reader section!!

 other info that might help us debug this:

 rcu_scheduler_active = 2, debug_locks = 1
 2 locks held by qemu-kvm/4312:
  #0: c000000ecafe23c8 (&vcpu->mutex){+.+.}-{3:3}, at: kvm_vcpu_ioctl+0xdc/0x950 [kvm]
  #1: c000000045e6c468 (&kvm->srcu){....}-{0:0}, at: kvmppc_h_put_tce+0x88/0x340 [kvm]

 ====
 arch/powerpc/mm/book3s64/iommu_api.c:424 RCU-list traversed in non-reader section!!

 other info that might help us debug this:

 rcu_scheduler_active = 2, debug_locks = 1
 2 locks held by qemu-kvm/4312:
  #0: c000000ecafe23c8 (&vcpu->mutex){+.+.}-{3:3}, at: kvm_vcpu_ioctl+0xdc/0x950 [kvm]
  #1: c000000045e6c468 (&kvm->srcu){....}-{0:0}, at: kvmppc_h_put_tce+0x88/0x340 [kvm]

Signed-off-by: Qian Cai <cai@lca.pw>
---
 arch/powerpc/mm/book3s64/iommu_api.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/mm/book3s64/iommu_api.c b/arch/powerpc/mm/book3s64/iommu_api.c
index fa05bbd1f682..bf0108b6f445 100644
--- a/arch/powerpc/mm/book3s64/iommu_api.c
+++ b/arch/powerpc/mm/book3s64/iommu_api.c
@@ -129,7 +129,8 @@ static long mm_iommu_do_alloc(struct mm_struct *mm, unsigned long ua,
 
 	mutex_lock(&mem_list_mutex);
 
-	list_for_each_entry_rcu(mem2, &mm->context.iommu_group_mem_list, next) {
+	list_for_each_entry_rcu(mem2, &mm->context.iommu_group_mem_list, next,
+				lockdep_is_held(&mem_list_mutex)) {
 		/* Overlap? */
 		if ((mem2->ua < (ua + (entries << PAGE_SHIFT))) &&
 				(ua < (mem2->ua +
@@ -289,6 +290,7 @@ struct mm_iommu_table_group_mem_t *mm_iommu_lookup(struct mm_struct *mm,
 {
 	struct mm_iommu_table_group_mem_t *mem, *ret = NULL;
 
+	rcu_read_lock();
 	list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list, next) {
 		if ((mem->ua <= ua) &&
 				(ua + size <= mem->ua +
@@ -297,6 +299,7 @@ struct mm_iommu_table_group_mem_t *mm_iommu_lookup(struct mm_struct *mm,
 			break;
 		}
 	}
+	rcu_read_unlock();
 
 	return ret;
 }
@@ -327,7 +330,8 @@ struct mm_iommu_table_group_mem_t *mm_iommu_get(struct mm_struct *mm,
 
 	mutex_lock(&mem_list_mutex);
 
-	list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list, next) {
+	list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list, next,
+				lockdep_is_held(&mem_list_mutex)) {
 		if ((mem->ua == ua) && (mem->entries == entries)) {
 			ret = mem;
 			++mem->used;
@@ -421,6 +425,7 @@ bool mm_iommu_is_devmem(struct mm_struct *mm, unsigned long hpa,
 	struct mm_iommu_table_group_mem_t *mem;
 	unsigned long end;
 
+	rcu_read_lock();
 	list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list, next) {
 		if (mem->dev_hpa == MM_IOMMU_TABLE_INVALID_HPA)
 			continue;
@@ -437,6 +442,7 @@ bool mm_iommu_is_devmem(struct mm_struct *mm, unsigned long hpa,
 			return true;
 		}
 	}
+	rcu_read_unlock();
 
 	return false;
 }
-- 
2.21.0 (Apple Git-122.2)


^ permalink raw reply related

* [PATCH] powerpc/kvm/book3s64/vio: fix some RCU-list locks
From: Qian Cai @ 2020-05-10  5:18 UTC (permalink / raw)
  To: mpe; +Cc: paulmck, aik, linux-kernel, kvm-ppc, paulus, Qian Cai,
	linuxppc-dev

It is unsafe to traverse kvm->arch.spapr_tce_tables and
stt->iommu_tables without the RCU read lock held. Also, add
cond_resched_rcu() in places with the RCU read lock held that could take
a while to finish.

 arch/powerpc/kvm/book3s_64_vio.c:76 RCU-list traversed in non-reader section!!

 other info that might help us debug this:

 rcu_scheduler_active = 2, debug_locks = 1
 no locks held by qemu-kvm/4265.

 stack backtrace:
 CPU: 96 PID: 4265 Comm: qemu-kvm Not tainted 5.7.0-rc4-next-20200508+ #2
 Call Trace:
 [c000201a8690f720] [c000000000715948] dump_stack+0xfc/0x174 (unreliable)
 [c000201a8690f770] [c0000000001d9470] lockdep_rcu_suspicious+0x140/0x164
 [c000201a8690f7f0] [c008000010b9fb48] kvm_spapr_tce_release_iommu_group+0x1f0/0x220 [kvm]
 [c000201a8690f870] [c008000010b8462c] kvm_spapr_tce_release_vfio_group+0x54/0xb0 [kvm]
 [c000201a8690f8a0] [c008000010b84710] kvm_vfio_destroy+0x88/0x140 [kvm]
 [c000201a8690f8f0] [c008000010b7d488] kvm_put_kvm+0x370/0x600 [kvm]
 [c000201a8690f990] [c008000010b7e3c0] kvm_vm_release+0x38/0x60 [kvm]
 [c000201a8690f9c0] [c0000000005223f4] __fput+0x124/0x330
 [c000201a8690fa20] [c000000000151cd8] task_work_run+0xb8/0x130
 [c000201a8690fa70] [c0000000001197e8] do_exit+0x4e8/0xfa0
 [c000201a8690fb70] [c00000000011a374] do_group_exit+0x64/0xd0
 [c000201a8690fbb0] [c000000000132c90] get_signal+0x1f0/0x1200
 [c000201a8690fcc0] [c000000000020690] do_notify_resume+0x130/0x3c0
 [c000201a8690fda0] [c000000000038d64] syscall_exit_prepare+0x1a4/0x280
 [c000201a8690fe20] [c00000000000c8f8] system_call_common+0xf8/0x278

 ====
 arch/powerpc/kvm/book3s_64_vio.c:368 RCU-list traversed in non-reader section!!

 other info that might help us debug this:

 rcu_scheduler_active = 2, debug_locks = 1
 2 locks held by qemu-kvm/4264:
  #0: c000201ae2d000d8 (&vcpu->mutex){+.+.}-{3:3}, at: kvm_vcpu_ioctl+0xdc/0x950 [kvm]
  #1: c000200c9ed0c468 (&kvm->srcu){....}-{0:0}, at: kvmppc_h_put_tce+0x88/0x340 [kvm]

 ====
 arch/powerpc/kvm/book3s_64_vio.c:108 RCU-list traversed in non-reader section!!

 other info that might help us debug this:

 rcu_scheduler_active = 2, debug_locks = 1
 1 lock held by qemu-kvm/4257:
  #0: c000200b1b363a40 (&kv->lock){+.+.}-{3:3}, at: kvm_vfio_set_attr+0x598/0x6c0 [kvm]

 ====
 arch/powerpc/kvm/book3s_64_vio.c:146 RCU-list traversed in non-reader section!!

 other info that might help us debug this:

 rcu_scheduler_active = 2, debug_locks = 1
 1 lock held by qemu-kvm/4257:
  #0: c000200b1b363a40 (&kv->lock){+.+.}-{3:3}, at: kvm_vfio_set_attr+0x598/0x6c0 [kvm]

Signed-off-by: Qian Cai <cai@lca.pw>
---
 arch/powerpc/kvm/book3s_64_vio.c | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c
index 50555ad1db93..4f5016bab723 100644
--- a/arch/powerpc/kvm/book3s_64_vio.c
+++ b/arch/powerpc/kvm/book3s_64_vio.c
@@ -73,6 +73,7 @@ extern void kvm_spapr_tce_release_iommu_group(struct kvm *kvm,
 	struct kvmppc_spapr_tce_iommu_table *stit, *tmp;
 	struct iommu_table_group *table_group = NULL;
 
+	rcu_read_lock();
 	list_for_each_entry_rcu(stt, &kvm->arch.spapr_tce_tables, list) {
 
 		table_group = iommu_group_get_iommudata(grp);
@@ -87,7 +88,9 @@ extern void kvm_spapr_tce_release_iommu_group(struct kvm *kvm,
 				kref_put(&stit->kref, kvm_spapr_tce_liobn_put);
 			}
 		}
+		cond_resched_rcu();
 	}
+	rcu_read_unlock();
 }
 
 extern long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd,
@@ -105,12 +108,14 @@ extern long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd,
 	if (!f.file)
 		return -EBADF;
 
+	rcu_read_lock();
 	list_for_each_entry_rcu(stt, &kvm->arch.spapr_tce_tables, list) {
 		if (stt == f.file->private_data) {
 			found = true;
 			break;
 		}
 	}
+	rcu_read_unlock();
 
 	fdput(f);
 
@@ -143,6 +148,7 @@ extern long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd,
 	if (!tbl)
 		return -EINVAL;
 
+	rcu_read_lock();
 	list_for_each_entry_rcu(stit, &stt->iommu_tables, next) {
 		if (tbl != stit->tbl)
 			continue;
@@ -150,14 +156,17 @@ extern long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd,
 		if (!kref_get_unless_zero(&stit->kref)) {
 			/* stit is being destroyed */
 			iommu_tce_table_put(tbl);
+			rcu_read_unlock();
 			return -ENOTTY;
 		}
 		/*
 		 * The table is already known to this KVM, we just increased
 		 * its KVM reference counter and can return.
 		 */
+		rcu_read_unlock();
 		return 0;
 	}
+	rcu_read_unlock();
 
 	stit = kzalloc(sizeof(*stit), GFP_KERNEL);
 	if (!stit) {
@@ -365,18 +374,20 @@ static long kvmppc_tce_validate(struct kvmppc_spapr_tce_table *stt,
 	if (kvmppc_tce_to_ua(stt->kvm, tce, &ua))
 		return H_TOO_HARD;
 
+	rcu_read_lock();
 	list_for_each_entry_rcu(stit, &stt->iommu_tables, next) {
 		unsigned long hpa = 0;
 		struct mm_iommu_table_group_mem_t *mem;
 		long shift = stit->tbl->it_page_shift;
 
 		mem = mm_iommu_lookup(stt->kvm->mm, ua, 1ULL << shift);
-		if (!mem)
-			return H_TOO_HARD;
-
-		if (mm_iommu_ua_to_hpa(mem, ua, shift, &hpa))
+		if (!mem || mm_iommu_ua_to_hpa(mem, ua, shift, &hpa)) {
+			rcu_read_unlock();
 			return H_TOO_HARD;
+		}
+		cond_resched_rcu();
 	}
+	rcu_read_unlock();
 
 	return H_SUCCESS;
 }
-- 
2.21.0 (Apple Git-122.2)


^ permalink raw reply related

* Re: [PATCH] tty: hvc: Fix data abort due to race in hvc_open
From: Greg KH @ 2020-05-10  6:48 UTC (permalink / raw)
  To: rananta; +Cc: andrew, linuxppc-dev, linux-kernel, jslaby
In-Reply-To: <98bbe7afabf48d8e8fe839fdc9e836a5@codeaurora.org>

On Sat, May 09, 2020 at 06:30:56PM -0700, rananta@codeaurora.org wrote:
> On 2020-05-06 02:48, Greg KH wrote:
> > On Mon, Apr 27, 2020 at 08:26:01PM -0700, Raghavendra Rao Ananta wrote:
> > > Potentially, hvc_open() can be called in parallel when two tasks calls
> > > open() on /dev/hvcX. In such a scenario, if the
> > > hp->ops->notifier_add()
> > > callback in the function fails, where it sets the tty->driver_data to
> > > NULL, the parallel hvc_open() can see this NULL and cause a memory
> > > abort.
> > > Hence, serialize hvc_open and check if tty->private_data is NULL
> > > before
> > > proceeding ahead.
> > > 
> > > The issue can be easily reproduced by launching two tasks
> > > simultaneously
> > > that does nothing but open() and close() on /dev/hvcX.
> > > For example:
> > > $ ./simple_open_close /dev/hvc0 & ./simple_open_close /dev/hvc0 &
> > > 
> > > Signed-off-by: Raghavendra Rao Ananta <rananta@codeaurora.org>
> > > ---
> > >  drivers/tty/hvc/hvc_console.c | 16 ++++++++++++++--
> > >  1 file changed, 14 insertions(+), 2 deletions(-)
> > > 
> > > diff --git a/drivers/tty/hvc/hvc_console.c
> > > b/drivers/tty/hvc/hvc_console.c
> > > index 436cc51c92c3..ebe26fe5ac09 100644
> > > --- a/drivers/tty/hvc/hvc_console.c
> > > +++ b/drivers/tty/hvc/hvc_console.c
> > > @@ -75,6 +75,8 @@ static LIST_HEAD(hvc_structs);
> > >   */
> > >  static DEFINE_MUTEX(hvc_structs_mutex);
> > > 
> > > +/* Mutex to serialize hvc_open */
> > > +static DEFINE_MUTEX(hvc_open_mutex);
> > >  /*
> > >   * This value is used to assign a tty->index value to a hvc_struct
> > > based
> > >   * upon order of exposure via hvc_probe(), when we can not match it
> > > to
> > > @@ -346,16 +348,24 @@ static int hvc_install(struct tty_driver
> > > *driver, struct tty_struct *tty)
> > >   */
> > >  static int hvc_open(struct tty_struct *tty, struct file * filp)
> > >  {
> > > -	struct hvc_struct *hp = tty->driver_data;
> > > +	struct hvc_struct *hp;
> > >  	unsigned long flags;
> > >  	int rc = 0;
> > > 
> > > +	mutex_lock(&hvc_open_mutex);
> > > +
> > > +	hp = tty->driver_data;
> > > +	if (!hp) {
> > > +		rc = -EIO;
> > > +		goto out;
> > > +	}
> > > +
> > >  	spin_lock_irqsave(&hp->port.lock, flags);
> > >  	/* Check and then increment for fast path open. */
> > >  	if (hp->port.count++ > 0) {
> > >  		spin_unlock_irqrestore(&hp->port.lock, flags);
> > >  		hvc_kick();
> > > -		return 0;
> > > +		goto out;
> > >  	} /* else count == 0 */
> > >  	spin_unlock_irqrestore(&hp->port.lock, flags);
> > 
> > Wait, why isn't this driver just calling tty_port_open() instead of
> > trying to open-code all of this?
> > 
> > Keeping a single mutext for open will not protect it from close, it will
> > just slow things down a bit.  There should already be a tty lock held by
> > the tty core for open() to keep it from racing things, right?
> The tty lock should have been held, but not likely across ->install() and
> ->open() callbacks, thus resulting in a race between hvc_install() and
> hvc_open(),

How?  The tty lock is held in install, and should not conflict with
open(), otherwise we would be seeing this happen in all tty drivers,
right?

> where hvc_install() sets a data and the hvc_open() clears it. hvc_open()
> doesn't
> check if the data was set to NULL and proceeds.

What data is being set that hvc_open is checking?

And you are not grabbing a lock in your install callback, you are only
serializing your open call here, I don't see how this is fixing anything
other than perhaps slowing down your codepaths.

As an arument why this isn't correct, can you answer why this same type
of change wouldn't be required for all tty drivers in the tree?

thanks,

greg k-h

^ permalink raw reply

* sort out the flush_icache_range mess
From: Christoph Hellwig @ 2020-05-10  7:54 UTC (permalink / raw)
  To: Andrew Morton, Arnd Bergmann, Roman Zippel
  Cc: linux-arch, linux-xtensa, Michal Simek, Jessica Yu, linux-ia64,
	linux-c6x-dev, linux-sh, linux-hexagon, x86, linux-um,
	linux-kernel, linux-mips, linux-mm, linux-m68k, openrisc,
	linux-alpha, sparclinux, linux-fsdevel, linux-riscv, linuxppc-dev,
	linux-arm-kernel

Hi all,

flush_icache_range is mostly used for kernel address, except for the following
cases:

 - the nommu brk and mmap implementations,
 - the read_code helper that is only used for binfmt_flat, binfmt_elf_fdpic,
   and binfmt_aout including the broken ia32 compat version
 - binfmt_flat itself,

none of which really are used by a typical MMU enabled kernel, as a.out can
only be build for alpha and m68k to start with.

But strangely enough commit ae92ef8a4424 ("PATCH] flush icache in correct
context") added a "set_fs(KERNEL_DS)" around the flush_icache_range call
in the module loader, because apparently m68k assumed user pointers.

This series first cleans up the cacheflush implementations, largely by
switching as much as possible to the asm-generic version after a few
preparations, then moves the misnamed current flush_icache_user_range to
a new name, to finally introduce a real flush_icache_user_range to be used
for the above use cases to flush the instruction cache for a userspace
address range.  The last patch then drops the set_fs in the module code
and moves it into the m68k implementation.

A git tree is available here:

    git://git.infradead.org/users/hch/misc.git flush_icache_range

Gitweb:

    http://git.infradead.org/users/hch/misc.git/shortlog/refs/heads/flush_icache_range

^ permalink raw reply

* [PATCH 01/31] arm: fix the flush_icache_range arguments in set_fiq_handler
From: Christoph Hellwig @ 2020-05-10  7:54 UTC (permalink / raw)
  To: Andrew Morton, Arnd Bergmann, Roman Zippel
  Cc: linux-arch, linux-xtensa, Michal Simek, Jessica Yu, linux-ia64,
	linux-c6x-dev, linux-sh, linux-hexagon, x86, linux-um,
	linux-kernel, linux-mips, linux-mm, linux-m68k, openrisc,
	linux-alpha, sparclinux, linux-fsdevel, linux-riscv, linuxppc-dev,
	linux-arm-kernel
In-Reply-To: <20200510075510.987823-1-hch@lst.de>

The arguments passed look bogus, try to fix them to something that seems
to make sense.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 arch/arm/kernel/fiq.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm/kernel/fiq.c b/arch/arm/kernel/fiq.c
index cd1234c103fcd..98ca3e3fa8471 100644
--- a/arch/arm/kernel/fiq.c
+++ b/arch/arm/kernel/fiq.c
@@ -98,8 +98,8 @@ void set_fiq_handler(void *start, unsigned int length)
 
 	memcpy(base + offset, start, length);
 	if (!cache_is_vipt_nonaliasing())
-		flush_icache_range((unsigned long)base + offset, offset +
-				   length);
+		flush_icache_range((unsigned long)base + offset,
+				   (unsigned long)base + offset + length);
 	flush_icache_range(0xffff0000 + offset, 0xffff0000 + offset + length);
 }
 
-- 
2.26.2


^ permalink raw reply related

* [PATCH 02/31] arm64: fix the flush_icache_range arguments in machine_kexec
From: Christoph Hellwig @ 2020-05-10  7:54 UTC (permalink / raw)
  To: Andrew Morton, Arnd Bergmann, Roman Zippel
  Cc: linux-arch, linux-xtensa, Michal Simek, Jessica Yu, linux-ia64,
	linux-c6x-dev, linux-sh, linux-hexagon, x86, linux-um,
	linux-kernel, linux-mips, linux-mm, linux-m68k, openrisc,
	linux-alpha, sparclinux, linux-fsdevel, linux-riscv, linuxppc-dev,
	linux-arm-kernel
In-Reply-To: <20200510075510.987823-1-hch@lst.de>

The second argument is the end "pointer", not the length.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 arch/arm64/kernel/machine_kexec.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c
index 8e9c924423b4e..a0b144cfaea71 100644
--- a/arch/arm64/kernel/machine_kexec.c
+++ b/arch/arm64/kernel/machine_kexec.c
@@ -177,6 +177,7 @@ void machine_kexec(struct kimage *kimage)
 	 * the offline CPUs. Therefore, we must use the __* variant here.
 	 */
 	__flush_icache_range((uintptr_t)reboot_code_buffer,
+			     (uintptr_t)reboot_code_buffer +
 			     arm64_relocate_new_kernel_size);
 
 	/* Flush the kimage list and its buffers. */
-- 
2.26.2


^ permalink raw reply related

* [PATCH 03/31] MIPS: unexport __flush_icache_user_range
From: Christoph Hellwig @ 2020-05-10  7:54 UTC (permalink / raw)
  To: Andrew Morton, Arnd Bergmann, Roman Zippel
  Cc: linux-arch, linux-xtensa, Michal Simek, Jessica Yu, linux-ia64,
	linux-c6x-dev, linux-sh, linux-hexagon, x86, linux-um,
	linux-kernel, linux-mips, linux-mm, linux-m68k, openrisc,
	linux-alpha, sparclinux, linux-fsdevel, linux-riscv, linuxppc-dev,
	linux-arm-kernel
In-Reply-To: <20200510075510.987823-1-hch@lst.de>

__flush_icache_user_range is not used in modular code, so unexport it.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 arch/mips/mm/cache.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/mips/mm/cache.c b/arch/mips/mm/cache.c
index 33b409391ddb6..ad6df1cea866f 100644
--- a/arch/mips/mm/cache.c
+++ b/arch/mips/mm/cache.c
@@ -36,7 +36,6 @@ EXPORT_SYMBOL_GPL(flush_icache_range);
 void (*local_flush_icache_range)(unsigned long start, unsigned long end);
 EXPORT_SYMBOL_GPL(local_flush_icache_range);
 void (*__flush_icache_user_range)(unsigned long start, unsigned long end);
-EXPORT_SYMBOL_GPL(__flush_icache_user_range);
 void (*__local_flush_icache_user_range)(unsigned long start, unsigned long end);
 EXPORT_SYMBOL_GPL(__local_flush_icache_user_range);
 
-- 
2.26.2


^ permalink raw reply related

* [PATCH 04/31] nds32: unexport flush_icache_page
From: Christoph Hellwig @ 2020-05-10  7:54 UTC (permalink / raw)
  To: Andrew Morton, Arnd Bergmann, Roman Zippel
  Cc: linux-arch, linux-xtensa, Michal Simek, Jessica Yu, linux-ia64,
	linux-c6x-dev, linux-sh, linux-hexagon, x86, linux-um,
	linux-kernel, linux-mips, linux-mm, linux-m68k, openrisc,
	linux-alpha, sparclinux, linux-fsdevel, linux-riscv, linuxppc-dev,
	linux-arm-kernel
In-Reply-To: <20200510075510.987823-1-hch@lst.de>

flush_icache_page is only used by mm/memory.c.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 arch/nds32/mm/cacheflush.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/nds32/mm/cacheflush.c b/arch/nds32/mm/cacheflush.c
index 254703653b6f5..8f168b33065fa 100644
--- a/arch/nds32/mm/cacheflush.c
+++ b/arch/nds32/mm/cacheflush.c
@@ -35,7 +35,6 @@ void flush_icache_page(struct vm_area_struct *vma, struct page *page)
 	kunmap_atomic((void *)kaddr);
 	local_irq_restore(flags);
 }
-EXPORT_SYMBOL(flush_icache_page);
 
 void flush_icache_user_range(struct vm_area_struct *vma, struct page *page,
 	                     unsigned long addr, int len)
-- 
2.26.2


^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox