* [PATCH 7/7] powerpc/64s: rename pnv|pseries_setup_rfi_flush to _setup_security_mitigations
From: Daniel Axtens @ 2020-11-19 23:13 UTC (permalink / raw)
To: linuxppc-dev; +Cc: dja, cmr, spoorts2, npiggin
In-Reply-To: <20201119231333.361771-1-dja@axtens.net>
pseries|pnv_setup_rfi_flush already does the count cache flush setup, and
we just added entry and uaccess flushes. So the name is not very accurate
any more. In both platforms we then also immediately setup the STF flush.
Rename them to _setup_security_mitigations and fold the STF flush in.
Signed-off-by: Daniel Axtens <dja@axtens.net>
---
arch/powerpc/platforms/powernv/setup.c | 7 ++++---
arch/powerpc/platforms/pseries/mobility.c | 4 ++--
arch/powerpc/platforms/pseries/pseries.h | 2 +-
arch/powerpc/platforms/pseries/setup.c | 7 ++++---
4 files changed, 11 insertions(+), 9 deletions(-)
diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c
index 087ec92acfc4..46115231a3b2 100644
--- a/arch/powerpc/platforms/powernv/setup.c
+++ b/arch/powerpc/platforms/powernv/setup.c
@@ -98,7 +98,7 @@ static void init_fw_feat_flags(struct device_node *np)
security_ftr_clear(SEC_FTR_BNDS_CHK_SPEC_BAR);
}
-static void pnv_setup_rfi_flush(void)
+static void pnv_setup_security_mitigations(void)
{
struct device_node *np, *fw_features;
enum l1d_flush_type type;
@@ -145,6 +145,8 @@ static void pnv_setup_rfi_flush(void)
enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) &&
security_ftr_enabled(SEC_FTR_L1D_FLUSH_UACCESS);
setup_uaccess_flush(enable);
+
+ setup_stf_barrier();
}
static void __init pnv_check_guarded_cores(void)
@@ -173,8 +175,7 @@ static void __init pnv_setup_arch(void)
{
set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT);
- pnv_setup_rfi_flush();
- setup_stf_barrier();
+ pnv_setup_security_mitigations();
/* Initialize SMP */
pnv_smp_init();
diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c
index d6f4162478a5..2f73cb5bf12d 100644
--- a/arch/powerpc/platforms/pseries/mobility.c
+++ b/arch/powerpc/platforms/pseries/mobility.c
@@ -349,8 +349,8 @@ void post_mobility_fixup(void)
cpus_read_unlock();
- /* Possibly switch to a new RFI flush type */
- pseries_setup_rfi_flush();
+ /* Possibly switch to a new L1 flush type */
+ pseries_setup_security_mitigations();
/* Reinitialise system information for hv-24x7 */
read_24x7_sys_info();
diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h
index 13fa370a87e4..593840847cd3 100644
--- a/arch/powerpc/platforms/pseries/pseries.h
+++ b/arch/powerpc/platforms/pseries/pseries.h
@@ -111,7 +111,7 @@ static inline unsigned long cmo_get_page_size(void)
int dlpar_workqueue_init(void);
-void pseries_setup_rfi_flush(void);
+void pseries_setup_security_mitigations(void);
void pseries_lpar_read_hblkrm_characteristics(void);
#endif /* _PSERIES_PSERIES_H */
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 3617cdb079f6..090c13f6c881 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -542,7 +542,7 @@ static void init_cpu_char_feature_flags(struct h_cpu_char_result *result)
security_ftr_clear(SEC_FTR_BNDS_CHK_SPEC_BAR);
}
-void pseries_setup_rfi_flush(void)
+void pseries_setup_security_mitigations(void)
{
struct h_cpu_char_result result;
enum l1d_flush_type types;
@@ -587,6 +587,8 @@ void pseries_setup_rfi_flush(void)
enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) &&
security_ftr_enabled(SEC_FTR_L1D_FLUSH_UACCESS);
setup_uaccess_flush(enable);
+
+ setup_stf_barrier();
}
#ifdef CONFIG_PCI_IOV
@@ -776,8 +778,7 @@ static void __init pSeries_setup_arch(void)
fwnmi_init();
- pseries_setup_rfi_flush();
- setup_stf_barrier();
+ pseries_setup_security_mitigations();
pseries_lpar_read_hblkrm_characteristics();
/* By default, only probe PCI (can be overridden by rtas_pci) */
--
2.25.1
^ permalink raw reply related
* [PATCH 6/7] selftests/powerpc: refactor entry and rfi_flush tests
From: Daniel Axtens @ 2020-11-19 23:13 UTC (permalink / raw)
To: linuxppc-dev; +Cc: dja, cmr, spoorts2, npiggin
In-Reply-To: <20201119231333.361771-1-dja@axtens.net>
For simplicity in backporting, the original entry_flush test contained
a lot of duplicated code from the rfi_flush test. De-duplicate that code.
Signed-off-by: Daniel Axtens <dja@axtens.net>
---
.../testing/selftests/powerpc/include/utils.h | 5 ++
.../selftests/powerpc/security/Makefile | 2 +
.../selftests/powerpc/security/entry_flush.c | 61 +---------------
.../selftests/powerpc/security/flush_utils.c | 70 +++++++++++++++++++
.../selftests/powerpc/security/flush_utils.h | 17 +++++
.../selftests/powerpc/security/rfi_flush.c | 61 +---------------
6 files changed, 96 insertions(+), 120 deletions(-)
create mode 100644 tools/testing/selftests/powerpc/security/flush_utils.c
create mode 100644 tools/testing/selftests/powerpc/security/flush_utils.h
diff --git a/tools/testing/selftests/powerpc/include/utils.h b/tools/testing/selftests/powerpc/include/utils.h
index 052b5a775dc2..b7d188fc87c7 100644
--- a/tools/testing/selftests/powerpc/include/utils.h
+++ b/tools/testing/selftests/powerpc/include/utils.h
@@ -42,6 +42,11 @@ int perf_event_enable(int fd);
int perf_event_disable(int fd);
int perf_event_reset(int fd);
+struct perf_event_read {
+ __u64 nr;
+ __u64 l1d_misses;
+};
+
#if !defined(__GLIBC_PREREQ) || !__GLIBC_PREREQ(2, 30)
#include <unistd.h>
#include <sys/syscall.h>
diff --git a/tools/testing/selftests/powerpc/security/Makefile b/tools/testing/selftests/powerpc/security/Makefile
index 921152caf1dc..f25e854fe370 100644
--- a/tools/testing/selftests/powerpc/security/Makefile
+++ b/tools/testing/selftests/powerpc/security/Makefile
@@ -11,3 +11,5 @@ $(TEST_GEN_PROGS): ../harness.c ../utils.c
$(OUTPUT)/spectre_v2: CFLAGS += -m64
$(OUTPUT)/spectre_v2: ../pmu/event.c branch_loops.S
+$(OUTPUT)/rfi_flush: flush_utils.c
+$(OUTPUT)/entry_flush: flush_utils.c
diff --git a/tools/testing/selftests/powerpc/security/entry_flush.c b/tools/testing/selftests/powerpc/security/entry_flush.c
index 7ae7e37204c5..78cf914fa321 100644
--- a/tools/testing/selftests/powerpc/security/entry_flush.c
+++ b/tools/testing/selftests/powerpc/security/entry_flush.c
@@ -15,66 +15,7 @@
#include <string.h>
#include <stdio.h>
#include "utils.h"
-
-#define CACHELINE_SIZE 128
-
-struct perf_event_read {
- __u64 nr;
- __u64 l1d_misses;
-};
-
-static inline __u64 load(void *addr)
-{
- __u64 tmp;
-
- asm volatile("ld %0,0(%1)" : "=r"(tmp) : "b"(addr));
-
- return tmp;
-}
-
-static void syscall_loop(char *p, unsigned long iterations,
- unsigned long zero_size)
-{
- for (unsigned long i = 0; i < iterations; i++) {
- for (unsigned long j = 0; j < zero_size; j += CACHELINE_SIZE)
- load(p + j);
- getppid();
- }
-}
-
-static void sigill_handler(int signr, siginfo_t *info, void *unused)
-{
- static int warned;
- ucontext_t *ctx = (ucontext_t *)unused;
- unsigned long *pc = &UCONTEXT_NIA(ctx);
-
- /* mtspr 3,RS to check for move to DSCR below */
- if ((*((unsigned int *)*pc) & 0xfc1fffff) == 0x7c0303a6) {
- if (!warned++)
- printf("WARNING: Skipping over dscr setup. Consider running 'ppc64_cpu --dscr=1' manually.\n");
- *pc += 4;
- } else {
- printf("SIGILL at %p\n", pc);
- abort();
- }
-}
-
-static void set_dscr(unsigned long val)
-{
- static int init;
- struct sigaction sa;
-
- if (!init) {
- memset(&sa, 0, sizeof(sa));
- sa.sa_sigaction = sigill_handler;
- sa.sa_flags = SA_SIGINFO;
- if (sigaction(SIGILL, &sa, NULL))
- perror("sigill_handler");
- init = 1;
- }
-
- asm volatile("mtspr %1,%0" : : "r" (val), "i" (SPRN_DSCR));
-}
+#include "flush_utils.h"
int entry_flush_test(void)
{
diff --git a/tools/testing/selftests/powerpc/security/flush_utils.c b/tools/testing/selftests/powerpc/security/flush_utils.c
new file mode 100644
index 000000000000..0c3c4c40c7fb
--- /dev/null
+++ b/tools/testing/selftests/powerpc/security/flush_utils.c
@@ -0,0 +1,70 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+/*
+ * Copyright 2018 IBM Corporation.
+ */
+
+#define __SANE_USERSPACE_TYPES__
+
+#include <sys/types.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include "utils.h"
+#include "flush_utils.h"
+
+static inline __u64 load(void *addr)
+{
+ __u64 tmp;
+
+ asm volatile("ld %0,0(%1)" : "=r"(tmp) : "b"(addr));
+
+ return tmp;
+}
+
+void syscall_loop(char *p, unsigned long iterations,
+ unsigned long zero_size)
+{
+ for (unsigned long i = 0; i < iterations; i++) {
+ for (unsigned long j = 0; j < zero_size; j += CACHELINE_SIZE)
+ load(p + j);
+ getppid();
+ }
+}
+
+static void sigill_handler(int signr, siginfo_t *info, void *unused)
+{
+ static int warned;
+ ucontext_t *ctx = (ucontext_t *)unused;
+ unsigned long *pc = &UCONTEXT_NIA(ctx);
+
+ /* mtspr 3,RS to check for move to DSCR below */
+ if ((*((unsigned int *)*pc) & 0xfc1fffff) == 0x7c0303a6) {
+ if (!warned++)
+ printf("WARNING: Skipping over dscr setup. Consider running 'ppc64_cpu --dscr=1' manually.\n");
+ *pc += 4;
+ } else {
+ printf("SIGILL at %p\n", pc);
+ abort();
+ }
+}
+
+void set_dscr(unsigned long val)
+{
+ static int init;
+ struct sigaction sa;
+
+ if (!init) {
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_sigaction = sigill_handler;
+ sa.sa_flags = SA_SIGINFO;
+ if (sigaction(SIGILL, &sa, NULL))
+ perror("sigill_handler");
+ init = 1;
+ }
+
+ asm volatile("mtspr %1,%0" : : "r" (val), "i" (SPRN_DSCR));
+}
diff --git a/tools/testing/selftests/powerpc/security/flush_utils.h b/tools/testing/selftests/powerpc/security/flush_utils.h
new file mode 100644
index 000000000000..07a5eb301466
--- /dev/null
+++ b/tools/testing/selftests/powerpc/security/flush_utils.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+
+/*
+ * Copyright 2018 IBM Corporation.
+ */
+
+#ifndef _SELFTESTS_POWERPC_SECURITY_FLUSH_UTILS_H
+#define _SELFTESTS_POWERPC_SECURITY_FLUSH_UTILS_H
+
+#define CACHELINE_SIZE 128
+
+void syscall_loop(char *p, unsigned long iterations,
+ unsigned long zero_size);
+
+void set_dscr(unsigned long val);
+
+#endif /* _SELFTESTS_POWERPC_SECURITY_FLUSH_UTILS_H */
diff --git a/tools/testing/selftests/powerpc/security/rfi_flush.c b/tools/testing/selftests/powerpc/security/rfi_flush.c
index 4e7b2776c367..7565fd786640 100644
--- a/tools/testing/selftests/powerpc/security/rfi_flush.c
+++ b/tools/testing/selftests/powerpc/security/rfi_flush.c
@@ -10,71 +10,12 @@
#include <stdint.h>
#include <malloc.h>
#include <unistd.h>
-#include <signal.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include "utils.h"
+#include "flush_utils.h"
-#define CACHELINE_SIZE 128
-
-struct perf_event_read {
- __u64 nr;
- __u64 l1d_misses;
-};
-
-static inline __u64 load(void *addr)
-{
- __u64 tmp;
-
- asm volatile("ld %0,0(%1)" : "=r"(tmp) : "b"(addr));
-
- return tmp;
-}
-
-static void syscall_loop(char *p, unsigned long iterations,
- unsigned long zero_size)
-{
- for (unsigned long i = 0; i < iterations; i++) {
- for (unsigned long j = 0; j < zero_size; j += CACHELINE_SIZE)
- load(p + j);
- getppid();
- }
-}
-
-static void sigill_handler(int signr, siginfo_t *info, void *unused)
-{
- static int warned = 0;
- ucontext_t *ctx = (ucontext_t *)unused;
- unsigned long *pc = &UCONTEXT_NIA(ctx);
-
- /* mtspr 3,RS to check for move to DSCR below */
- if ((*((unsigned int *)*pc) & 0xfc1fffff) == 0x7c0303a6) {
- if (!warned++)
- printf("WARNING: Skipping over dscr setup. Consider running 'ppc64_cpu --dscr=1' manually.\n");
- *pc += 4;
- } else {
- printf("SIGILL at %p\n", pc);
- abort();
- }
-}
-
-static void set_dscr(unsigned long val)
-{
- static int init = 0;
- struct sigaction sa;
-
- if (!init) {
- memset(&sa, 0, sizeof(sa));
- sa.sa_sigaction = sigill_handler;
- sa.sa_flags = SA_SIGINFO;
- if (sigaction(SIGILL, &sa, NULL))
- perror("sigill_handler");
- init = 1;
- }
-
- asm volatile("mtspr %1,%0" : : "r" (val), "i" (SPRN_DSCR));
-}
int rfi_flush_test(void)
{
--
2.25.1
^ permalink raw reply related
* [PATCH 5/7] selftests/powerpc: entry flush test
From: Daniel Axtens @ 2020-11-19 23:13 UTC (permalink / raw)
To: linuxppc-dev; +Cc: dja, cmr, spoorts2, npiggin
In-Reply-To: <20201119231333.361771-1-dja@axtens.net>
Add a test modelled on the RFI flush test which counts the number
of L1D misses doing a simple syscall with the entry flush on and off.
For simplicity of backporting, this test duplicates a lot of code from
rfi_flush. We clean that up in the next patch.
Signed-off-by: Daniel Axtens <dja@axtens.net>
---
.../selftests/powerpc/security/.gitignore | 1 +
.../selftests/powerpc/security/Makefile | 2 +-
.../selftests/powerpc/security/entry_flush.c | 198 ++++++++++++++++++
3 files changed, 200 insertions(+), 1 deletion(-)
create mode 100644 tools/testing/selftests/powerpc/security/entry_flush.c
diff --git a/tools/testing/selftests/powerpc/security/.gitignore b/tools/testing/selftests/powerpc/security/.gitignore
index f795e06f5ae3..4257a1f156bb 100644
--- a/tools/testing/selftests/powerpc/security/.gitignore
+++ b/tools/testing/selftests/powerpc/security/.gitignore
@@ -1,2 +1,3 @@
# SPDX-License-Identifier: GPL-2.0-only
rfi_flush
+entry_flush
diff --git a/tools/testing/selftests/powerpc/security/Makefile b/tools/testing/selftests/powerpc/security/Makefile
index eadbbff50be6..921152caf1dc 100644
--- a/tools/testing/selftests/powerpc/security/Makefile
+++ b/tools/testing/selftests/powerpc/security/Makefile
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0+
-TEST_GEN_PROGS := rfi_flush spectre_v2
+TEST_GEN_PROGS := rfi_flush entry_flush spectre_v2
top_srcdir = ../../../../..
CFLAGS += -I../../../../../usr/include
diff --git a/tools/testing/selftests/powerpc/security/entry_flush.c b/tools/testing/selftests/powerpc/security/entry_flush.c
new file mode 100644
index 000000000000..7ae7e37204c5
--- /dev/null
+++ b/tools/testing/selftests/powerpc/security/entry_flush.c
@@ -0,0 +1,198 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+/*
+ * Copyright 2018 IBM Corporation.
+ */
+
+#define __SANE_USERSPACE_TYPES__
+
+#include <sys/types.h>
+#include <stdint.h>
+#include <malloc.h>
+#include <unistd.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include "utils.h"
+
+#define CACHELINE_SIZE 128
+
+struct perf_event_read {
+ __u64 nr;
+ __u64 l1d_misses;
+};
+
+static inline __u64 load(void *addr)
+{
+ __u64 tmp;
+
+ asm volatile("ld %0,0(%1)" : "=r"(tmp) : "b"(addr));
+
+ return tmp;
+}
+
+static void syscall_loop(char *p, unsigned long iterations,
+ unsigned long zero_size)
+{
+ for (unsigned long i = 0; i < iterations; i++) {
+ for (unsigned long j = 0; j < zero_size; j += CACHELINE_SIZE)
+ load(p + j);
+ getppid();
+ }
+}
+
+static void sigill_handler(int signr, siginfo_t *info, void *unused)
+{
+ static int warned;
+ ucontext_t *ctx = (ucontext_t *)unused;
+ unsigned long *pc = &UCONTEXT_NIA(ctx);
+
+ /* mtspr 3,RS to check for move to DSCR below */
+ if ((*((unsigned int *)*pc) & 0xfc1fffff) == 0x7c0303a6) {
+ if (!warned++)
+ printf("WARNING: Skipping over dscr setup. Consider running 'ppc64_cpu --dscr=1' manually.\n");
+ *pc += 4;
+ } else {
+ printf("SIGILL at %p\n", pc);
+ abort();
+ }
+}
+
+static void set_dscr(unsigned long val)
+{
+ static int init;
+ struct sigaction sa;
+
+ if (!init) {
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_sigaction = sigill_handler;
+ sa.sa_flags = SA_SIGINFO;
+ if (sigaction(SIGILL, &sa, NULL))
+ perror("sigill_handler");
+ init = 1;
+ }
+
+ asm volatile("mtspr %1,%0" : : "r" (val), "i" (SPRN_DSCR));
+}
+
+int entry_flush_test(void)
+{
+ char *p;
+ int repetitions = 10;
+ int fd, passes = 0, iter, rc = 0;
+ struct perf_event_read v;
+ __u64 l1d_misses_total = 0;
+ unsigned long iterations = 100000, zero_size = 24 * 1024;
+ unsigned long l1d_misses_expected;
+ int rfi_flush_orig;
+ int entry_flush, entry_flush_orig;
+
+ SKIP_IF(geteuid() != 0);
+
+ // The PMU event we use only works on Power7 or later
+ SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_06));
+
+ if (read_debugfs_file("powerpc/rfi_flush", &rfi_flush_orig) < 0) {
+ perror("Unable to read powerpc/rfi_flush debugfs file");
+ SKIP_IF(1);
+ }
+
+ if (read_debugfs_file("powerpc/entry_flush", &entry_flush_orig) < 0) {
+ perror("Unable to read powerpc/entry_flush debugfs file");
+ SKIP_IF(1);
+ }
+
+ if (rfi_flush_orig != 0) {
+ if (write_debugfs_file("powerpc/rfi_flush", 0) < 0) {
+ perror("error writing to powerpc/rfi_flush debugfs file");
+ FAIL_IF(1);
+ }
+ }
+
+ entry_flush = entry_flush_orig;
+
+ fd = perf_event_open_counter(PERF_TYPE_RAW, /* L1d miss */ 0x400f0, -1);
+ FAIL_IF(fd < 0);
+
+ p = (char *)memalign(zero_size, CACHELINE_SIZE);
+
+ FAIL_IF(perf_event_enable(fd));
+
+ // disable L1 prefetching
+ set_dscr(1);
+
+ iter = repetitions;
+
+ /*
+ * We expect to see l1d miss for each cacheline access when entry_flush
+ * is set. Allow a small variation on this.
+ */
+ l1d_misses_expected = iterations * (zero_size / CACHELINE_SIZE - 2);
+
+again:
+ FAIL_IF(perf_event_reset(fd));
+
+ syscall_loop(p, iterations, zero_size);
+
+ FAIL_IF(read(fd, &v, sizeof(v)) != sizeof(v));
+
+ if (entry_flush && v.l1d_misses >= l1d_misses_expected)
+ passes++;
+ else if (!entry_flush && v.l1d_misses < (l1d_misses_expected / 2))
+ passes++;
+
+ l1d_misses_total += v.l1d_misses;
+
+ while (--iter)
+ goto again;
+
+ if (passes < repetitions) {
+ printf("FAIL (L1D misses with entry_flush=%d: %llu %c %lu) [%d/%d failures]\n",
+ entry_flush, l1d_misses_total, entry_flush ? '<' : '>',
+ entry_flush ? repetitions * l1d_misses_expected :
+ repetitions * l1d_misses_expected / 2,
+ repetitions - passes, repetitions);
+ rc = 1;
+ } else {
+ printf("PASS (L1D misses with entry_flush=%d: %llu %c %lu) [%d/%d pass]\n",
+ entry_flush, l1d_misses_total, entry_flush ? '>' : '<',
+ entry_flush ? repetitions * l1d_misses_expected :
+ repetitions * l1d_misses_expected / 2,
+ passes, repetitions);
+ }
+
+ if (entry_flush == entry_flush_orig) {
+ entry_flush = !entry_flush_orig;
+ if (write_debugfs_file("powerpc/entry_flush", entry_flush) < 0) {
+ perror("error writing to powerpc/entry_flush debugfs file");
+ return 1;
+ }
+ iter = repetitions;
+ l1d_misses_total = 0;
+ passes = 0;
+ goto again;
+ }
+
+ perf_event_disable(fd);
+ close(fd);
+
+ set_dscr(0);
+
+ if (write_debugfs_file("powerpc/rfi_flush", rfi_flush_orig) < 0) {
+ perror("unable to restore original value of powerpc/rfi_flush debugfs file");
+ return 1;
+ }
+
+ if (write_debugfs_file("powerpc/entry_flush", entry_flush_orig) < 0) {
+ perror("unable to restore original value of powerpc/entry_flush debugfs file");
+ return 1;
+ }
+
+ return rc;
+}
+
+int main(int argc, char *argv[])
+{
+ return test_harness(entry_flush_test, "entry_flush_test");
+}
--
2.25.1
^ permalink raw reply related
* [PATCH 4/7] powerpc: Only include kup-radix.h for 64-bit Book3S
From: Daniel Axtens @ 2020-11-19 23:13 UTC (permalink / raw)
To: linuxppc-dev; +Cc: dja, cmr, spoorts2, npiggin
In-Reply-To: <20201119231333.361771-1-dja@axtens.net>
From: Michael Ellerman <mpe@ellerman.id.au>
In kup.h we currently include kup-radix.h for all 64-bit builds, which
includes Book3S and Book3E. The latter doesn't make sense, Book3E
never uses the Radix MMU.
This has worked up until now, but almost by accident, and the recent
uaccess flush changes introduced a build breakage on Book3E because of
the bad structure of the code.
So disentangle things so that we only use kup-radix.h for Book3S. This
requires some more stubs in kup.h and fixing an include in
syscall_64.c.
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
arch/powerpc/include/asm/book3s/64/kup-radix.h | 4 ++--
arch/powerpc/include/asm/kup.h | 11 ++++++++---
arch/powerpc/kernel/syscall_64.c | 2 +-
3 files changed, 11 insertions(+), 6 deletions(-)
diff --git a/arch/powerpc/include/asm/book3s/64/kup-radix.h b/arch/powerpc/include/asm/book3s/64/kup-radix.h
index 97c2394e7dea..28716e2f13e3 100644
--- a/arch/powerpc/include/asm/book3s/64/kup-radix.h
+++ b/arch/powerpc/include/asm/book3s/64/kup-radix.h
@@ -27,6 +27,7 @@
#endif
.endm
+#ifdef CONFIG_PPC_KUAP
.macro kuap_check_amr gpr1, gpr2
#ifdef CONFIG_PPC_KUAP_DEBUG
BEGIN_MMU_FTR_SECTION_NESTED(67)
@@ -38,6 +39,7 @@
END_MMU_FTR_SECTION_NESTED_IFSET(MMU_FTR_RADIX_KUAP, 67)
#endif
.endm
+#endif
.macro kuap_save_amr_and_lock gpr1, gpr2, use_cr, msr_pr_cr
#ifdef CONFIG_PPC_KUAP
@@ -148,8 +150,6 @@ static inline unsigned long kuap_get_and_check_amr(void)
return 0UL;
}
-static inline void kuap_check_amr(void) { }
-
static inline unsigned long get_kuap(void)
{
return AMR_KUAP_BLOCKED;
diff --git a/arch/powerpc/include/asm/kup.h b/arch/powerpc/include/asm/kup.h
index 0f5c606ae057..0d93331d0fab 100644
--- a/arch/powerpc/include/asm/kup.h
+++ b/arch/powerpc/include/asm/kup.h
@@ -14,7 +14,7 @@
#define KUAP_CURRENT_WRITE 8
#define KUAP_CURRENT (KUAP_CURRENT_READ | KUAP_CURRENT_WRITE)
-#ifdef CONFIG_PPC64
+#ifdef CONFIG_PPC_BOOK3S_64
#include <asm/book3s/64/kup-radix.h>
#endif
#ifdef CONFIG_PPC_8xx
@@ -35,6 +35,9 @@
.macro kuap_check current, gpr
.endm
+.macro kuap_check_amr gpr1, gpr2
+.endm
+
#endif
#else /* !__ASSEMBLY__ */
@@ -60,19 +63,21 @@ bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write)
return false;
}
+static inline void kuap_check_amr(void) { }
+
/*
* book3s/64/kup-radix.h defines these functions for the !KUAP case to flush
* the L1D cache after user accesses. Only include the empty stubs for other
* platforms.
*/
-#ifndef CONFIG_PPC64
+#ifndef CONFIG_PPC_BOOK3S_64
static inline void allow_user_access(void __user *to, const void __user *from,
unsigned long size, unsigned long dir) { }
static inline void prevent_user_access(void __user *to, const void __user *from,
unsigned long size, unsigned long dir) { }
static inline unsigned long prevent_user_access_return(void) { return 0UL; }
static inline void restore_user_access(unsigned long flags) { }
-#endif /* CONFIG_PPC64 */
+#endif /* CONFIG_PPC_BOOK3S_64 */
#endif /* CONFIG_PPC_KUAP */
static inline void allow_read_from_user(const void __user *from, unsigned long size)
diff --git a/arch/powerpc/kernel/syscall_64.c b/arch/powerpc/kernel/syscall_64.c
index 8e50818aa50b..310bcd768cd5 100644
--- a/arch/powerpc/kernel/syscall_64.c
+++ b/arch/powerpc/kernel/syscall_64.c
@@ -2,7 +2,7 @@
#include <linux/err.h>
#include <asm/asm-prototypes.h>
-#include <asm/book3s/64/kup-radix.h>
+#include <asm/kup.h>
#include <asm/cputime.h>
#include <asm/hw_irq.h>
#include <asm/kprobes.h>
--
2.25.1
^ permalink raw reply related
* [PATCH 3/7] powerpc/64s: flush L1D after user accesses
From: Daniel Axtens @ 2020-11-19 23:13 UTC (permalink / raw)
To: linuxppc-dev; +Cc: dja, cmr, spoorts2, npiggin
In-Reply-To: <20201119231333.361771-1-dja@axtens.net>
From: Nicholas Piggin <npiggin@gmail.com>
IBM Power9 processors can speculatively operate on data in the L1 cache
before it has been completely validated, via a way-prediction mechanism. It
is not possible for an attacker to determine the contents of impermissible
memory using this method, since these systems implement a combination of
hardware and software security measures to prevent scenarios where
protected data could be leaked.
However these measures don't address the scenario where an attacker induces
the operating system to speculatively execute instructions using data that
the attacker controls. This can be used for example to speculatively bypass
"kernel user access prevention" techniques, as discovered by Anthony
Steinhauser of Google's Safeside Project. This is not an attack by itself,
but there is a possibility it could be used in conjunction with
side-channels or other weaknesses in the privileged code to construct an
attack.
This issue can be mitigated by flushing the L1 cache between privilege
boundaries of concern. This patch flushes the L1 cache after user accesses.
This is part of the fix for CVE-2020-4788.
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Daniel Axtens <dja@axtens.net>
---
.../admin-guide/kernel-parameters.txt | 4 +
.../powerpc/include/asm/book3s/64/kup-radix.h | 66 ++++++++------
arch/powerpc/include/asm/exception-64s.h | 3 +
arch/powerpc/include/asm/feature-fixups.h | 9 ++
arch/powerpc/include/asm/kup.h | 19 +++--
arch/powerpc/include/asm/security_features.h | 3 +
arch/powerpc/include/asm/setup.h | 1 +
arch/powerpc/kernel/exceptions-64s.S | 85 ++++++-------------
arch/powerpc/kernel/setup_64.c | 62 ++++++++++++++
arch/powerpc/kernel/vmlinux.lds.S | 7 ++
arch/powerpc/lib/feature-fixups.c | 50 +++++++++++
arch/powerpc/platforms/powernv/setup.c | 10 ++-
arch/powerpc/platforms/pseries/setup.c | 4 +
13 files changed, 233 insertions(+), 90 deletions(-)
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 282789cfa1dc..d1b417dd1266 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -2876,6 +2876,7 @@
tsx_async_abort=off [X86]
kvm.nx_huge_pages=off [X86]
no_entry_flush [PPC]
+ no_uaccess_flush [PPC]
Exceptions:
This does not have any effect on
@@ -3255,6 +3256,9 @@
nospec_store_bypass_disable
[HW] Disable all mitigations for the Speculative Store Bypass vulnerability
+ no_uaccess_flush
+ [PPC] Don't flush the L1-D cache after accessing user data.
+
noxsave [BUGS=X86] Disables x86 extended register state save
and restore using xsave. The kernel will fallback to
enabling legacy floating-point and sse state.
diff --git a/arch/powerpc/include/asm/book3s/64/kup-radix.h b/arch/powerpc/include/asm/book3s/64/kup-radix.h
index 3ee1ec60be84..97c2394e7dea 100644
--- a/arch/powerpc/include/asm/book3s/64/kup-radix.h
+++ b/arch/powerpc/include/asm/book3s/64/kup-radix.h
@@ -61,6 +61,8 @@
#else /* !__ASSEMBLY__ */
+DECLARE_STATIC_KEY_FALSE(uaccess_flush_key);
+
#ifdef CONFIG_PPC_KUAP
#include <asm/mmu.h>
@@ -103,8 +105,16 @@ static inline void kuap_check_amr(void)
static inline unsigned long get_kuap(void)
{
+ /*
+ * We return AMR_KUAP_BLOCKED when we don't support KUAP because
+ * prevent_user_access_return needs to return AMR_KUAP_BLOCKED to
+ * cause restore_user_access to do a flush.
+ *
+ * This has no effect in terms of actually blocking things on hash,
+ * so it doesn't break anything.
+ */
if (!early_mmu_has_feature(MMU_FTR_RADIX_KUAP))
- return 0;
+ return AMR_KUAP_BLOCKED;
return mfspr(SPRN_AMR);
}
@@ -123,6 +133,31 @@ static inline void set_kuap(unsigned long value)
isync();
}
+static inline bool
+bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write)
+{
+ return WARN(mmu_has_feature(MMU_FTR_RADIX_KUAP) &&
+ (regs->kuap & (is_write ? AMR_KUAP_BLOCK_WRITE : AMR_KUAP_BLOCK_READ)),
+ "Bug: %s fault blocked by AMR!", is_write ? "Write" : "Read");
+}
+#else /* CONFIG_PPC_KUAP */
+static inline void kuap_restore_amr(struct pt_regs *regs, unsigned long amr) { }
+
+static inline unsigned long kuap_get_and_check_amr(void)
+{
+ return 0UL;
+}
+
+static inline void kuap_check_amr(void) { }
+
+static inline unsigned long get_kuap(void)
+{
+ return AMR_KUAP_BLOCKED;
+}
+
+static inline void set_kuap(unsigned long value) { }
+#endif /* !CONFIG_PPC_KUAP */
+
static __always_inline void allow_user_access(void __user *to, const void __user *from,
unsigned long size, unsigned long dir)
{
@@ -142,6 +177,8 @@ static inline void prevent_user_access(void __user *to, const void __user *from,
unsigned long size, unsigned long dir)
{
set_kuap(AMR_KUAP_BLOCKED);
+ if (static_branch_unlikely(&uaccess_flush_key))
+ do_uaccess_flush();
}
static inline unsigned long prevent_user_access_return(void)
@@ -149,6 +186,8 @@ static inline unsigned long prevent_user_access_return(void)
unsigned long flags = get_kuap();
set_kuap(AMR_KUAP_BLOCKED);
+ if (static_branch_unlikely(&uaccess_flush_key))
+ do_uaccess_flush();
return flags;
}
@@ -156,30 +195,9 @@ static inline unsigned long prevent_user_access_return(void)
static inline void restore_user_access(unsigned long flags)
{
set_kuap(flags);
+ if (static_branch_unlikely(&uaccess_flush_key) && flags == AMR_KUAP_BLOCKED)
+ do_uaccess_flush();
}
-
-static inline bool
-bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write)
-{
- return WARN(mmu_has_feature(MMU_FTR_RADIX_KUAP) &&
- (regs->kuap & (is_write ? AMR_KUAP_BLOCK_WRITE : AMR_KUAP_BLOCK_READ)),
- "Bug: %s fault blocked by AMR!", is_write ? "Write" : "Read");
-}
-#else /* CONFIG_PPC_KUAP */
-static inline void kuap_restore_amr(struct pt_regs *regs, unsigned long amr)
-{
-}
-
-static inline void kuap_check_amr(void)
-{
-}
-
-static inline unsigned long kuap_get_and_check_amr(void)
-{
- return 0;
-}
-#endif /* CONFIG_PPC_KUAP */
-
#endif /* __ASSEMBLY__ */
#endif /* _ASM_POWERPC_BOOK3S_64_KUP_RADIX_H */
diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h
index 83fa88bc9935..1d32b174ab6a 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -144,6 +144,9 @@
RFSCV; \
b rfscv_flush_fallback
+#else /* __ASSEMBLY__ */
+/* Prototype for function defined in exceptions-64s.S */
+void do_uaccess_flush(void);
#endif /* __ASSEMBLY__ */
#endif /* _ASM_POWERPC_EXCEPTION_H */
diff --git a/arch/powerpc/include/asm/feature-fixups.h b/arch/powerpc/include/asm/feature-fixups.h
index 06a48219bbf2..fbd406cd6916 100644
--- a/arch/powerpc/include/asm/feature-fixups.h
+++ b/arch/powerpc/include/asm/feature-fixups.h
@@ -205,6 +205,14 @@ label##3: \
FTR_ENTRY_OFFSET 955b-956b; \
.popsection;
+#define UACCESS_FLUSH_FIXUP_SECTION \
+959: \
+ .pushsection __uaccess_flush_fixup,"a"; \
+ .align 2; \
+960: \
+ FTR_ENTRY_OFFSET 959b-960b; \
+ .popsection;
+
#define ENTRY_FLUSH_FIXUP_SECTION \
957: \
.pushsection __entry_flush_fixup,"a"; \
@@ -248,6 +256,7 @@ extern long stf_barrier_fallback;
extern long entry_flush_fallback;
extern long __start___stf_entry_barrier_fixup, __stop___stf_entry_barrier_fixup;
extern long __start___stf_exit_barrier_fixup, __stop___stf_exit_barrier_fixup;
+extern long __start___uaccess_flush_fixup, __stop___uaccess_flush_fixup;
extern long __start___entry_flush_fixup, __stop___entry_flush_fixup;
extern long __start___rfi_flush_fixup, __stop___rfi_flush_fixup;
extern long __start___barrier_nospec_fixup, __stop___barrier_nospec_fixup;
diff --git a/arch/powerpc/include/asm/kup.h b/arch/powerpc/include/asm/kup.h
index 1d0f7d838b2e..0f5c606ae057 100644
--- a/arch/powerpc/include/asm/kup.h
+++ b/arch/powerpc/include/asm/kup.h
@@ -53,17 +53,26 @@ static inline void setup_kuep(bool disabled) { }
void setup_kuap(bool disabled);
#else
static inline void setup_kuap(bool disabled) { }
+
+static inline bool
+bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write)
+{
+ return false;
+}
+
+/*
+ * book3s/64/kup-radix.h defines these functions for the !KUAP case to flush
+ * the L1D cache after user accesses. Only include the empty stubs for other
+ * platforms.
+ */
+#ifndef CONFIG_PPC64
static inline void allow_user_access(void __user *to, const void __user *from,
unsigned long size, unsigned long dir) { }
static inline void prevent_user_access(void __user *to, const void __user *from,
unsigned long size, unsigned long dir) { }
static inline unsigned long prevent_user_access_return(void) { return 0UL; }
static inline void restore_user_access(unsigned long flags) { }
-static inline bool
-bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write)
-{
- return false;
-}
+#endif /* CONFIG_PPC64 */
#endif /* CONFIG_PPC_KUAP */
static inline void allow_read_from_user(const void __user *from, unsigned long size)
diff --git a/arch/powerpc/include/asm/security_features.h b/arch/powerpc/include/asm/security_features.h
index 9e7459d2edca..b774a4477d5f 100644
--- a/arch/powerpc/include/asm/security_features.h
+++ b/arch/powerpc/include/asm/security_features.h
@@ -89,6 +89,8 @@ static inline bool security_ftr_enabled(u64 feature)
// The L1-D cache should be flushed when entering the kernel
#define SEC_FTR_L1D_FLUSH_ENTRY 0x0000000000004000ull
+// The L1-D cache should be flushed after user accesses from the kernel
+#define SEC_FTR_L1D_FLUSH_UACCESS 0x0000000000008000ull
// Features enabled by default
#define SEC_FTR_DEFAULT \
@@ -96,6 +98,7 @@ static inline bool security_ftr_enabled(u64 feature)
SEC_FTR_L1D_FLUSH_PR | \
SEC_FTR_BNDS_CHK_SPEC_BAR | \
SEC_FTR_L1D_FLUSH_ENTRY | \
+ SEC_FTR_L1D_FLUSH_UACCESS | \
SEC_FTR_FAVOUR_SECURITY)
#endif /* _ASM_POWERPC_SECURITY_FEATURES_H */
diff --git a/arch/powerpc/include/asm/setup.h b/arch/powerpc/include/asm/setup.h
index e2fcd3e874f8..a466749703f1 100644
--- a/arch/powerpc/include/asm/setup.h
+++ b/arch/powerpc/include/asm/setup.h
@@ -60,6 +60,7 @@ void setup_barrier_nospec(void);
#else
static inline void setup_barrier_nospec(void) { };
#endif
+void do_uaccess_flush_fixups(enum l1d_flush_type types);
void do_entry_flush_fixups(enum l1d_flush_type types);
void do_barrier_nospec_fixups(bool enable);
extern bool barrier_nospec_enabled;
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 5577dd887d37..f63a3d3bca3d 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -2951,11 +2951,8 @@ TRAMP_REAL_BEGIN(stf_barrier_fallback)
.endr
blr
-TRAMP_REAL_BEGIN(entry_flush_fallback)
- std r9,PACA_EXRFI+EX_R9(r13)
- std r10,PACA_EXRFI+EX_R10(r13)
- std r11,PACA_EXRFI+EX_R11(r13)
- mfctr r9
+/* Clobbers r10, r11, ctr */
+.macro L1D_DISPLACEMENT_FLUSH
ld r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13)
ld r11,PACA_L1D_FLUSH_SIZE(r13)
srdi r11,r11,(7 + 3) /* 128 byte lines, unrolled 8x */
@@ -2981,7 +2978,14 @@ TRAMP_REAL_BEGIN(entry_flush_fallback)
ld r11,(0x80 + 8)*7(r10)
addi r10,r10,0x80*8
bdnz 1b
+.endm
+TRAMP_REAL_BEGIN(entry_flush_fallback)
+ std r9,PACA_EXRFI+EX_R9(r13)
+ std r10,PACA_EXRFI+EX_R10(r13)
+ std r11,PACA_EXRFI+EX_R11(r13)
+ mfctr r9
+ L1D_DISPLACEMENT_FLUSH
mtctr r9
ld r9,PACA_EXRFI+EX_R9(r13)
ld r10,PACA_EXRFI+EX_R10(r13)
@@ -2997,32 +3001,7 @@ TRAMP_REAL_BEGIN(rfi_flush_fallback)
std r10,PACA_EXRFI+EX_R10(r13)
std r11,PACA_EXRFI+EX_R11(r13)
mfctr r9
- ld r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13)
- ld r11,PACA_L1D_FLUSH_SIZE(r13)
- srdi r11,r11,(7 + 3) /* 128 byte lines, unrolled 8x */
- mtctr r11
- DCBT_BOOK3S_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */
-
- /* order ld/st prior to dcbt stop all streams with flushing */
- sync
-
- /*
- * The load adresses are at staggered offsets within cachelines,
- * which suits some pipelines better (on others it should not
- * hurt).
- */
-1:
- ld r11,(0x80 + 8)*0(r10)
- ld r11,(0x80 + 8)*1(r10)
- ld r11,(0x80 + 8)*2(r10)
- ld r11,(0x80 + 8)*3(r10)
- ld r11,(0x80 + 8)*4(r10)
- ld r11,(0x80 + 8)*5(r10)
- ld r11,(0x80 + 8)*6(r10)
- ld r11,(0x80 + 8)*7(r10)
- addi r10,r10,0x80*8
- bdnz 1b
-
+ L1D_DISPLACEMENT_FLUSH
mtctr r9
ld r9,PACA_EXRFI+EX_R9(r13)
ld r10,PACA_EXRFI+EX_R10(r13)
@@ -3040,32 +3019,7 @@ TRAMP_REAL_BEGIN(hrfi_flush_fallback)
std r10,PACA_EXRFI+EX_R10(r13)
std r11,PACA_EXRFI+EX_R11(r13)
mfctr r9
- ld r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13)
- ld r11,PACA_L1D_FLUSH_SIZE(r13)
- srdi r11,r11,(7 + 3) /* 128 byte lines, unrolled 8x */
- mtctr r11
- DCBT_BOOK3S_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */
-
- /* order ld/st prior to dcbt stop all streams with flushing */
- sync
-
- /*
- * The load adresses are at staggered offsets within cachelines,
- * which suits some pipelines better (on others it should not
- * hurt).
- */
-1:
- ld r11,(0x80 + 8)*0(r10)
- ld r11,(0x80 + 8)*1(r10)
- ld r11,(0x80 + 8)*2(r10)
- ld r11,(0x80 + 8)*3(r10)
- ld r11,(0x80 + 8)*4(r10)
- ld r11,(0x80 + 8)*5(r10)
- ld r11,(0x80 + 8)*6(r10)
- ld r11,(0x80 + 8)*7(r10)
- addi r10,r10,0x80*8
- bdnz 1b
-
+ L1D_DISPLACEMENT_FLUSH
mtctr r9
ld r9,PACA_EXRFI+EX_R9(r13)
ld r10,PACA_EXRFI+EX_R10(r13)
@@ -3116,8 +3070,21 @@ TRAMP_REAL_BEGIN(rfscv_flush_fallback)
RFSCV
USE_TEXT_SECTION()
- MASKED_INTERRUPT
- MASKED_INTERRUPT hsrr=1
+
+_GLOBAL(do_uaccess_flush)
+ UACCESS_FLUSH_FIXUP_SECTION
+ nop
+ nop
+ nop
+ blr
+ L1D_DISPLACEMENT_FLUSH
+ blr
+_ASM_NOKPROBE_SYMBOL(do_uaccess_flush)
+EXPORT_SYMBOL(do_uaccess_flush)
+
+
+MASKED_INTERRUPT
+MASKED_INTERRUPT hsrr=1
#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
kvmppc_skip_interrupt:
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 7d447b43ad13..74fd47f46fa5 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -946,8 +946,12 @@ static enum l1d_flush_type enabled_flush_types;
static void *l1d_flush_fallback_area;
static bool no_rfi_flush;
static bool no_entry_flush;
+static bool no_uaccess_flush;
bool rfi_flush;
bool entry_flush;
+bool uaccess_flush;
+DEFINE_STATIC_KEY_FALSE(uaccess_flush_key);
+EXPORT_SYMBOL(uaccess_flush_key);
static int __init handle_no_rfi_flush(char *p)
{
@@ -965,6 +969,14 @@ static int __init handle_no_entry_flush(char *p)
}
early_param("no_entry_flush", handle_no_entry_flush);
+static int __init handle_no_uaccess_flush(char *p)
+{
+ pr_info("uaccess-flush: disabled on command line.");
+ no_uaccess_flush = true;
+ return 0;
+}
+early_param("no_uaccess_flush", handle_no_uaccess_flush);
+
/*
* The RFI flush is not KPTI, but because users will see doco that says to use
* nopti we hijack that option here to also disable the RFI flush.
@@ -1008,6 +1020,20 @@ void entry_flush_enable(bool enable)
entry_flush = enable;
}
+void uaccess_flush_enable(bool enable)
+{
+ if (enable) {
+ do_uaccess_flush_fixups(enabled_flush_types);
+ static_branch_enable(&uaccess_flush_key);
+ on_each_cpu(do_nothing, NULL, 1);
+ } else {
+ static_branch_disable(&uaccess_flush_key);
+ do_uaccess_flush_fixups(L1D_FLUSH_NONE);
+ }
+
+ uaccess_flush = enable;
+}
+
static void __ref init_fallback_flush(void)
{
u64 l1d_size, limit;
@@ -1079,6 +1105,15 @@ void setup_entry_flush(bool enable)
entry_flush_enable(enable);
}
+void setup_uaccess_flush(bool enable)
+{
+ if (cpu_mitigations_off())
+ return;
+
+ if (!no_uaccess_flush)
+ uaccess_flush_enable(enable);
+}
+
#ifdef CONFIG_DEBUG_FS
static int rfi_flush_set(void *data, u64 val)
{
@@ -1132,10 +1167,37 @@ static int entry_flush_get(void *data, u64 *val)
DEFINE_SIMPLE_ATTRIBUTE(fops_entry_flush, entry_flush_get, entry_flush_set, "%llu\n");
+static int uaccess_flush_set(void *data, u64 val)
+{
+ bool enable;
+
+ if (val == 1)
+ enable = true;
+ else if (val == 0)
+ enable = false;
+ else
+ return -EINVAL;
+
+ /* Only do anything if we're changing state */
+ if (enable != uaccess_flush)
+ uaccess_flush_enable(enable);
+
+ return 0;
+}
+
+static int uaccess_flush_get(void *data, u64 *val)
+{
+ *val = uaccess_flush ? 1 : 0;
+ return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_uaccess_flush, uaccess_flush_get, uaccess_flush_set, "%llu\n");
+
static __init int rfi_flush_debugfs_init(void)
{
debugfs_create_file("rfi_flush", 0600, powerpc_debugfs_root, NULL, &fops_rfi_flush);
debugfs_create_file("entry_flush", 0600, powerpc_debugfs_root, NULL, &fops_entry_flush);
+ debugfs_create_file("uaccess_flush", 0600, powerpc_debugfs_root, NULL, &fops_uaccess_flush);
return 0;
}
device_initcall(rfi_flush_debugfs_init);
diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S
index 715ec4d4614a..6db90cdf11da 100644
--- a/arch/powerpc/kernel/vmlinux.lds.S
+++ b/arch/powerpc/kernel/vmlinux.lds.S
@@ -131,6 +131,13 @@ SECTIONS
__stop___stf_entry_barrier_fixup = .;
}
+ . = ALIGN(8);
+ __uaccess_flush_fixup : AT(ADDR(__uaccess_flush_fixup) - LOAD_OFFSET) {
+ __start___uaccess_flush_fixup = .;
+ *(__uaccess_flush_fixup)
+ __stop___uaccess_flush_fixup = .;
+ }
+
. = ALIGN(8);
__entry_flush_fixup : AT(ADDR(__entry_flush_fixup) - LOAD_OFFSET) {
__start___entry_flush_fixup = .;
diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c
index 70e83cfd74aa..321c12a9ef6b 100644
--- a/arch/powerpc/lib/feature-fixups.c
+++ b/arch/powerpc/lib/feature-fixups.c
@@ -234,6 +234,56 @@ void do_stf_barrier_fixups(enum stf_barrier_type types)
do_stf_exit_barrier_fixups(types);
}
+void do_uaccess_flush_fixups(enum l1d_flush_type types)
+{
+ unsigned int instrs[4], *dest;
+ long *start, *end;
+ int i;
+
+ start = PTRRELOC(&__start___uaccess_flush_fixup);
+ end = PTRRELOC(&__stop___uaccess_flush_fixup);
+
+ instrs[0] = 0x60000000; /* nop */
+ instrs[1] = 0x60000000; /* nop */
+ instrs[2] = 0x60000000; /* nop */
+ instrs[3] = 0x4e800020; /* blr */
+
+ i = 0;
+ if (types == L1D_FLUSH_FALLBACK) {
+ instrs[3] = 0x60000000; /* nop */
+ /* fallthrough to fallback flush */
+ }
+
+ if (types & L1D_FLUSH_ORI) {
+ instrs[i++] = 0x63ff0000; /* ori 31,31,0 speculation barrier */
+ instrs[i++] = 0x63de0000; /* ori 30,30,0 L1d flush*/
+ }
+
+ if (types & L1D_FLUSH_MTTRIG)
+ instrs[i++] = 0x7c12dba6; /* mtspr TRIG2,r0 (SPR #882) */
+
+ for (i = 0; start < end; start++, i++) {
+ dest = (void *)start + *start;
+
+ pr_devel("patching dest %lx\n", (unsigned long)dest);
+
+ patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0]));
+
+ patch_instruction((struct ppc_inst *)(dest + 1), ppc_inst(instrs[1]));
+ patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2]));
+ patch_instruction((struct ppc_inst *)(dest + 3), ppc_inst(instrs[3]));
+ }
+
+ printk(KERN_DEBUG "uaccess-flush: patched %d locations (%s flush)\n", i,
+ (types == L1D_FLUSH_NONE) ? "no" :
+ (types == L1D_FLUSH_FALLBACK) ? "fallback displacement" :
+ (types & L1D_FLUSH_ORI) ? (types & L1D_FLUSH_MTTRIG)
+ ? "ori+mttrig type"
+ : "ori type" :
+ (types & L1D_FLUSH_MTTRIG) ? "mttrig type"
+ : "unknown");
+}
+
void do_entry_flush_fixups(enum l1d_flush_type types)
{
unsigned int instrs[3], *dest;
diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c
index d04a085c423d..087ec92acfc4 100644
--- a/arch/powerpc/platforms/powernv/setup.c
+++ b/arch/powerpc/platforms/powernv/setup.c
@@ -124,10 +124,12 @@ static void pnv_setup_rfi_flush(void)
/*
* If we are non-Power9 bare metal, we don't need to flush on kernel
- * entry: it fixes a P9 specific vulnerability.
+ * entry or after user access: they fix a P9 specific vulnerability.
*/
- if (!pvr_version_is(PVR_POWER9))
+ if (!pvr_version_is(PVR_POWER9)) {
security_ftr_clear(SEC_FTR_L1D_FLUSH_ENTRY);
+ security_ftr_clear(SEC_FTR_L1D_FLUSH_UACCESS);
+ }
enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) && \
(security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR) || \
@@ -139,6 +141,10 @@ static void pnv_setup_rfi_flush(void)
enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) &&
security_ftr_enabled(SEC_FTR_L1D_FLUSH_ENTRY);
setup_entry_flush(enable);
+
+ enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) &&
+ security_ftr_enabled(SEC_FTR_L1D_FLUSH_UACCESS);
+ setup_uaccess_flush(enable);
}
static void __init pnv_check_guarded_cores(void)
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 8136c5368ee4..3617cdb079f6 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -583,6 +583,10 @@ void pseries_setup_rfi_flush(void)
enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) &&
security_ftr_enabled(SEC_FTR_L1D_FLUSH_ENTRY);
setup_entry_flush(enable);
+
+ enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) &&
+ security_ftr_enabled(SEC_FTR_L1D_FLUSH_UACCESS);
+ setup_uaccess_flush(enable);
}
#ifdef CONFIG_PCI_IOV
--
2.25.1
^ permalink raw reply related
* [PATCH 2/7] powerpc/64s: flush L1D on kernel entry
From: Daniel Axtens @ 2020-11-19 23:13 UTC (permalink / raw)
To: linuxppc-dev; +Cc: dja, cmr, spoorts2, npiggin
In-Reply-To: <20201119231333.361771-1-dja@axtens.net>
From: Nicholas Piggin <npiggin@gmail.com>
IBM Power9 processors can speculatively operate on data in the L1 cache
before it has been completely validated, via a way-prediction mechanism. It
is not possible for an attacker to determine the contents of impermissible
memory using this method, since these systems implement a combination of
hardware and software security measures to prevent scenarios where
protected data could be leaked.
However these measures don't address the scenario where an attacker induces
the operating system to speculatively execute instructions using data that
the attacker controls. This can be used for example to speculatively bypass
"kernel user access prevention" techniques, as discovered by Anthony
Steinhauser of Google's Safeside Project. This is not an attack by itself,
but there is a possibility it could be used in conjunction with
side-channels or other weaknesses in the privileged code to construct an
attack.
This issue can be mitigated by flushing the L1 cache between privilege
boundaries of concern. This patch flushes the L1 cache on kernel entry.
This is part of the fix for CVE-2020-4788.
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Daniel Axtens <dja@axtens.net>
---
.../admin-guide/kernel-parameters.txt | 3 +
arch/powerpc/include/asm/exception-64s.h | 9 ++-
arch/powerpc/include/asm/feature-fixups.h | 10 ++++
arch/powerpc/include/asm/security_features.h | 4 ++
arch/powerpc/include/asm/setup.h | 3 +
arch/powerpc/kernel/exceptions-64s.S | 37 ++++++++++++
arch/powerpc/kernel/setup_64.c | 60 ++++++++++++++++++-
arch/powerpc/kernel/vmlinux.lds.S | 7 +++
arch/powerpc/lib/feature-fixups.c | 54 +++++++++++++++++
arch/powerpc/platforms/powernv/setup.c | 11 ++++
arch/powerpc/platforms/pseries/setup.c | 4 ++
11 files changed, 200 insertions(+), 2 deletions(-)
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index af4ec2a8f32a..282789cfa1dc 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -2875,6 +2875,7 @@
mds=off [X86]
tsx_async_abort=off [X86]
kvm.nx_huge_pages=off [X86]
+ no_entry_flush [PPC]
Exceptions:
This does not have any effect on
@@ -3203,6 +3204,8 @@
noefi Disable EFI runtime services support.
+ no_entry_flush [PPC] Don't flush the L1-D cache when entering the kernel.
+
noexec [IA-64]
noexec [X86]
diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h
index ebe95aa04d53..83fa88bc9935 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -57,11 +57,18 @@
nop; \
nop
+#define ENTRY_FLUSH_SLOT \
+ ENTRY_FLUSH_FIXUP_SECTION; \
+ nop; \
+ nop; \
+ nop;
+
/*
* r10 must be free to use, r13 must be paca
*/
#define INTERRUPT_TO_KERNEL \
- STF_ENTRY_BARRIER_SLOT
+ STF_ENTRY_BARRIER_SLOT; \
+ ENTRY_FLUSH_SLOT
/*
* Macros for annotating the expected destination of (h)rfid
diff --git a/arch/powerpc/include/asm/feature-fixups.h b/arch/powerpc/include/asm/feature-fixups.h
index b0af97add751..06a48219bbf2 100644
--- a/arch/powerpc/include/asm/feature-fixups.h
+++ b/arch/powerpc/include/asm/feature-fixups.h
@@ -205,6 +205,14 @@ label##3: \
FTR_ENTRY_OFFSET 955b-956b; \
.popsection;
+#define ENTRY_FLUSH_FIXUP_SECTION \
+957: \
+ .pushsection __entry_flush_fixup,"a"; \
+ .align 2; \
+958: \
+ FTR_ENTRY_OFFSET 957b-958b; \
+ .popsection;
+
#define RFI_FLUSH_FIXUP_SECTION \
951: \
.pushsection __rfi_flush_fixup,"a"; \
@@ -237,8 +245,10 @@ label##3: \
#include <linux/types.h>
extern long stf_barrier_fallback;
+extern long entry_flush_fallback;
extern long __start___stf_entry_barrier_fixup, __stop___stf_entry_barrier_fixup;
extern long __start___stf_exit_barrier_fixup, __stop___stf_exit_barrier_fixup;
+extern long __start___entry_flush_fixup, __stop___entry_flush_fixup;
extern long __start___rfi_flush_fixup, __stop___rfi_flush_fixup;
extern long __start___barrier_nospec_fixup, __stop___barrier_nospec_fixup;
extern long __start__btb_flush_fixup, __stop__btb_flush_fixup;
diff --git a/arch/powerpc/include/asm/security_features.h b/arch/powerpc/include/asm/security_features.h
index fbb8fa32150f..9e7459d2edca 100644
--- a/arch/powerpc/include/asm/security_features.h
+++ b/arch/powerpc/include/asm/security_features.h
@@ -86,12 +86,16 @@ static inline bool security_ftr_enabled(u64 feature)
// Software required to flush link stack on context switch
#define SEC_FTR_FLUSH_LINK_STACK 0x0000000000001000ull
+// The L1-D cache should be flushed when entering the kernel
+#define SEC_FTR_L1D_FLUSH_ENTRY 0x0000000000004000ull
+
// Features enabled by default
#define SEC_FTR_DEFAULT \
(SEC_FTR_L1D_FLUSH_HV | \
SEC_FTR_L1D_FLUSH_PR | \
SEC_FTR_BNDS_CHK_SPEC_BAR | \
+ SEC_FTR_L1D_FLUSH_ENTRY | \
SEC_FTR_FAVOUR_SECURITY)
#endif /* _ASM_POWERPC_SECURITY_FEATURES_H */
diff --git a/arch/powerpc/include/asm/setup.h b/arch/powerpc/include/asm/setup.h
index 9efbddee2bca..e2fcd3e874f8 100644
--- a/arch/powerpc/include/asm/setup.h
+++ b/arch/powerpc/include/asm/setup.h
@@ -52,12 +52,15 @@ enum l1d_flush_type {
};
void setup_rfi_flush(enum l1d_flush_type, bool enable);
+void setup_entry_flush(bool enable);
+void setup_uaccess_flush(bool enable);
void do_rfi_flush_fixups(enum l1d_flush_type types);
#ifdef CONFIG_PPC_BARRIER_NOSPEC
void setup_barrier_nospec(void);
#else
static inline void setup_barrier_nospec(void) { };
#endif
+void do_entry_flush_fixups(enum l1d_flush_type types);
void do_barrier_nospec_fixups(bool enable);
extern bool barrier_nospec_enabled;
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index f7d748b88705..5577dd887d37 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -2951,6 +2951,43 @@ TRAMP_REAL_BEGIN(stf_barrier_fallback)
.endr
blr
+TRAMP_REAL_BEGIN(entry_flush_fallback)
+ std r9,PACA_EXRFI+EX_R9(r13)
+ std r10,PACA_EXRFI+EX_R10(r13)
+ std r11,PACA_EXRFI+EX_R11(r13)
+ mfctr r9
+ ld r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13)
+ ld r11,PACA_L1D_FLUSH_SIZE(r13)
+ srdi r11,r11,(7 + 3) /* 128 byte lines, unrolled 8x */
+ mtctr r11
+ DCBT_BOOK3S_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */
+
+ /* order ld/st prior to dcbt stop all streams with flushing */
+ sync
+
+ /*
+ * The load addresses are at staggered offsets within cachelines,
+ * which suits some pipelines better (on others it should not
+ * hurt).
+ */
+1:
+ ld r11,(0x80 + 8)*0(r10)
+ ld r11,(0x80 + 8)*1(r10)
+ ld r11,(0x80 + 8)*2(r10)
+ ld r11,(0x80 + 8)*3(r10)
+ ld r11,(0x80 + 8)*4(r10)
+ ld r11,(0x80 + 8)*5(r10)
+ ld r11,(0x80 + 8)*6(r10)
+ ld r11,(0x80 + 8)*7(r10)
+ addi r10,r10,0x80*8
+ bdnz 1b
+
+ mtctr r9
+ ld r9,PACA_EXRFI+EX_R9(r13)
+ ld r10,PACA_EXRFI+EX_R10(r13)
+ ld r11,PACA_EXRFI+EX_R11(r13)
+ blr
+
TRAMP_REAL_BEGIN(rfi_flush_fallback)
SET_SCRATCH0(r13);
GET_PACA(r13);
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index bb9cab3641d7..7d447b43ad13 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -945,7 +945,9 @@ early_initcall(disable_hardlockup_detector);
static enum l1d_flush_type enabled_flush_types;
static void *l1d_flush_fallback_area;
static bool no_rfi_flush;
+static bool no_entry_flush;
bool rfi_flush;
+bool entry_flush;
static int __init handle_no_rfi_flush(char *p)
{
@@ -955,6 +957,14 @@ static int __init handle_no_rfi_flush(char *p)
}
early_param("no_rfi_flush", handle_no_rfi_flush);
+static int __init handle_no_entry_flush(char *p)
+{
+ pr_info("entry-flush: disabled on command line.");
+ no_entry_flush = true;
+ return 0;
+}
+early_param("no_entry_flush", handle_no_entry_flush);
+
/*
* The RFI flush is not KPTI, but because users will see doco that says to use
* nopti we hijack that option here to also disable the RFI flush.
@@ -986,6 +996,18 @@ void rfi_flush_enable(bool enable)
rfi_flush = enable;
}
+void entry_flush_enable(bool enable)
+{
+ if (enable) {
+ do_entry_flush_fixups(enabled_flush_types);
+ on_each_cpu(do_nothing, NULL, 1);
+ } else {
+ do_entry_flush_fixups(L1D_FLUSH_NONE);
+ }
+
+ entry_flush = enable;
+}
+
static void __ref init_fallback_flush(void)
{
u64 l1d_size, limit;
@@ -1044,10 +1066,19 @@ void setup_rfi_flush(enum l1d_flush_type types, bool enable)
enabled_flush_types = types;
- if (!no_rfi_flush && !cpu_mitigations_off())
+ if (!cpu_mitigations_off() && !no_rfi_flush)
rfi_flush_enable(enable);
}
+void setup_entry_flush(bool enable)
+{
+ if (cpu_mitigations_off())
+ return;
+
+ if (!no_entry_flush)
+ entry_flush_enable(enable);
+}
+
#ifdef CONFIG_DEBUG_FS
static int rfi_flush_set(void *data, u64 val)
{
@@ -1075,9 +1106,36 @@ static int rfi_flush_get(void *data, u64 *val)
DEFINE_SIMPLE_ATTRIBUTE(fops_rfi_flush, rfi_flush_get, rfi_flush_set, "%llu\n");
+static int entry_flush_set(void *data, u64 val)
+{
+ bool enable;
+
+ if (val == 1)
+ enable = true;
+ else if (val == 0)
+ enable = false;
+ else
+ return -EINVAL;
+
+ /* Only do anything if we're changing state */
+ if (enable != entry_flush)
+ entry_flush_enable(enable);
+
+ return 0;
+}
+
+static int entry_flush_get(void *data, u64 *val)
+{
+ *val = entry_flush ? 1 : 0;
+ return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_entry_flush, entry_flush_get, entry_flush_set, "%llu\n");
+
static __init int rfi_flush_debugfs_init(void)
{
debugfs_create_file("rfi_flush", 0600, powerpc_debugfs_root, NULL, &fops_rfi_flush);
+ debugfs_create_file("entry_flush", 0600, powerpc_debugfs_root, NULL, &fops_entry_flush);
return 0;
}
device_initcall(rfi_flush_debugfs_init);
diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S
index e0548b4950de..715ec4d4614a 100644
--- a/arch/powerpc/kernel/vmlinux.lds.S
+++ b/arch/powerpc/kernel/vmlinux.lds.S
@@ -131,6 +131,13 @@ SECTIONS
__stop___stf_entry_barrier_fixup = .;
}
+ . = ALIGN(8);
+ __entry_flush_fixup : AT(ADDR(__entry_flush_fixup) - LOAD_OFFSET) {
+ __start___entry_flush_fixup = .;
+ *(__entry_flush_fixup)
+ __stop___entry_flush_fixup = .;
+ }
+
. = ALIGN(8);
__stf_exit_barrier_fixup : AT(ADDR(__stf_exit_barrier_fixup) - LOAD_OFFSET) {
__start___stf_exit_barrier_fixup = .;
diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c
index 4c0a7ee9fa00..70e83cfd74aa 100644
--- a/arch/powerpc/lib/feature-fixups.c
+++ b/arch/powerpc/lib/feature-fixups.c
@@ -234,6 +234,60 @@ void do_stf_barrier_fixups(enum stf_barrier_type types)
do_stf_exit_barrier_fixups(types);
}
+void do_entry_flush_fixups(enum l1d_flush_type types)
+{
+ unsigned int instrs[3], *dest;
+ long *start, *end;
+ int i;
+
+ start = PTRRELOC(&__start___entry_flush_fixup);
+ end = PTRRELOC(&__stop___entry_flush_fixup);
+
+ instrs[0] = 0x60000000; /* nop */
+ instrs[1] = 0x60000000; /* nop */
+ instrs[2] = 0x60000000; /* nop */
+
+ i = 0;
+ if (types == L1D_FLUSH_FALLBACK) {
+ instrs[i++] = 0x7d4802a6; /* mflr r10 */
+ instrs[i++] = 0x60000000; /* branch patched below */
+ instrs[i++] = 0x7d4803a6; /* mtlr r10 */
+ }
+
+ if (types & L1D_FLUSH_ORI) {
+ instrs[i++] = 0x63ff0000; /* ori 31,31,0 speculation barrier */
+ instrs[i++] = 0x63de0000; /* ori 30,30,0 L1d flush*/
+ }
+
+ if (types & L1D_FLUSH_MTTRIG)
+ instrs[i++] = 0x7c12dba6; /* mtspr TRIG2,r0 (SPR #882) */
+
+ for (i = 0; start < end; start++, i++) {
+ dest = (void *)start + *start;
+
+ pr_devel("patching dest %lx\n", (unsigned long)dest);
+
+ patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0]));
+
+ if (types == L1D_FLUSH_FALLBACK)
+ patch_branch((struct ppc_inst *)(dest + 1), (unsigned long)&entry_flush_fallback,
+ BRANCH_SET_LINK);
+ else
+ patch_instruction((struct ppc_inst *)(dest + 1), ppc_inst(instrs[1]));
+
+ patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2]));
+ }
+
+ printk(KERN_DEBUG "entry-flush: patched %d locations (%s flush)\n", i,
+ (types == L1D_FLUSH_NONE) ? "no" :
+ (types == L1D_FLUSH_FALLBACK) ? "fallback displacement" :
+ (types & L1D_FLUSH_ORI) ? (types & L1D_FLUSH_MTTRIG)
+ ? "ori+mttrig type"
+ : "ori type" :
+ (types & L1D_FLUSH_MTTRIG) ? "mttrig type"
+ : "unknown");
+}
+
void do_rfi_flush_fixups(enum l1d_flush_type types)
{
unsigned int instrs[3], *dest;
diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c
index 9acaa0f131b9..d04a085c423d 100644
--- a/arch/powerpc/platforms/powernv/setup.c
+++ b/arch/powerpc/platforms/powernv/setup.c
@@ -122,12 +122,23 @@ static void pnv_setup_rfi_flush(void)
type = L1D_FLUSH_ORI;
}
+ /*
+ * If we are non-Power9 bare metal, we don't need to flush on kernel
+ * entry: it fixes a P9 specific vulnerability.
+ */
+ if (!pvr_version_is(PVR_POWER9))
+ security_ftr_clear(SEC_FTR_L1D_FLUSH_ENTRY);
+
enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) && \
(security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR) || \
security_ftr_enabled(SEC_FTR_L1D_FLUSH_HV));
setup_rfi_flush(type, enable);
setup_count_cache_flush();
+
+ enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) &&
+ security_ftr_enabled(SEC_FTR_L1D_FLUSH_ENTRY);
+ setup_entry_flush(enable);
}
static void __init pnv_check_guarded_cores(void)
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 633c45ec406d..8136c5368ee4 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -579,6 +579,10 @@ void pseries_setup_rfi_flush(void)
setup_rfi_flush(types, enable);
setup_count_cache_flush();
+
+ enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) &&
+ security_ftr_enabled(SEC_FTR_L1D_FLUSH_ENTRY);
+ setup_entry_flush(enable);
}
#ifdef CONFIG_PCI_IOV
--
2.25.1
^ permalink raw reply related
* [PATCH 1/7] selftests/powerpc: rfi_flush: disable entry flush if present
From: Daniel Axtens @ 2020-11-19 23:13 UTC (permalink / raw)
To: linuxppc-dev; +Cc: dja, cmr, spoorts2, npiggin
In-Reply-To: <20201119231333.361771-1-dja@axtens.net>
From: Russell Currey <ruscur@russell.cc>
We are about to add an entry flush. The rfi (exit) flush test measures
the number of L1D flushes over a syscall with the RFI flush enabled and
disabled. But if the entry flush is also enabled, the effect of enabling
and disabling the RFI flush is masked.
If there is a debugfs entry for the entry flush, disable it during the RFI
flush and restore it later.
Reported-by: Spoorthy S <spoorts2@in.ibm.com>
Signed-off-by: Russell Currey <ruscur@russell.cc>
Signed-off-by: Daniel Axtens <dja@axtens.net>
---
.../selftests/powerpc/security/rfi_flush.c | 35 +++++++++++++++----
1 file changed, 29 insertions(+), 6 deletions(-)
diff --git a/tools/testing/selftests/powerpc/security/rfi_flush.c b/tools/testing/selftests/powerpc/security/rfi_flush.c
index 93a65bd1f231..4e7b2776c367 100644
--- a/tools/testing/selftests/powerpc/security/rfi_flush.c
+++ b/tools/testing/selftests/powerpc/security/rfi_flush.c
@@ -85,19 +85,33 @@ int rfi_flush_test(void)
__u64 l1d_misses_total = 0;
unsigned long iterations = 100000, zero_size = 24 * 1024;
unsigned long l1d_misses_expected;
- int rfi_flush_org, rfi_flush;
+ int rfi_flush_orig, rfi_flush;
+ int have_entry_flush, entry_flush_orig;
SKIP_IF(geteuid() != 0);
// The PMU event we use only works on Power7 or later
SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_06));
- if (read_debugfs_file("powerpc/rfi_flush", &rfi_flush_org)) {
+ if (read_debugfs_file("powerpc/rfi_flush", &rfi_flush_orig) < 0) {
perror("Unable to read powerpc/rfi_flush debugfs file");
SKIP_IF(1);
}
- rfi_flush = rfi_flush_org;
+ if (read_debugfs_file("powerpc/entry_flush", &entry_flush_orig) < 0) {
+ have_entry_flush = 0;
+ } else {
+ have_entry_flush = 1;
+
+ if (entry_flush_orig != 0) {
+ if (write_debugfs_file("powerpc/entry_flush", 0) < 0) {
+ perror("error writing to powerpc/entry_flush debugfs file");
+ return 1;
+ }
+ }
+ }
+
+ rfi_flush = rfi_flush_orig;
fd = perf_event_open_counter(PERF_TYPE_RAW, /* L1d miss */ 0x400f0, -1);
FAIL_IF(fd < 0);
@@ -106,6 +120,7 @@ int rfi_flush_test(void)
FAIL_IF(perf_event_enable(fd));
+ // disable L1 prefetching
set_dscr(1);
iter = repetitions;
@@ -147,8 +162,8 @@ again:
repetitions * l1d_misses_expected / 2,
passes, repetitions);
- if (rfi_flush == rfi_flush_org) {
- rfi_flush = !rfi_flush_org;
+ if (rfi_flush == rfi_flush_orig) {
+ rfi_flush = !rfi_flush_orig;
if (write_debugfs_file("powerpc/rfi_flush", rfi_flush) < 0) {
perror("error writing to powerpc/rfi_flush debugfs file");
return 1;
@@ -164,11 +179,19 @@ again:
set_dscr(0);
- if (write_debugfs_file("powerpc/rfi_flush", rfi_flush_org) < 0) {
+ if (write_debugfs_file("powerpc/rfi_flush", rfi_flush_orig) < 0) {
perror("unable to restore original value of powerpc/rfi_flush debugfs file");
return 1;
}
+ if (have_entry_flush) {
+ if (write_debugfs_file("powerpc/entry_flush", entry_flush_orig) < 0) {
+ perror("unable to restore original value of powerpc/entry_flush "
+ "debugfs file");
+ return 1;
+ }
+ }
+
return rc;
}
--
2.25.1
^ permalink raw reply related
* [PATCH 0/7] CVE-2020-4788: Speculation on incompletely validated data on IBM Power9
From: Daniel Axtens @ 2020-11-19 23:13 UTC (permalink / raw)
To: linuxppc-dev; +Cc: dja, cmr, spoorts2, npiggin
IBM Power9 processors can speculatively operate on data in the L1
cache before it has been completely validated, via a way-prediction
mechanism. It is not possible for an attacker to determine the
contents of impermissible memory using this method, since these
systems implement a combination of hardware and software security
measures to prevent scenarios where protected data could be leaked.
However these measures don't address the scenario where an attacker
induces the operating system to speculatively execute instructions
using data that the attacker controls. This can be used for example to
speculatively bypass "kernel user access prevention" techniques, as
discovered by Anthony Steinhauser of Google's Safeside Project. This
is not an attack by itself, but there is a possibility it could be
used in conjunction with side-channels or other weaknesses in the
privileged code to construct an attack.
This issue can be mitigated by flushing the L1 cache between privilege
boundaries of concern. This series flushes the cache on kernel entry and
after kernel user accesses.
Thanks to Nick Piggin, Russell Currey, Christopher M. Riedl, Michael
Ellerman and Spoorthy S for their work in developing, optimising,
testing and backporting these fixes, and to the many others who helped
behind the scenes.
Daniel Axtens (3):
selftests/powerpc: entry flush test
selftests/powerpc: refactor entry and rfi_flush tests
powerpc/64s: rename pnv|pseries_setup_rfi_flush to
_setup_security_mitigations
Michael Ellerman (1):
powerpc: Only include kup-radix.h for 64-bit Book3S
Nicholas Piggin (2):
powerpc/64s: flush L1D on kernel entry
powerpc/64s: flush L1D after user accesses
Russell Currey (1):
selftests/powerpc: rfi_flush: disable entry flush if present
.../admin-guide/kernel-parameters.txt | 7 +
.../powerpc/include/asm/book3s/64/kup-radix.h | 66 ++++++---
arch/powerpc/include/asm/exception-64s.h | 12 +-
arch/powerpc/include/asm/feature-fixups.h | 19 +++
arch/powerpc/include/asm/kup.h | 26 +++-
arch/powerpc/include/asm/security_features.h | 7 +
arch/powerpc/include/asm/setup.h | 4 +
arch/powerpc/kernel/exceptions-64s.S | 80 +++++-----
arch/powerpc/kernel/setup_64.c | 122 ++++++++++++++-
arch/powerpc/kernel/syscall_64.c | 2 +-
arch/powerpc/kernel/vmlinux.lds.S | 14 ++
arch/powerpc/lib/feature-fixups.c | 104 +++++++++++++
arch/powerpc/platforms/powernv/setup.c | 24 ++-
arch/powerpc/platforms/pseries/mobility.c | 4 +-
arch/powerpc/platforms/pseries/pseries.h | 2 +-
arch/powerpc/platforms/pseries/setup.c | 15 +-
.../testing/selftests/powerpc/include/utils.h | 5 +
.../selftests/powerpc/security/.gitignore | 1 +
.../selftests/powerpc/security/Makefile | 4 +-
.../selftests/powerpc/security/entry_flush.c | 139 ++++++++++++++++++
.../selftests/powerpc/security/flush_utils.c | 70 +++++++++
.../selftests/powerpc/security/flush_utils.h | 17 +++
.../selftests/powerpc/security/rfi_flush.c | 96 ++++--------
23 files changed, 693 insertions(+), 147 deletions(-)
create mode 100644 tools/testing/selftests/powerpc/security/entry_flush.c
create mode 100644 tools/testing/selftests/powerpc/security/flush_utils.c
create mode 100644 tools/testing/selftests/powerpc/security/flush_utils.h
--
2.25.1
^ permalink raw reply
* Re: [PATCH v3 2/2] powerpc/ptrace: Hard wire PT_SOFTE value to 1 in gpr_get() too
From: Oleg Nesterov @ 2020-11-19 22:43 UTC (permalink / raw)
To: Christophe Leroy
Cc: Christophe Leroy, Madhavan Srinivasan, linuxppc-dev,
Nicholas Piggin, linux-kernel, Paul Mackerras, Al Viro,
Aneesh Kumar K.V, Jan Kratochvil
In-Reply-To: <20201119221033.Horde.be-msjDTeIW4XeXARjUu7g1@messagerie.c-s.fr>
On 11/19, Christophe Leroy wrote:
>
> I think the following should work, and not require the first patch (compile
> tested only).
>
> --- a/arch/powerpc/kernel/ptrace/ptrace-view.c
> +++ b/arch/powerpc/kernel/ptrace/ptrace-view.c
> @@ -234,9 +234,21 @@ static int gpr_get(struct task_struct *target, const
> struct user_regset *regset,
> BUILD_BUG_ON(offsetof(struct pt_regs, orig_gpr3) !=
> offsetof(struct pt_regs, msr) + sizeof(long));
>
> +#ifdef CONFIG_PPC64
> + membuf_write(&to, &target->thread.regs->orig_gpr3,
> + offsetof(struct pt_regs, softe) - offsetof(struct pt_regs,
> orig_gpr3));
> + membuf_store(&to, 1UL);
> +
> + BUILD_BUG_ON(offsetof(struct pt_regs, trap) !=
> + offsetof(struct pt_regs, softe) + sizeof(long));
> +
> + membuf_write(&to, &target->thread.regs->trap,
> + sizeof(struct user_pt_regs) - offsetof(struct pt_regs, trap));
> +#else
> membuf_write(&to, &target->thread.regs->orig_gpr3,
> sizeof(struct user_pt_regs) -
> offsetof(struct pt_regs, orig_gpr3));
> +#endif
> return membuf_zero(&to, ELF_NGREG * sizeof(unsigned long) -
> sizeof(struct user_pt_regs));
> }
Probably yes.
This mirrors the previous patch I sent (https://lore.kernel.org/lkml/20190917143753.GA12300@redhat.com/)
and this is exactly what I tried to avoid, we can make a simpler fix now.
But let me repeat, I agree with any fix even if imp my version simplifies the code, just
commit this change and lets forget this problem.
Oleg.
^ permalink raw reply
* Re: [PATCH v2 2/2] kbuild: Disable CONFIG_LD_ORPHAN_WARN for ld.lld 10.0.1
From: Nick Desaulniers @ 2020-11-19 21:13 UTC (permalink / raw)
To: Nathan Chancellor
Cc: Michal Marek, Kees Cook, kernelci . org bot,
Linux Kbuild mailing list, Mark Brown, Catalin Marinas,
Masahiro Yamada, maintainer:X86 ARCHITECTURE (32-BIT AND 64-BIT),
Russell King, LKML, linuxppc-dev, Arvind Sankar, Ingo Molnar,
Borislav Petkov, clang-built-linux, Thomas Gleixner, Will Deacon,
Linux ARM
In-Reply-To: <20201119204656.3261686-2-natechancellor@gmail.com>
On Thu, Nov 19, 2020 at 12:57 PM Nathan Chancellor
<natechancellor@gmail.com> wrote:
>
> ld.lld 10.0.1 spews a bunch of various warnings about .rela sections,
> along with a few others. Newer versions of ld.lld do not have these
> warnings. As a result, do not add '--orphan-handling=warn' to
> LDFLAGS_vmlinux if ld.lld's version is not new enough.
>
> Link: https://github.com/ClangBuiltLinux/linux/issues/1187
> Link: https://github.com/ClangBuiltLinux/linux/issues/1193
> Reported-by: Arvind Sankar <nivedita@alum.mit.edu>
> Reported-by: kernelci.org bot <bot@kernelci.org>
> Reported-by: Mark Brown <broonie@kernel.org>
> Reviewed-by: Kees Cook <keescook@chromium.org>
> Signed-off-by: Nathan Chancellor <natechancellor@gmail.com>
Thanks for the additions in v2.
Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
> ---
>
> v1 -> v2:
>
> * Add condition as a depends on line (Kees Cook)
>
> * Capture output of "$* --version" to avoid invoking linker twice (Nick
> Desaulniers)
>
> * Improve documentation of script in comments (Nick Desaulniers)
>
> * Pick up review tag from Kees
>
> MAINTAINERS | 1 +
> init/Kconfig | 5 +++++
> scripts/lld-version.sh | 20 ++++++++++++++++++++
> 3 files changed, 26 insertions(+)
> create mode 100755 scripts/lld-version.sh
>
> diff --git a/MAINTAINERS b/MAINTAINERS
> index e451dcce054f..e6f74f130ae1 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -4284,6 +4284,7 @@ B: https://github.com/ClangBuiltLinux/linux/issues
> C: irc://chat.freenode.net/clangbuiltlinux
> F: Documentation/kbuild/llvm.rst
> F: scripts/clang-tools/
> +F: scripts/lld-version.sh
> K: \b(?i:clang|llvm)\b
>
> CLEANCACHE API
> diff --git a/init/Kconfig b/init/Kconfig
> index 92c58b45abb8..b9037d6c5ab3 100644
> --- a/init/Kconfig
> +++ b/init/Kconfig
> @@ -47,6 +47,10 @@ config CLANG_VERSION
> int
> default $(shell,$(srctree)/scripts/clang-version.sh $(CC))
>
> +config LLD_VERSION
> + int
> + default $(shell,$(srctree)/scripts/lld-version.sh $(LD))
> +
> config CC_CAN_LINK
> bool
> default $(success,$(srctree)/scripts/cc-can-link.sh $(CC) $(CLANG_FLAGS) $(m64-flag)) if 64BIT
> @@ -1351,6 +1355,7 @@ config LD_DEAD_CODE_DATA_ELIMINATION
> config LD_ORPHAN_WARN
> def_bool y
> depends on ARCH_WANT_LD_ORPHAN_WARN
> + depends on !LD_IS_LLD || LLD_VERSION >= 110000
> depends on $(ld-option,--orphan-handling=warn)
>
> config SYSCTL
> diff --git a/scripts/lld-version.sh b/scripts/lld-version.sh
> new file mode 100755
> index 000000000000..d70edb4d8a4f
> --- /dev/null
> +++ b/scripts/lld-version.sh
> @@ -0,0 +1,20 @@
> +#!/bin/sh
> +# SPDX-License-Identifier: GPL-2.0
> +#
> +# Usage: $ ./scripts/lld-version.sh ld.lld
> +#
> +# Print the linker version of `ld.lld' in a 5 or 6-digit form
> +# such as `100001' for ld.lld 10.0.1 etc.
> +
> +linker_string="$($* --version)"
> +
> +if ! ( echo $linker_string | grep -q LLD ); then
> + echo 0
> + exit 1
> +fi
> +
> +VERSION=$(echo $linker_string | cut -d ' ' -f 2)
> +MAJOR=$(echo $VERSION | cut -d . -f 1)
> +MINOR=$(echo $VERSION | cut -d . -f 2)
> +PATCHLEVEL=$(echo $VERSION | cut -d . -f 3)
> +printf "%d%02d%02d\\n" $MAJOR $MINOR $PATCHLEVEL
> --
> 2.29.2
>
--
Thanks,
~Nick Desaulniers
^ permalink raw reply
* Re: [PATCH v3 2/2] powerpc/ptrace: Hard wire PT_SOFTE value to 1 in gpr_get() too
From: Christophe Leroy @ 2020-11-19 21:10 UTC (permalink / raw)
To: Oleg Nesterov
Cc: Christophe Leroy, Madhavan Srinivasan, linuxppc-dev,
Nicholas Piggin, linux-kernel, Paul Mackerras, Al Viro,
Aneesh Kumar K.V, Jan Kratochvil
In-Reply-To: <20201119160247.GB5188@redhat.com>
Quoting Oleg Nesterov <oleg@redhat.com>:
> The commit a8a4b03ab95f ("powerpc: Hard wire PT_SOFTE value to 1 in
> ptrace & signals") changed ptrace_get_reg(PT_SOFTE) to report 0x1,
> but PTRACE_GETREGS still copies pt_regs->softe as is.
>
> This is not consistent and this breaks the user-regs-peekpoke test
> from https://sourceware.org/systemtap/wiki/utrace/tests/
>
> Reported-by: Jan Kratochvil <jan.kratochvil@redhat.com>
> Signed-off-by: Oleg Nesterov <oleg@redhat.com>
> ---
> arch/powerpc/kernel/ptrace/ptrace-tm.c | 8 +++++++-
> arch/powerpc/kernel/ptrace/ptrace-view.c | 8 +++++++-
> 2 files changed, 14 insertions(+), 2 deletions(-)
>
I think the following should work, and not require the first patch
(compile tested only).
diff --git a/arch/powerpc/kernel/ptrace/ptrace-tm.c
b/arch/powerpc/kernel/ptrace/ptrace-tm.c
index 54f2d076206f..f779b3bc0279 100644
--- a/arch/powerpc/kernel/ptrace/ptrace-tm.c
+++ b/arch/powerpc/kernel/ptrace/ptrace-tm.c
@@ -104,8 +104,14 @@ int tm_cgpr_get(struct task_struct *target, const
struct user_regset *regset,
offsetof(struct pt_regs, msr) + sizeof(long));
membuf_write(&to, &target->thread.ckpt_regs.orig_gpr3,
- sizeof(struct user_pt_regs) -
- offsetof(struct pt_regs, orig_gpr3));
+ offsetof(struct pt_regs, softe) - offsetof(struct pt_regs,
orig_gpr3));
+ membuf_store(&to, 1UL);
+
+ BUILD_BUG_ON(offsetof(struct pt_regs, trap) !=
+ offsetof(struct pt_regs, softe) + sizeof(long));
+
+ membuf_write(&to, &target->thread.ckpt_regs.trap,
+ sizeof(struct user_pt_regs) - offsetof(struct pt_regs, trap));
return membuf_zero(&to, ELF_NGREG * sizeof(unsigned long) -
sizeof(struct user_pt_regs));
}
diff --git a/arch/powerpc/kernel/ptrace/ptrace-view.c
b/arch/powerpc/kernel/ptrace/ptrace-view.c
index 7e6478e7ed07..736bfbf33890 100644
--- a/arch/powerpc/kernel/ptrace/ptrace-view.c
+++ b/arch/powerpc/kernel/ptrace/ptrace-view.c
@@ -234,9 +234,21 @@ static int gpr_get(struct task_struct *target,
const struct user_regset *regset,
BUILD_BUG_ON(offsetof(struct pt_regs, orig_gpr3) !=
offsetof(struct pt_regs, msr) + sizeof(long));
+#ifdef CONFIG_PPC64
+ membuf_write(&to, &target->thread.regs->orig_gpr3,
+ offsetof(struct pt_regs, softe) - offsetof(struct pt_regs,
orig_gpr3));
+ membuf_store(&to, 1UL);
+
+ BUILD_BUG_ON(offsetof(struct pt_regs, trap) !=
+ offsetof(struct pt_regs, softe) + sizeof(long));
+
+ membuf_write(&to, &target->thread.regs->trap,
+ sizeof(struct user_pt_regs) - offsetof(struct pt_regs, trap));
+#else
membuf_write(&to, &target->thread.regs->orig_gpr3,
sizeof(struct user_pt_regs) -
offsetof(struct pt_regs, orig_gpr3));
+#endif
return membuf_zero(&to, ELF_NGREG * sizeof(unsigned long) -
sizeof(struct user_pt_regs));
}
---
Christophe
^ permalink raw reply related
* Re: [PATCH v3 23/23] mtd: devices: powernv_flash: Add function names to headers and fix 'dev'
From: Miquel Raynal @ 2020-11-19 21:07 UTC (permalink / raw)
To: Lee Jones
Cc: Vignesh Raghavendra, Richard Weinberger, Miquel Raynal,
linux-kernel, Paul Mackerras, linux-mtd, linuxppc-dev
In-Reply-To: <20201109182206.3037326-24-lee.jones@linaro.org>
On Mon, 2020-11-09 at 18:22:06 UTC, Lee Jones wrote:
> Fixes the following W=1 kernel build warning(s):
>
> drivers/mtd/devices/powernv_flash.c:129: warning: Cannot understand * @mtd: the device
> drivers/mtd/devices/powernv_flash.c:145: warning: Cannot understand * @mtd: the device
> drivers/mtd/devices/powernv_flash.c:161: warning: Cannot understand * @mtd: the device
> drivers/mtd/devices/powernv_flash.c:184: warning: Function parameter or member 'dev' not described in 'powernv_flash_set_driver_info'
>
> Cc: Miquel Raynal <miquel.raynal@bootlin.com>
> Cc: Richard Weinberger <richard@nod.at>
> Cc: Vignesh Raghavendra <vigneshr@ti.com>
> Cc: Michael Ellerman <mpe@ellerman.id.au>
> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
> Cc: Paul Mackerras <paulus@samba.org>
> Cc: linux-mtd@lists.infradead.org
> Cc: linuxppc-dev@lists.ozlabs.org
> Signed-off-by: Lee Jones <lee.jones@linaro.org>
Applied to https://git.kernel.org/pub/scm/linux/kernel/git/mtd/linux.git nand/next, thanks.
Miquel
^ permalink raw reply
* Re: [PATCH net-next v2 9/9] ibmvnic: Do not replenish RX buffers after every polling loop
From: Thomas Falcon @ 2020-11-19 20:58 UTC (permalink / raw)
To: ljp
Cc: cforno12, netdev, ricklind, dnbanerg, Linuxppc-dev, drt, brking,
kuba, sukadev, linuxppc-dev
In-Reply-To: <7853649c6c1f2f4ce6d8bf9643cd1a43@linux.vnet.ibm.com>
On 11/19/20 2:38 PM, ljp wrote:
> On 2020-11-19 14:26, Thomas Falcon wrote:
>> On 11/19/20 3:43 AM, ljp wrote:
>>
>>> On 2020-11-18 19:12, Thomas Falcon wrote:
>>>
>>>> From: "Dwip N. Banerjee" <dnbanerg@us.ibm.com>
>>>>
>>>> Reduce the amount of time spent replenishing RX buffers by
>>>> only doing so once available buffers has fallen under a certain
>>>> threshold, in this case half of the total number of buffers, or
>>>> if the polling loop exits before the packets processed is less
>>>> than its budget.
>>>>
>>>> Signed-off-by: Dwip N. Banerjee <dnbanerg@us.ibm.com>
>>>> ---
>>>> drivers/net/ethernet/ibm/ibmvnic.c | 5 ++++-
>>>> 1 file changed, 4 insertions(+), 1 deletion(-)
>>>>
>>>> diff --git a/drivers/net/ethernet/ibm/ibmvnic.c
>>>> b/drivers/net/ethernet/ibm/ibmvnic.c
>>>> index 96df6d8fa277..9fe43ab0496d 100644
>>>> --- a/drivers/net/ethernet/ibm/ibmvnic.c
>>>> +++ b/drivers/net/ethernet/ibm/ibmvnic.c
>>>> @@ -2537,7 +2537,10 @@ static int ibmvnic_poll(struct napi_struct
>>>>
>>>> *napi, int budget)
>>>> frames_processed++;
>>>> }
>>>>
>>>> - if (adapter->state != VNIC_CLOSING)
>>>> + if (adapter->state != VNIC_CLOSING &&
>>>> + ((atomic_read(&adapter->rx_pool[scrq_num].available) <
>>>> + adapter->req_rx_add_entries_per_subcrq / 2) ||
>>>> + frames_processed < budget))
>>>
>>> 1/2 seems a simple and good algorithm.
>>> Explaining why "frames_process < budget" is necessary in the commit
>>> message
>>> or source code also helps.
>>
>> Hello, Lijun. The patch author, Dwip Banerjee, suggested the modified
>> commit message below:
>>
>> Reduce the amount of time spent replenishing RX buffers by
>> only doing so once available buffers has fallen under a certain
>> threshold, in this case half of the total number of buffers, or
>> if the polling loop exits before the packets processed is less
>> than its budget. Non-exhaustion of NAPI budget implies lower
>> incoming packet pressure, allowing the leeway to refill the buffers
>> in preparation for any impending burst.
>
> It looks good to me.
>
>>
>> Would such an update require a v3?
>
> I assume you ask Jakub, right?
>
>
Yes. There was an issue with my mail client in my earlier response, so I
am posting Dwip's modified commit message again below.
Reduce the amount of time spent replenishing RX buffers by only doing so
once available buffers has fallen under a certain threshold, in this
case half of the total number of buffers, or if the polling loop exits
before the packets processed is less than its budget. Non-exhaustion of
NAPI budget implies lower incoming packet pressure, allowing the leeway
to refill the buffers in preparation for any impending burst.
>>>> replenish_rx_pool(adapter, &adapter->rx_pool[scrq_num]);
>>>> if (frames_processed < budget) {
>>>> if (napi_complete_done(napi, frames_processed)) {
^ permalink raw reply
* [PATCH v2 2/2] kbuild: Disable CONFIG_LD_ORPHAN_WARN for ld.lld 10.0.1
From: Nathan Chancellor @ 2020-11-19 20:46 UTC (permalink / raw)
To: Masahiro Yamada, Michal Marek, Kees Cook
Cc: linuxppc-dev, kernelci . org bot, linux-kbuild, Catalin Marinas,
Mark Brown, x86, Nick Desaulniers, Russell King, linux-kernel,
clang-built-linux, Arvind Sankar, Ingo Molnar, Borislav Petkov,
Thomas Gleixner, Will Deacon, Nathan Chancellor, linux-arm-kernel
In-Reply-To: <20201113195553.1487659-1-natechancellor@gmail.com>
ld.lld 10.0.1 spews a bunch of various warnings about .rela sections,
along with a few others. Newer versions of ld.lld do not have these
warnings. As a result, do not add '--orphan-handling=warn' to
LDFLAGS_vmlinux if ld.lld's version is not new enough.
Link: https://github.com/ClangBuiltLinux/linux/issues/1187
Link: https://github.com/ClangBuiltLinux/linux/issues/1193
Reported-by: Arvind Sankar <nivedita@alum.mit.edu>
Reported-by: kernelci.org bot <bot@kernelci.org>
Reported-by: Mark Brown <broonie@kernel.org>
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Nathan Chancellor <natechancellor@gmail.com>
---
v1 -> v2:
* Add condition as a depends on line (Kees Cook)
* Capture output of "$* --version" to avoid invoking linker twice (Nick
Desaulniers)
* Improve documentation of script in comments (Nick Desaulniers)
* Pick up review tag from Kees
MAINTAINERS | 1 +
init/Kconfig | 5 +++++
scripts/lld-version.sh | 20 ++++++++++++++++++++
3 files changed, 26 insertions(+)
create mode 100755 scripts/lld-version.sh
diff --git a/MAINTAINERS b/MAINTAINERS
index e451dcce054f..e6f74f130ae1 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4284,6 +4284,7 @@ B: https://github.com/ClangBuiltLinux/linux/issues
C: irc://chat.freenode.net/clangbuiltlinux
F: Documentation/kbuild/llvm.rst
F: scripts/clang-tools/
+F: scripts/lld-version.sh
K: \b(?i:clang|llvm)\b
CLEANCACHE API
diff --git a/init/Kconfig b/init/Kconfig
index 92c58b45abb8..b9037d6c5ab3 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -47,6 +47,10 @@ config CLANG_VERSION
int
default $(shell,$(srctree)/scripts/clang-version.sh $(CC))
+config LLD_VERSION
+ int
+ default $(shell,$(srctree)/scripts/lld-version.sh $(LD))
+
config CC_CAN_LINK
bool
default $(success,$(srctree)/scripts/cc-can-link.sh $(CC) $(CLANG_FLAGS) $(m64-flag)) if 64BIT
@@ -1351,6 +1355,7 @@ config LD_DEAD_CODE_DATA_ELIMINATION
config LD_ORPHAN_WARN
def_bool y
depends on ARCH_WANT_LD_ORPHAN_WARN
+ depends on !LD_IS_LLD || LLD_VERSION >= 110000
depends on $(ld-option,--orphan-handling=warn)
config SYSCTL
diff --git a/scripts/lld-version.sh b/scripts/lld-version.sh
new file mode 100755
index 000000000000..d70edb4d8a4f
--- /dev/null
+++ b/scripts/lld-version.sh
@@ -0,0 +1,20 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+#
+# Usage: $ ./scripts/lld-version.sh ld.lld
+#
+# Print the linker version of `ld.lld' in a 5 or 6-digit form
+# such as `100001' for ld.lld 10.0.1 etc.
+
+linker_string="$($* --version)"
+
+if ! ( echo $linker_string | grep -q LLD ); then
+ echo 0
+ exit 1
+fi
+
+VERSION=$(echo $linker_string | cut -d ' ' -f 2)
+MAJOR=$(echo $VERSION | cut -d . -f 1)
+MINOR=$(echo $VERSION | cut -d . -f 2)
+PATCHLEVEL=$(echo $VERSION | cut -d . -f 3)
+printf "%d%02d%02d\\n" $MAJOR $MINOR $PATCHLEVEL
--
2.29.2
^ permalink raw reply related
* Re: [PATCH net-next v2 9/9] ibmvnic: Do not replenish RX buffers after every polling loop
From: Thomas Falcon @ 2020-11-19 20:26 UTC (permalink / raw)
To: ljp
Cc: cforno12, netdev, ricklind, dnbanerg, Linuxppc-dev, drt, brking,
kuba, sukadev, linuxppc-dev
In-Reply-To: <1a4e7b1ef1fb101cbb26fb9d5867ee46@linux.vnet.ibm.com>
[-- Attachment #1: Type: text/plain, Size: 2129 bytes --]
On 11/19/20 3:43 AM, ljp wrote:
> On 2020-11-18 19:12, Thomas Falcon wrote:
>> From: "Dwip N. Banerjee" <dnbanerg@us.ibm.com>
>>
>> Reduce the amount of time spent replenishing RX buffers by
>> only doing so once available buffers has fallen under a certain
>> threshold, in this case half of the total number of buffers, or
>> if the polling loop exits before the packets processed is less
>> than its budget.
>>
>> Signed-off-by: Dwip N. Banerjee <dnbanerg@us.ibm.com>
>> ---
>> drivers/net/ethernet/ibm/ibmvnic.c | 5 ++++-
>> 1 file changed, 4 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/net/ethernet/ibm/ibmvnic.c
>> b/drivers/net/ethernet/ibm/ibmvnic.c
>> index 96df6d8fa277..9fe43ab0496d 100644
>> --- a/drivers/net/ethernet/ibm/ibmvnic.c
>> +++ b/drivers/net/ethernet/ibm/ibmvnic.c
>> @@ -2537,7 +2537,10 @@ static int ibmvnic_poll(struct napi_struct
>> *napi, int budget)
>> frames_processed++;
>> }
>>
>> - if (adapter->state != VNIC_CLOSING)
>> + if (adapter->state != VNIC_CLOSING &&
>> + ((atomic_read(&adapter->rx_pool[scrq_num].available) <
>> + adapter->req_rx_add_entries_per_subcrq / 2) ||
>> + frames_processed < budget))
>
> 1/2 seems a simple and good algorithm.
> Explaining why "frames_process < budget" is necessary in the commit
> message
> or source code also helps.
>
Hello, Lijun. The patch author, Dwip Banerjee, suggested the modified
commit message below:
Reduce the amount of time spent replenishing RX buffers by
only doing so once available buffers has fallen under a certain
threshold, in this case half of the total number of buffers, or
if the polling loop exits before the packets processed is less
than its budget. Non-exhaustion of NAPI budget implies lower
incoming packet pressure, allowing the leeway to refill the buffers
in preparation for any impending burst.
Would such an update require a v3?
>
>> replenish_rx_pool(adapter, &adapter->rx_pool[scrq_num]);
>> if (frames_processed < budget) {
>> if (napi_complete_done(napi, frames_processed)) {
[-- Attachment #2: Type: text/html, Size: 3641 bytes --]
^ permalink raw reply
* [PATCH v2 1/2] kbuild: Hoist '--orphan-handling' into Kconfig
From: Nathan Chancellor @ 2020-11-19 20:46 UTC (permalink / raw)
To: Masahiro Yamada, Michal Marek, Kees Cook
Cc: linuxppc-dev, linux-kbuild, Catalin Marinas, x86,
Nick Desaulniers, Russell King, linux-kernel, clang-built-linux,
Arvind Sankar, Ingo Molnar, Borislav Petkov, Thomas Gleixner,
Will Deacon, Nathan Chancellor, linux-arm-kernel
In-Reply-To: <20201113195553.1487659-1-natechancellor@gmail.com>
Currently, '--orphan-handling=warn' is spread out across four different
architectures in their respective Makefiles, which makes it a little
unruly to deal with in case it needs to be disabled for a specific
linker version (in this case, ld.lld 10.0.1).
To make it easier to control this, hoist this warning into Kconfig and
the main Makefile so that disabling it is simpler, as the warning will
only be enabled in a couple places (main Makefile and a couple of
compressed boot folders that blow away LDFLAGS_vmlinx) and making it
conditional is easier due to Kconfig syntax. One small additional
benefit of this is saving a call to ld-option on incremental builds
because we will have already evaluated it for CONFIG_LD_ORPHAN_WARN.
To keep the list of supported architectures the same, introduce
CONFIG_ARCH_WANT_LD_ORPHAN_WARN, which an architecture can select to
gain this automatically after all of the sections are specified and size
asserted. A special thanks to Kees Cook for the help text on this
config.
Link: https://github.com/ClangBuiltLinux/linux/issues/1187
Acked-by: Kees Cook <keescook@chromium.org>
Acked-by: Michael Ellerman <mpe@ellerman.id.au> (powerpc)
Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
Tested-by: Nick Desaulniers <ndesaulniers@google.com>
Signed-off-by: Nathan Chancellor <natechancellor@gmail.com>
---
v1 -> v2:
* Change
ifeq ($(CONFIG_LD_ORPHAN_WARN),y)
to
ifdef CONFIG_LD_ORPHAN_WARN
to improve readability (Michael Ellerman)
* Separate conditions for CONFIG_LD_ORPHAN warn to improve
readability (Kees Cook)
* Pick up tags from Kees, Michael, and Nick
Makefile | 6 ++++++
arch/Kconfig | 9 +++++++++
arch/arm/Kconfig | 1 +
arch/arm/Makefile | 4 ----
arch/arm/boot/compressed/Makefile | 4 +++-
arch/arm64/Kconfig | 1 +
arch/arm64/Makefile | 4 ----
arch/powerpc/Kconfig | 1 +
arch/powerpc/Makefile | 1 -
arch/x86/Kconfig | 1 +
arch/x86/Makefile | 3 ---
arch/x86/boot/compressed/Makefile | 4 +++-
init/Kconfig | 5 +++++
13 files changed, 30 insertions(+), 14 deletions(-)
diff --git a/Makefile b/Makefile
index e2c3f65c4721..2c7116299f1f 100644
--- a/Makefile
+++ b/Makefile
@@ -984,6 +984,12 @@ ifeq ($(CONFIG_RELR),y)
LDFLAGS_vmlinux += --pack-dyn-relocs=relr
endif
+# We never want expected sections to be placed heuristically by the
+# linker. All sections should be explicitly named in the linker script.
+ifdef CONFIG_LD_ORPHAN_WARN
+LDFLAGS_vmlinux += --orphan-handling=warn
+endif
+
# Align the bit size of userspace programs with the kernel
KBUILD_USERCFLAGS += $(filter -m32 -m64 --target=%, $(KBUILD_CFLAGS))
KBUILD_USERLDFLAGS += $(filter -m32 -m64 --target=%, $(KBUILD_CFLAGS))
diff --git a/arch/Kconfig b/arch/Kconfig
index 56b6ccc0e32d..ba4e966484ab 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -1028,6 +1028,15 @@ config HAVE_STATIC_CALL_INLINE
bool
depends on HAVE_STATIC_CALL
+config ARCH_WANT_LD_ORPHAN_WARN
+ bool
+ help
+ An arch should select this symbol once all linker sections are explicitly
+ included, size-asserted, or discarded in the linker scripts. This is
+ important because we never want expected sections to be placed heuristically
+ by the linker, since the locations of such sections can change between linker
+ versions.
+
source "kernel/gcov/Kconfig"
source "scripts/gcc-plugins/Kconfig"
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index fe2f17eb2b50..002e0cf025f5 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -35,6 +35,7 @@ config ARM
select ARCH_USE_CMPXCHG_LOCKREF
select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU
select ARCH_WANT_IPC_PARSE_VERSION
+ select ARCH_WANT_LD_ORPHAN_WARN
select BINFMT_FLAT_ARGVP_ENVP_ON_STACK
select BUILDTIME_TABLE_SORT if MMU
select CLONE_BACKWARDS
diff --git a/arch/arm/Makefile b/arch/arm/Makefile
index 4d76eab2b22d..e15f76ca2887 100644
--- a/arch/arm/Makefile
+++ b/arch/arm/Makefile
@@ -16,10 +16,6 @@ LDFLAGS_vmlinux += --be8
KBUILD_LDFLAGS_MODULE += --be8
endif
-# We never want expected sections to be placed heuristically by the
-# linker. All sections should be explicitly named in the linker script.
-LDFLAGS_vmlinux += $(call ld-option, --orphan-handling=warn)
-
GZFLAGS :=-9
#KBUILD_CFLAGS +=-pipe
diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile
index 47f001ca5499..e1567418a2b1 100644
--- a/arch/arm/boot/compressed/Makefile
+++ b/arch/arm/boot/compressed/Makefile
@@ -129,7 +129,9 @@ LDFLAGS_vmlinux += --no-undefined
# Delete all temporary local symbols
LDFLAGS_vmlinux += -X
# Report orphan sections
-LDFLAGS_vmlinux += $(call ld-option, --orphan-handling=warn)
+ifdef CONFIG_LD_ORPHAN_WARN
+LDFLAGS_vmlinux += --orphan-handling=warn
+endif
# Next argument is a linker script
LDFLAGS_vmlinux += -T
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 1515f6f153a0..a6b5b7ef40ae 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -81,6 +81,7 @@ config ARM64
select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT
select ARCH_WANT_FRAME_POINTERS
select ARCH_WANT_HUGE_PMD_SHARE if ARM64_4K_PAGES || (ARM64_16K_PAGES && !ARM64_VA_BITS_36)
+ select ARCH_WANT_LD_ORPHAN_WARN
select ARCH_HAS_UBSAN_SANITIZE_ALL
select ARM_AMBA
select ARM_ARCH_TIMER
diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index 5789c2d18d43..6a87d592bd00 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -28,10 +28,6 @@ LDFLAGS_vmlinux += --fix-cortex-a53-843419
endif
endif
-# We never want expected sections to be placed heuristically by the
-# linker. All sections should be explicitly named in the linker script.
-LDFLAGS_vmlinux += $(call ld-option, --orphan-handling=warn)
-
ifeq ($(CONFIG_ARM64_USE_LSE_ATOMICS), y)
ifneq ($(CONFIG_ARM64_LSE_ATOMICS), y)
$(warning LSE atomics not supported by binutils)
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index e9f13fe08492..5181872f9452 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -152,6 +152,7 @@ config PPC
select ARCH_USE_QUEUED_SPINLOCKS if PPC_QUEUED_SPINLOCKS
select ARCH_WANT_IPC_PARSE_VERSION
select ARCH_WANT_IRQS_OFF_ACTIVATE_MM
+ select ARCH_WANT_LD_ORPHAN_WARN
select ARCH_WEAK_RELEASE_ACQUIRE
select BINFMT_ELF
select BUILDTIME_TABLE_SORT
diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
index a4d56f0a41d9..d9eb0da845e1 100644
--- a/arch/powerpc/Makefile
+++ b/arch/powerpc/Makefile
@@ -123,7 +123,6 @@ endif
LDFLAGS_vmlinux-y := -Bstatic
LDFLAGS_vmlinux-$(CONFIG_RELOCATABLE) := -pie
LDFLAGS_vmlinux := $(LDFLAGS_vmlinux-y)
-LDFLAGS_vmlinux += $(call ld-option,--orphan-handling=warn)
ifdef CONFIG_PPC64
ifeq ($(call cc-option-yn,-mcmodel=medium),y)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index f6946b81f74a..fbf26e0f7a6a 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -100,6 +100,7 @@ config X86
select ARCH_WANT_DEFAULT_BPF_JIT if X86_64
select ARCH_WANTS_DYNAMIC_TASK_STRUCT
select ARCH_WANT_HUGE_PMD_SHARE
+ select ARCH_WANT_LD_ORPHAN_WARN
select ARCH_WANTS_THP_SWAP if X86_64
select BUILDTIME_TABLE_SORT
select CLKEVT_I8253
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 154259f18b8b..1bf21746f4ce 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -209,9 +209,6 @@ ifdef CONFIG_X86_64
LDFLAGS_vmlinux += -z max-page-size=0x200000
endif
-# We never want expected sections to be placed heuristically by the
-# linker. All sections should be explicitly named in the linker script.
-LDFLAGS_vmlinux += $(call ld-option, --orphan-handling=warn)
archscripts: scripts_basic
$(Q)$(MAKE) $(build)=arch/x86/tools relocs
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index ee249088cbfe..40b8fd375d52 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -61,7 +61,9 @@ KBUILD_LDFLAGS += $(call ld-option,--no-ld-generated-unwind-info)
# Compressed kernel should be built as PIE since it may be loaded at any
# address by the bootloader.
LDFLAGS_vmlinux := -pie $(call ld-option, --no-dynamic-linker)
-LDFLAGS_vmlinux += $(call ld-option, --orphan-handling=warn)
+ifdef CONFIG_LD_ORPHAN_WARN
+LDFLAGS_vmlinux += --orphan-handling=warn
+endif
LDFLAGS_vmlinux += -T
hostprogs := mkpiggy
diff --git a/init/Kconfig b/init/Kconfig
index c9446911cf41..92c58b45abb8 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1348,6 +1348,11 @@ config LD_DEAD_CODE_DATA_ELIMINATION
present. This option is not well tested yet, so use at your
own risk.
+config LD_ORPHAN_WARN
+ def_bool y
+ depends on ARCH_WANT_LD_ORPHAN_WARN
+ depends on $(ld-option,--orphan-handling=warn)
+
config SYSCTL
bool
base-commit: 09162bc32c880a791c6c0668ce0745cf7958f576
--
2.29.2
^ permalink raw reply related
* Re: [PATCH net-next v2 9/9] ibmvnic: Do not replenish RX buffers after every polling loop
From: ljp @ 2020-11-19 20:38 UTC (permalink / raw)
To: Thomas Falcon
Cc: cforno12, netdev, ricklind, dnbanerg, Linuxppc-dev, drt, brking,
kuba, sukadev, linuxppc-dev
In-Reply-To: <83ca37f3-07be-4179-8414-88c8c83bfe56@linux.ibm.com>
On 2020-11-19 14:26, Thomas Falcon wrote:
> On 11/19/20 3:43 AM, ljp wrote:
>
>> On 2020-11-18 19:12, Thomas Falcon wrote:
>>
>>> From: "Dwip N. Banerjee" <dnbanerg@us.ibm.com>
>>>
>>> Reduce the amount of time spent replenishing RX buffers by
>>> only doing so once available buffers has fallen under a certain
>>> threshold, in this case half of the total number of buffers, or
>>> if the polling loop exits before the packets processed is less
>>> than its budget.
>>>
>>> Signed-off-by: Dwip N. Banerjee <dnbanerg@us.ibm.com>
>>> ---
>>> drivers/net/ethernet/ibm/ibmvnic.c | 5 ++++-
>>> 1 file changed, 4 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/drivers/net/ethernet/ibm/ibmvnic.c
>>> b/drivers/net/ethernet/ibm/ibmvnic.c
>>> index 96df6d8fa277..9fe43ab0496d 100644
>>> --- a/drivers/net/ethernet/ibm/ibmvnic.c
>>> +++ b/drivers/net/ethernet/ibm/ibmvnic.c
>>> @@ -2537,7 +2537,10 @@ static int ibmvnic_poll(struct napi_struct
>>>
>>> *napi, int budget)
>>> frames_processed++;
>>> }
>>>
>>> - if (adapter->state != VNIC_CLOSING)
>>> + if (adapter->state != VNIC_CLOSING &&
>>> + ((atomic_read(&adapter->rx_pool[scrq_num].available) <
>>> + adapter->req_rx_add_entries_per_subcrq / 2) ||
>>> + frames_processed < budget))
>>
>> 1/2 seems a simple and good algorithm.
>> Explaining why "frames_process < budget" is necessary in the commit
>> message
>> or source code also helps.
>
> Hello, Lijun. The patch author, Dwip Banerjee, suggested the modified
> commit message below:
>
> Reduce the amount of time spent replenishing RX buffers by
> only doing so once available buffers has fallen under a certain
> threshold, in this case half of the total number of buffers, or
> if the polling loop exits before the packets processed is less
> than its budget. Non-exhaustion of NAPI budget implies lower
> incoming packet pressure, allowing the leeway to refill the buffers
> in preparation for any impending burst.
It looks good to me.
>
> Would such an update require a v3?
I assume you ask Jakub, right?
>>> replenish_rx_pool(adapter, &adapter->rx_pool[scrq_num]);
>>> if (frames_processed < budget) {
>>> if (napi_complete_done(napi, frames_processed)) {
^ permalink raw reply
* Re: [PATCH v3 0/2] powerpc/ptrace: Hard wire PT_SOFTE value to 1 in gpr_get() too
From: Oleg Nesterov @ 2020-11-19 18:22 UTC (permalink / raw)
To: Christophe Leroy
Cc: Christophe Leroy, Madhavan Srinivasan, linuxppc-dev,
Nicholas Piggin, linux-kernel, Paul Mackerras, Al Viro,
Aneesh Kumar K.V, Jan Kratochvil
In-Reply-To: <d7c3ed05-b7e7-fac0-871f-4c43c1a7e90c@csgroup.eu>
On 11/19, Christophe Leroy wrote:
>
>
> Le 19/11/2020 à 17:01, Oleg Nesterov a écrit :
> >Can we finally fix this problem? ;)
> >
> >My previous attempt was ignored, see
>
> That doesn't seems right.
>
> Michael made some suggestion it seems, can you respond to it ?
I did, see https://lore.kernel.org/lkml/20200611105830.GB12500@redhat.com/
> >Sorry, uncompiled/untested, I don't have a ppc machine.
>
> I compiled with ppc64_defconfig, that seems ok. Still untested.
Thanks.
Oleg.
^ permalink raw reply
* Re: [PATCH v3 1/2] powerpc/ptrace: simplify gpr_get/tm_cgpr_get
From: Oleg Nesterov @ 2020-11-19 18:18 UTC (permalink / raw)
To: Christophe Leroy
Cc: Christophe Leroy, Madhavan Srinivasan, linuxppc-dev,
Nicholas Piggin, linux-kernel, Paul Mackerras, Al Viro,
Aneesh Kumar K.V, Jan Kratochvil
In-Reply-To: <94c56c46-e336-f61c-3623-1b2014fcbb2e@csgroup.eu>
On 11/19, Christophe Leroy wrote:
>
>
> Le 19/11/2020 à 17:02, Oleg Nesterov a écrit :
> >gpr_get() does membuf_write() twice to override pt_regs->msr in between.
>
> Is there anything wrong with that ?
Nothing wrong, but imo the code and 2/2 looks simpler after this patch.
I tried to explain this in the changelog.
> > int tm_cgpr_get(struct task_struct *target, const struct user_regset *regset,
> > struct membuf to)
> > {
> >+ struct membuf to_msr = membuf_at(&to, offsetof(struct pt_regs, msr));
> >+
> > if (!cpu_has_feature(CPU_FTR_TM))
> > return -ENODEV;
> >@@ -97,17 +99,12 @@ int tm_cgpr_get(struct task_struct *target, const struct user_regset *regset,
> > flush_altivec_to_thread(target);
> > membuf_write(&to, &target->thread.ckpt_regs,
> >- offsetof(struct pt_regs, msr));
> >- membuf_store(&to, get_user_ckpt_msr(target));
> >+ sizeof(struct user_pt_regs));
>
> This looks mis-aligned. But it should fit on a single line, now we allow up to 100 chars on a line.
OK, I can change this.
> >- BUILD_BUG_ON(offsetof(struct pt_regs, orig_gpr3) !=
> >- offsetof(struct pt_regs, msr) + sizeof(long));
> >+ membuf_store(&to_msr, get_user_ckpt_msr(target));
> >- membuf_write(&to, &target->thread.ckpt_regs.orig_gpr3,
> >- sizeof(struct user_pt_regs) -
> >- offsetof(struct pt_regs, orig_gpr3));
> > return membuf_zero(&to, ELF_NGREG * sizeof(unsigned long) -
> >- sizeof(struct user_pt_regs));
> >+ sizeof(struct user_pt_regs));
>
> I can't see any change here except the alignment. Can you leave it as is ?
I just tried to make tm_cgpr_get() and gpr_get() look similar.
Sure, I can leave it as is.
Better yet, could you please fix this problem somehow so that I could forget
about the bug assigned to me?
I know nothing about powerpc, and personally I do not care about this (minor)
bug, I agree with any changes.
> >- membuf_write(&to, target->thread.regs, offsetof(struct pt_regs, msr));
> >- membuf_store(&to, get_user_msr(target));
> >+ membuf_write(&to, target->thread.regs,
> >+ sizeof(struct user_pt_regs));
>
> This should fit on a single line.
>
> > return membuf_zero(&to, ELF_NGREG * sizeof(unsigned long) -
> >- sizeof(struct user_pt_regs));
> >+ sizeof(struct user_pt_regs));
>
> This should not change, it's not part of the changes for this patch.
See above, I can leave it as is.
> >--- a/include/linux/regset.h
> >+++ b/include/linux/regset.h
> >@@ -46,6 +46,18 @@ static inline int membuf_write(struct membuf *s, const void *v, size_t size)
> > return s->left;
> > }
> >+static inline struct membuf membuf_at(const struct membuf *s, size_t offs)
> >+{
> >+ struct membuf n = *s;
>
> Is there any point in using a struct membuf * instaed of a struct membuf as parameter ?
This matches other membuf_ helpers.
Oleg.
^ permalink raw reply
* Re: [PATCH v3 0/2] powerpc/ptrace: Hard wire PT_SOFTE value to 1 in gpr_get() too
From: Christophe Leroy @ 2020-11-19 17:19 UTC (permalink / raw)
To: Oleg Nesterov, Benjamin Herrenschmidt, Madhavan Srinivasan,
Michael Ellerman, Paul Mackerras
Cc: Christophe Leroy, Aneesh Kumar K.V, linux-kernel, Nicholas Piggin,
Jan Kratochvil, Al Viro, linuxppc-dev
In-Reply-To: <20201119160154.GA5183@redhat.com>
Le 19/11/2020 à 17:01, Oleg Nesterov a écrit :
> Can we finally fix this problem? ;)
>
> My previous attempt was ignored, see
That doesn't seems right.
Michael made some suggestion it seems, can you respond to it ?
>
> https://lore.kernel.org/lkml/20190917121256.GA8659@redhat.com/
>
> Now that gpr_get() was changed to use membuf API we can make a simpler fix.
>
> Sorry, uncompiled/untested, I don't have a ppc machine.
I compiled with ppc64_defconfig, that seems ok. Still untested.
Christophe
>
> Oleg.
>
> arch/powerpc/kernel/ptrace/ptrace-tm.c | 21 ++++++++++++---------
> arch/powerpc/kernel/ptrace/ptrace-view.c | 21 ++++++++++++---------
> include/linux/regset.h | 12 ++++++++++++
> 3 files changed, 36 insertions(+), 18 deletions(-)
>
^ permalink raw reply
* Re: [PATCH v3 2/2] powerpc/ptrace: Hard wire PT_SOFTE value to 1 in gpr_get() too
From: Christophe Leroy @ 2020-11-19 17:18 UTC (permalink / raw)
To: Oleg Nesterov, Benjamin Herrenschmidt, Madhavan Srinivasan,
Michael Ellerman, Paul Mackerras
Cc: Christophe Leroy, Aneesh Kumar K.V, linux-kernel, Nicholas Piggin,
Jan Kratochvil, Al Viro, linuxppc-dev
In-Reply-To: <20201119160247.GB5188@redhat.com>
Le 19/11/2020 à 17:02, Oleg Nesterov a écrit :
> The commit a8a4b03ab95f ("powerpc: Hard wire PT_SOFTE value to 1 in
> ptrace & signals") changed ptrace_get_reg(PT_SOFTE) to report 0x1,
> but PTRACE_GETREGS still copies pt_regs->softe as is.
>
> This is not consistent and this breaks the user-regs-peekpoke test
> from https://sourceware.org/systemtap/wiki/utrace/tests/
>
> Reported-by: Jan Kratochvil <jan.kratochvil@redhat.com>
> Signed-off-by: Oleg Nesterov <oleg@redhat.com>
> ---
> arch/powerpc/kernel/ptrace/ptrace-tm.c | 8 +++++++-
> arch/powerpc/kernel/ptrace/ptrace-view.c | 8 +++++++-
> 2 files changed, 14 insertions(+), 2 deletions(-)
>
> diff --git a/arch/powerpc/kernel/ptrace/ptrace-tm.c b/arch/powerpc/kernel/ptrace/ptrace-tm.c
> index f8fcbd85d4cb..d0d339f86e61 100644
> --- a/arch/powerpc/kernel/ptrace/ptrace-tm.c
> +++ b/arch/powerpc/kernel/ptrace/ptrace-tm.c
> @@ -87,6 +87,10 @@ int tm_cgpr_get(struct task_struct *target, const struct user_regset *regset,
> struct membuf to)
> {
> struct membuf to_msr = membuf_at(&to, offsetof(struct pt_regs, msr));
> +#ifdef CONFIG_PPC64
> + struct membuf to_softe = membuf_at(&to,
> + offsetof(struct pt_regs, softe));
Should fit on a single line I think.
> +#endif
>
> if (!cpu_has_feature(CPU_FTR_TM))
> return -ENODEV;
> @@ -102,7 +106,9 @@ int tm_cgpr_get(struct task_struct *target, const struct user_regset *regset,
> sizeof(struct user_pt_regs));
>
> membuf_store(&to_msr, get_user_ckpt_msr(target));
> -
> +#ifdef CONFIG_PPC64
> + membuf_store(&to_softe, 0x1ul);
> +#endif
> return membuf_zero(&to, ELF_NGREG * sizeof(unsigned long) -
> sizeof(struct user_pt_regs));
> }
> diff --git a/arch/powerpc/kernel/ptrace/ptrace-view.c b/arch/powerpc/kernel/ptrace/ptrace-view.c
> index 39686ede40b3..f554ccfcbfae 100644
> --- a/arch/powerpc/kernel/ptrace/ptrace-view.c
> +++ b/arch/powerpc/kernel/ptrace/ptrace-view.c
> @@ -218,6 +218,10 @@ static int gpr_get(struct task_struct *target, const struct user_regset *regset,
> struct membuf to)
> {
> struct membuf to_msr = membuf_at(&to, offsetof(struct pt_regs, msr));
> +#ifdef CONFIG_PPC64
> + struct membuf to_softe = membuf_at(&to,
> + offsetof(struct pt_regs, softe));
Should fit on a single line I think.
> +#endif
> int i;
>
> if (target->thread.regs == NULL)
> @@ -233,7 +237,9 @@ static int gpr_get(struct task_struct *target, const struct user_regset *regset,
> sizeof(struct user_pt_regs));
>
> membuf_store(&to_msr, get_user_msr(target));
> -
> +#ifdef CONFIG_PPC64
> + membuf_store(&to_softe, 0x1ul);
> +#endif
> return membuf_zero(&to, ELF_NGREG * sizeof(unsigned long) -
> sizeof(struct user_pt_regs));
> }
>
Christophe
^ permalink raw reply
* Re: [PATCH v3 1/2] powerpc/ptrace: simplify gpr_get/tm_cgpr_get
From: Christophe Leroy @ 2020-11-19 17:16 UTC (permalink / raw)
To: Oleg Nesterov, Benjamin Herrenschmidt, Madhavan Srinivasan,
Michael Ellerman, Paul Mackerras
Cc: Christophe Leroy, Aneesh Kumar K.V, linux-kernel, Nicholas Piggin,
Jan Kratochvil, Al Viro, linuxppc-dev
In-Reply-To: <20201119160221.GA5188@redhat.com>
Le 19/11/2020 à 17:02, Oleg Nesterov a écrit :
> gpr_get() does membuf_write() twice to override pt_regs->msr in between.
Is there anything wrong with that ?
> We can call membuf_write() once and change ->msr in the kernel buffer,
> this simplifies the code and the next fix.
>
> The patch adds a new simple helper, membuf_at(offs), it returns the new
> membuf which can be safely used after membuf_write().
>
> Signed-off-by: Oleg Nesterov <oleg@redhat.com>
> ---
> arch/powerpc/kernel/ptrace/ptrace-tm.c | 13 +++++--------
> arch/powerpc/kernel/ptrace/ptrace-view.c | 13 +++++--------
> include/linux/regset.h | 12 ++++++++++++
> 3 files changed, 22 insertions(+), 16 deletions(-)
>
> diff --git a/arch/powerpc/kernel/ptrace/ptrace-tm.c b/arch/powerpc/kernel/ptrace/ptrace-tm.c
> index 54f2d076206f..f8fcbd85d4cb 100644
> --- a/arch/powerpc/kernel/ptrace/ptrace-tm.c
> +++ b/arch/powerpc/kernel/ptrace/ptrace-tm.c
> @@ -86,6 +86,8 @@ int tm_cgpr_active(struct task_struct *target, const struct user_regset *regset)
> int tm_cgpr_get(struct task_struct *target, const struct user_regset *regset,
> struct membuf to)
> {
> + struct membuf to_msr = membuf_at(&to, offsetof(struct pt_regs, msr));
> +
> if (!cpu_has_feature(CPU_FTR_TM))
> return -ENODEV;
>
> @@ -97,17 +99,12 @@ int tm_cgpr_get(struct task_struct *target, const struct user_regset *regset,
> flush_altivec_to_thread(target);
>
> membuf_write(&to, &target->thread.ckpt_regs,
> - offsetof(struct pt_regs, msr));
> - membuf_store(&to, get_user_ckpt_msr(target));
> + sizeof(struct user_pt_regs));
This looks mis-aligned. But it should fit on a single line, now we allow up to 100 chars on a line.
>
> - BUILD_BUG_ON(offsetof(struct pt_regs, orig_gpr3) !=
> - offsetof(struct pt_regs, msr) + sizeof(long));
> + membuf_store(&to_msr, get_user_ckpt_msr(target));
>
> - membuf_write(&to, &target->thread.ckpt_regs.orig_gpr3,
> - sizeof(struct user_pt_regs) -
> - offsetof(struct pt_regs, orig_gpr3));
> return membuf_zero(&to, ELF_NGREG * sizeof(unsigned long) -
> - sizeof(struct user_pt_regs));
> + sizeof(struct user_pt_regs));
I can't see any change here except the alignment. Can you leave it as is ?
> }
>
> /*
> diff --git a/arch/powerpc/kernel/ptrace/ptrace-view.c b/arch/powerpc/kernel/ptrace/ptrace-view.c
> index 7e6478e7ed07..39686ede40b3 100644
> --- a/arch/powerpc/kernel/ptrace/ptrace-view.c
> +++ b/arch/powerpc/kernel/ptrace/ptrace-view.c
> @@ -217,6 +217,7 @@ int ptrace_put_reg(struct task_struct *task, int regno, unsigned long data)
> static int gpr_get(struct task_struct *target, const struct user_regset *regset,
> struct membuf to)
> {
> + struct membuf to_msr = membuf_at(&to, offsetof(struct pt_regs, msr));
> int i;
>
> if (target->thread.regs == NULL)
> @@ -228,17 +229,13 @@ static int gpr_get(struct task_struct *target, const struct user_regset *regset,
> target->thread.regs->gpr[i] = NV_REG_POISON;
> }
>
> - membuf_write(&to, target->thread.regs, offsetof(struct pt_regs, msr));
> - membuf_store(&to, get_user_msr(target));
> + membuf_write(&to, target->thread.regs,
> + sizeof(struct user_pt_regs));
This should fit on a single line.
>
> - BUILD_BUG_ON(offsetof(struct pt_regs, orig_gpr3) !=
> - offsetof(struct pt_regs, msr) + sizeof(long));
> + membuf_store(&to_msr, get_user_msr(target));
>
> - membuf_write(&to, &target->thread.regs->orig_gpr3,
> - sizeof(struct user_pt_regs) -
> - offsetof(struct pt_regs, orig_gpr3));
> return membuf_zero(&to, ELF_NGREG * sizeof(unsigned long) -
> - sizeof(struct user_pt_regs));
> + sizeof(struct user_pt_regs));
This should not change, it's not part of the changes for this patch.
> }
>
> static int gpr_set(struct task_struct *target, const struct user_regset *regset,
> diff --git a/include/linux/regset.h b/include/linux/regset.h
> index c3403f328257..a00765f0e8cf 100644
> --- a/include/linux/regset.h
> +++ b/include/linux/regset.h
> @@ -46,6 +46,18 @@ static inline int membuf_write(struct membuf *s, const void *v, size_t size)
> return s->left;
> }
>
> +static inline struct membuf membuf_at(const struct membuf *s, size_t offs)
> +{
> + struct membuf n = *s;
Is there any point in using a struct membuf * instaed of a struct membuf as parameter ?
> +
> + if (offs > n.left)
> + offs = n.left;
> + n.p += offs;
> + n.left -= offs;
> +
> + return n;
> +}
> +
> /* current s->p must be aligned for v; v must be a scalar */
> #define membuf_store(s, v) \
> ({ \
>
Christophe
^ permalink raw reply
* Re: [PATCH v2] ASoC: fsl_sai: Correct the clock source for mclk0
From: Mark Brown @ 2020-11-19 17:09 UTC (permalink / raw)
To: Xiubo.Lee, Shengjiu Wang, nicoleotsuka, timur, perex, festevam,
alsa-devel, tiwai
Cc: linuxppc-dev, linux-kernel
In-Reply-To: <1605768038-4582-1-git-send-email-shengjiu.wang@nxp.com>
On Thu, 19 Nov 2020 14:40:38 +0800, Shengjiu Wang wrote:
> On VF610, mclk0 = bus_clk;
> On i.MX6SX/6UL/6ULL/7D, mclk0 = mclk1;
> On i.MX7ULP, mclk0 = bus_clk;
> On i.MX8QM/8QXP, mclk0 = bus_clk;
> On i.MX8MQ/8MN/8MM/8MP, mclk0 = bus_clk;
>
> So add variable mclk0_is_mclk1 in fsl_sai_soc_data to
> distinguish these platforms.
Applied to
https://git.kernel.org/pub/scm/linux/kernel/git/broonie/sound.git for-next
Thanks!
[1/1] ASoC: fsl_sai: Correct the clock source for mclk0
commit: 53233e40c142b1e0e1df9d9ac0ffc0945cfffbc9
All being well this means that it will be integrated into the linux-next
tree (usually sometime in the next 24 hours) and sent to Linus during
the next merge window (or sooner if it is a bug fix), however if
problems are discovered then the patch may be dropped or reverted.
You may get further e-mails resulting from automated or manual testing
and review of the tree, please engage with people reporting problems and
send followup patches addressing any issues that are reported if needed.
If any updates are required or you are submitting further changes they
should be sent as incremental updates against current git, existing
patches will not be replaced.
Please add any relevant lists and maintainers to the CCs when replying
to this mail.
Thanks,
Mark
^ permalink raw reply
* Re: [PATCH v3 2/2] powerpc/ptrace: Hard wire PT_SOFTE value to 1 in gpr_get() too
From: Oleg Nesterov @ 2020-11-19 16:05 UTC (permalink / raw)
To: Benjamin Herrenschmidt, Madhavan Srinivasan, Michael Ellerman,
Paul Mackerras
Cc: Christophe Leroy, Aneesh Kumar K.V, linux-kernel, Nicholas Piggin,
Jan Kratochvil, Al Viro, linuxppc-dev
In-Reply-To: <20201119160247.GB5188@redhat.com>
On 11/19, Oleg Nesterov wrote:
>
> This is not consistent and this breaks the user-regs-peekpoke test
> from https://sourceware.org/systemtap/wiki/utrace/tests/
See the test-case below.
Oleg.
/* Test case for PTRACE_SETREGS modifying the requested ragisters.
x86* counterpart of the s390* testcase `user-area-access.c'.
This software is provided 'as-is', without any express or implied
warranty. In no event will the authors be held liable for any damages
arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it
freely. */
/* FIXME: EFLAGS should be tested restricted on the appropriate bits. */
#define _GNU_SOURCE 1
#if defined __powerpc__ || defined __sparc__
# define user_regs_struct pt_regs
#endif
#ifdef __ia64__
#define ia64_fpreg ia64_fpreg_DISABLE
#define pt_all_user_regs pt_all_user_regs_DISABLE
#endif /* __ia64__ */
#include <sys/ptrace.h>
#ifdef __ia64__
#undef ia64_fpreg
#undef pt_all_user_regs
#endif /* __ia64__ */
#include <linux/ptrace.h>
#include <sys/types.h>
#include <sys/user.h>
#if defined __i386__ || defined __x86_64__
#include <sys/debugreg.h>
#endif
#include <asm/unistd.h>
#include <assert.h>
#include <errno.h>
#include <error.h>
#include <unistd.h>
#include <signal.h>
#include <stdlib.h>
#include <stdio.h>
#include <sys/wait.h>
#include <sys/time.h>
#include <string.h>
#include <stddef.h>
/* ia64 has PTRACE_SETREGS but it has no USER_REGS_STRUCT. */
#if !defined PTRACE_SETREGS || defined __ia64__
int
main (void)
{
return 77;
}
#else /* PTRACE_SETREGS */
/* The minimal alignment we use for the random access ranges. */
#define REGALIGN (sizeof (long))
static pid_t child;
static void
cleanup (void)
{
if (child > 0)
kill (child, SIGKILL);
child = 0;
}
static void
handler_fail (int signo)
{
cleanup ();
signal (SIGABRT, SIG_DFL);
abort ();
}
int
main (void)
{
long l;
int status, i;
pid_t pid;
union
{
struct user_regs_struct user;
unsigned char byte[sizeof (struct user_regs_struct)];
} u, u2;
int start;
setbuf (stdout, NULL);
atexit (cleanup);
signal (SIGABRT, handler_fail);
signal (SIGALRM, handler_fail);
signal (SIGINT, handler_fail);
i = alarm (10);
assert (i == 0);
child = fork ();
switch (child)
{
case -1:
assert_perror (errno);
assert (0);
case 0:
l = ptrace (PTRACE_TRACEME, 0, NULL, NULL);
assert (l == 0);
// Prevent rt_sigprocmask() call called by glibc after raise().
syscall (__NR_tkill, getpid (), SIGSTOP);
assert (0);
default:
break;
}
pid = waitpid (child, &status, 0);
assert (pid == child);
assert (WIFSTOPPED (status));
assert (WSTOPSIG (status) == SIGSTOP);
/* Fetch U2 from the inferior. */
errno = 0;
# ifdef __sparc__
l = ptrace (PTRACE_GETREGS, child, &u2.user, NULL);
# else
l = ptrace (PTRACE_GETREGS, child, NULL, &u2.user);
# endif
assert_perror (errno);
assert (l == 0);
/* Initialize U with a pattern. */
for (i = 0; i < sizeof u.byte; i++)
u.byte[i] = i;
#ifdef __x86_64__
/* non-EFLAGS modifications fail with EIO, EFLAGS gets back different. */
u.user.eflags = u2.user.eflags;
u.user.cs = u2.user.cs;
u.user.ds = u2.user.ds;
u.user.es = u2.user.es;
u.user.fs = u2.user.fs;
u.user.gs = u2.user.gs;
u.user.ss = u2.user.ss;
u.user.fs_base = u2.user.fs_base;
u.user.gs_base = u2.user.gs_base;
/* RHEL-4 refuses to set too high (and invalid) PC values. */
u.user.rip = (unsigned long) handler_fail;
/* 2.6.25 always truncates and sign-extends orig_rax. */
u.user.orig_rax = (int) u.user.orig_rax;
#endif /* __x86_64__ */
#ifdef __i386__
/* These values get back different. */
u.user.xds = u2.user.xds;
u.user.xes = u2.user.xes;
u.user.xfs = u2.user.xfs;
u.user.xgs = u2.user.xgs;
u.user.xcs = u2.user.xcs;
u.user.eflags = u2.user.eflags;
u.user.xss = u2.user.xss;
/* RHEL-4 refuses to set too high (and invalid) PC values. */
u.user.eip = (unsigned long) handler_fail;
#endif /* __i386__ */
#ifdef __powerpc__
/* These fields are constrained. */
u.user.msr = u2.user.msr;
# ifdef __powerpc64__
u.user.softe = u2.user.softe;
# else
u.user.mq = u2.user.mq;
# endif /* __powerpc64__ */
u.user.trap = u2.user.trap;
u.user.dar = u2.user.dar;
u.user.dsisr = u2.user.dsisr;
u.user.result = u2.user.result;
#endif /* __powerpc__ */
/* Poke U. */
# ifdef __sparc__
l = ptrace (PTRACE_SETREGS, child, &u.user, NULL);
# else
l = ptrace (PTRACE_SETREGS, child, NULL, &u.user);
# endif
assert (l == 0);
/* Peek into U2. */
# ifdef __sparc__
l = ptrace (PTRACE_GETREGS, child, &u2.user, NULL);
# else
l = ptrace (PTRACE_GETREGS, child, NULL, &u2.user);
# endif
assert (l == 0);
/* Verify it matches. */
if (memcmp (&u.user, &u2.user, sizeof u.byte) != 0)
{
for (start = 0; start + REGALIGN <= sizeof u.byte; start += REGALIGN)
if (*(unsigned long *) (u.byte + start)
!= *(unsigned long *) (u2.byte + start))
printf ("\
mismatch at offset %#x: SETREGS wrote %lx GETREGS read %lx\n",
start, *(unsigned long *) (u.byte + start),
*(unsigned long *) (u2.byte + start));
return 1;
}
/* Reverse the pattern. */
for (i = 0; i < sizeof u.byte; i++)
u.byte[i] ^= -1;
#ifdef __x86_64__
/* non-EFLAGS modifications fail with EIO, EFLAGS gets back different. */
u.user.eflags = u2.user.eflags;
u.user.cs = u2.user.cs;
u.user.ds = u2.user.ds;
u.user.es = u2.user.es;
u.user.fs = u2.user.fs;
u.user.gs = u2.user.gs;
u.user.ss = u2.user.ss;
u.user.fs_base = u2.user.fs_base;
u.user.gs_base = u2.user.gs_base;
/* RHEL-4 refuses to set too high (and invalid) PC values. */
u.user.rip = (unsigned long) handler_fail;
/* 2.6.25 always truncates and sign-extends orig_rax. */
u.user.orig_rax = (int) u.user.orig_rax;
#endif /* __x86_64__ */
#ifdef __i386__
/* These values get back different. */
u.user.xds = u2.user.xds;
u.user.xes = u2.user.xes;
u.user.xfs = u2.user.xfs;
u.user.xgs = u2.user.xgs;
u.user.xcs = u2.user.xcs;
u.user.eflags = u2.user.eflags;
u.user.xss = u2.user.xss;
/* RHEL-4 refuses to set too high (and invalid) PC values. */
u.user.eip = (unsigned long) handler_fail;
#endif /* __i386__ */
#ifdef __powerpc__
/* These fields are constrained. */
u.user.msr = u2.user.msr;
# ifdef __powerpc64__
u.user.softe = u2.user.softe;
# else
u.user.mq = u2.user.mq;
# endif /* __powerpc64__ */
u.user.trap = u2.user.trap;
u.user.dar = u2.user.dar;
u.user.dsisr = u2.user.dsisr;
u.user.result = u2.user.result;
#endif /* __powerpc__ */
/* Poke U. */
# ifdef __sparc__
l = ptrace (PTRACE_SETREGS, child, &u.user, NULL);
# else
l = ptrace (PTRACE_SETREGS, child, NULL, &u.user);
# endif
assert (l == 0);
/* Peek into U2. */
# ifdef __sparc__
l = ptrace (PTRACE_GETREGS, child, &u2.user, NULL);
# else
l = ptrace (PTRACE_GETREGS, child, NULL, &u2.user);
# endif
assert (l == 0);
/* Verify it matches. */
if (memcmp (&u.user, &u2.user, sizeof u.byte) != 0)
{
for (start = 0; start + REGALIGN <= sizeof u.byte; start += REGALIGN)
if (*(unsigned long *) (u.byte + start)
!= *(unsigned long *) (u2.byte + start))
printf ("\
mismatch at offset %#x: SETREGS wrote %lx GETREGS read %lx\n",
start, *(unsigned long *) (u.byte + start),
*(unsigned long *) (u2.byte + start));
return 1;
}
/* Now try poking arbitrary ranges and verifying it reads back right.
We expect the U area is already a random enough pattern. */
for (start = 0; start + REGALIGN <= sizeof u.byte; start += REGALIGN)
{
for (i = start; i < start + REGALIGN; i++)
u.byte[i]++;
#ifdef __x86_64__
/* non-EFLAGS modifications fail with EIO, EFLAGS gets back different. */
u.user.eflags = u2.user.eflags;
u.user.cs = u2.user.cs;
u.user.ds = u2.user.ds;
u.user.es = u2.user.es;
u.user.fs = u2.user.fs;
u.user.gs = u2.user.gs;
u.user.ss = u2.user.ss;
u.user.fs_base = u2.user.fs_base;
u.user.gs_base = u2.user.gs_base;
/* RHEL-4 refuses to set too high (and invalid) PC values. */
u.user.rip = (unsigned long) handler_fail;
/* 2.6.25 always truncates and sign-extends orig_rax. */
u.user.orig_rax = (int) u.user.orig_rax;
#endif /* __x86_64__ */
#ifdef __i386__
/* These values get back different. */
u.user.xds = u2.user.xds;
u.user.xes = u2.user.xes;
u.user.xfs = u2.user.xfs;
u.user.xgs = u2.user.xgs;
u.user.xcs = u2.user.xcs;
u.user.eflags = u2.user.eflags;
u.user.xss = u2.user.xss;
/* RHEL-4 refuses to set too high (and invalid) PC values. */
u.user.eip = (unsigned long) handler_fail;
#endif /* __i386__ */
#ifdef __powerpc__
/* These fields are constrained. */
u.user.msr = u2.user.msr;
# ifdef __powerpc64__
u.user.softe = u2.user.softe;
# else
u.user.mq = u2.user.mq;
# endif /* __powerpc64__ */
u.user.trap = u2.user.trap;
u.user.dar = u2.user.dar;
u.user.dsisr = u2.user.dsisr;
u.user.result = u2.user.result;
if (start > offsetof (struct pt_regs, ccr))
break;
#endif /* __powerpc__ */
/* Poke U. */
l = ptrace (PTRACE_POKEUSER, child, (void *) (unsigned long) start,
(void *) *(unsigned long *) (u.byte + start));
if (l != 0)
error (1, errno, "PTRACE_POKEUSER at %x", start);
/* Peek into U2. */
# ifdef __sparc__
l = ptrace (PTRACE_GETREGS, child, &u2.user, NULL);
# else
l = ptrace (PTRACE_GETREGS, child, NULL, &u2.user);
# endif
assert (l == 0);
/* Verify it matches. */
if (memcmp (&u.user, &u2.user, sizeof u.byte) != 0)
{
printf ("mismatch at offset %#x: poked %lx but GETREGS read %lx\n",
start, *(unsigned long *) (u.byte + start),
*(unsigned long *) (u2.byte + start));
return 1;
}
}
/* Now try peeking arbitrary ranges and verifying it is the same.
We expect the U area is already a random enough pattern. */
for (start = 0; start + REGALIGN <= sizeof u.byte; start += REGALIGN)
{
/* Peek for the U comparation. */
errno = 0;
l = ptrace (PTRACE_PEEKUSER, child, (void *) (unsigned long) start,
NULL);
assert_perror (errno);
/* Verify it matches. */
if (*(unsigned long *) (u.byte + start) != l)
{
printf ("mismatch at offset %#x: poked %lx but peeked %lx\n",
start, *(unsigned long *) (u.byte + start), l);
return 1;
}
}
return 0;
}
#endif /* PTRACE_SETREGS */
^ permalink raw reply
* [PATCH v3 2/2] powerpc/ptrace: Hard wire PT_SOFTE value to 1 in gpr_get() too
From: Oleg Nesterov @ 2020-11-19 16:02 UTC (permalink / raw)
To: Benjamin Herrenschmidt, Madhavan Srinivasan, Michael Ellerman,
Paul Mackerras
Cc: Christophe Leroy, Aneesh Kumar K.V, linux-kernel, Nicholas Piggin,
Jan Kratochvil, Al Viro, linuxppc-dev
In-Reply-To: <20201119160154.GA5183@redhat.com>
The commit a8a4b03ab95f ("powerpc: Hard wire PT_SOFTE value to 1 in
ptrace & signals") changed ptrace_get_reg(PT_SOFTE) to report 0x1,
but PTRACE_GETREGS still copies pt_regs->softe as is.
This is not consistent and this breaks the user-regs-peekpoke test
from https://sourceware.org/systemtap/wiki/utrace/tests/
Reported-by: Jan Kratochvil <jan.kratochvil@redhat.com>
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
---
arch/powerpc/kernel/ptrace/ptrace-tm.c | 8 +++++++-
arch/powerpc/kernel/ptrace/ptrace-view.c | 8 +++++++-
2 files changed, 14 insertions(+), 2 deletions(-)
diff --git a/arch/powerpc/kernel/ptrace/ptrace-tm.c b/arch/powerpc/kernel/ptrace/ptrace-tm.c
index f8fcbd85d4cb..d0d339f86e61 100644
--- a/arch/powerpc/kernel/ptrace/ptrace-tm.c
+++ b/arch/powerpc/kernel/ptrace/ptrace-tm.c
@@ -87,6 +87,10 @@ int tm_cgpr_get(struct task_struct *target, const struct user_regset *regset,
struct membuf to)
{
struct membuf to_msr = membuf_at(&to, offsetof(struct pt_regs, msr));
+#ifdef CONFIG_PPC64
+ struct membuf to_softe = membuf_at(&to,
+ offsetof(struct pt_regs, softe));
+#endif
if (!cpu_has_feature(CPU_FTR_TM))
return -ENODEV;
@@ -102,7 +106,9 @@ int tm_cgpr_get(struct task_struct *target, const struct user_regset *regset,
sizeof(struct user_pt_regs));
membuf_store(&to_msr, get_user_ckpt_msr(target));
-
+#ifdef CONFIG_PPC64
+ membuf_store(&to_softe, 0x1ul);
+#endif
return membuf_zero(&to, ELF_NGREG * sizeof(unsigned long) -
sizeof(struct user_pt_regs));
}
diff --git a/arch/powerpc/kernel/ptrace/ptrace-view.c b/arch/powerpc/kernel/ptrace/ptrace-view.c
index 39686ede40b3..f554ccfcbfae 100644
--- a/arch/powerpc/kernel/ptrace/ptrace-view.c
+++ b/arch/powerpc/kernel/ptrace/ptrace-view.c
@@ -218,6 +218,10 @@ static int gpr_get(struct task_struct *target, const struct user_regset *regset,
struct membuf to)
{
struct membuf to_msr = membuf_at(&to, offsetof(struct pt_regs, msr));
+#ifdef CONFIG_PPC64
+ struct membuf to_softe = membuf_at(&to,
+ offsetof(struct pt_regs, softe));
+#endif
int i;
if (target->thread.regs == NULL)
@@ -233,7 +237,9 @@ static int gpr_get(struct task_struct *target, const struct user_regset *regset,
sizeof(struct user_pt_regs));
membuf_store(&to_msr, get_user_msr(target));
-
+#ifdef CONFIG_PPC64
+ membuf_store(&to_softe, 0x1ul);
+#endif
return membuf_zero(&to, ELF_NGREG * sizeof(unsigned long) -
sizeof(struct user_pt_regs));
}
--
2.25.1.362.g51ebf55
^ permalink raw reply related
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox