* [PATCH v14 16/22] selftests/vm: fix an assertion in test_pkey_alloc_exhaust()
From: Ram Pai @ 2018-07-17 13:49 UTC (permalink / raw)
To: shuahkh, linux-kselftest
Cc: mpe, linuxppc-dev, linux-mm, x86, linux-arch, mingo, dave.hansen,
mhocko, bauerman, linuxram, fweimer, msuchanek, aneesh.kumar
In-Reply-To: <1531835365-32387-1-git-send-email-linuxram@us.ibm.com>
The maximum number of keys that can be allocated has to
take into consideration, that some keys are reserved by
the architecture for specific purpose. Hence cannot
be allocated.
Fix the assertion in test_pkey_alloc_exhaust()
cc: Dave Hansen <dave.hansen@intel.com>
cc: Florian Weimer <fweimer@redhat.com>
Signed-off-by: Ram Pai <linuxram@us.ibm.com>
---
tools/testing/selftests/vm/protection_keys.c | 12 ++++--------
1 files changed, 4 insertions(+), 8 deletions(-)
diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c
index d27fa5e..67d841e 100644
--- a/tools/testing/selftests/vm/protection_keys.c
+++ b/tools/testing/selftests/vm/protection_keys.c
@@ -1171,15 +1171,11 @@ void test_pkey_alloc_exhaust(int *ptr, u16 pkey)
pkey_assert(i < NR_PKEYS*2);
/*
- * There are 16 pkeys supported in hardware. Three are
- * allocated by the time we get here:
- * 1. The default key (0)
- * 2. One possibly consumed by an execute-only mapping.
- * 3. One allocated by the test code and passed in via
- * 'pkey' to this function.
- * Ensure that we can allocate at least another 13 (16-3).
+ * There are NR_PKEYS pkeys supported in hardware. arch_reserved_keys()
+ * are reserved. And one key is allocated by the test code and passed
+ * in via 'pkey' to this function.
*/
- pkey_assert(i >= NR_PKEYS-3);
+ pkey_assert(i >= (NR_PKEYS-arch_reserved_keys()-1));
for (i = 0; i < nr_allocated_pkeys; i++) {
err = sys_pkey_free(allocated_pkeys[i]);
--
1.7.1
^ permalink raw reply related
* [PATCH v14 15/22] selftests/vm: powerpc implementation to check support for pkey
From: Ram Pai @ 2018-07-17 13:49 UTC (permalink / raw)
To: shuahkh, linux-kselftest
Cc: mpe, linuxppc-dev, linux-mm, x86, linux-arch, mingo, dave.hansen,
mhocko, bauerman, linuxram, fweimer, msuchanek, aneesh.kumar
In-Reply-To: <1531835365-32387-1-git-send-email-linuxram@us.ibm.com>
pkey subsystem is supported if the hardware and kernel has support.
We determine that by checking if allocation of a key succeeds or not.
cc: Dave Hansen <dave.hansen@intel.com>
cc: Florian Weimer <fweimer@redhat.com>
Signed-off-by: Ram Pai <linuxram@us.ibm.com>
---
tools/testing/selftests/vm/pkey-helpers.h | 2 ++
tools/testing/selftests/vm/pkey-powerpc.h | 14 ++++++++++++--
tools/testing/selftests/vm/pkey-x86.h | 8 ++++----
tools/testing/selftests/vm/protection_keys.c | 9 +++++----
4 files changed, 23 insertions(+), 10 deletions(-)
diff --git a/tools/testing/selftests/vm/pkey-helpers.h b/tools/testing/selftests/vm/pkey-helpers.h
index 321bbbd..288ccff 100644
--- a/tools/testing/selftests/vm/pkey-helpers.h
+++ b/tools/testing/selftests/vm/pkey-helpers.h
@@ -76,6 +76,8 @@ static inline void sigsafe_printf(const char *format, ...)
__attribute__((noinline)) int read_ptr(int *ptr);
void expected_pkey_fault(int pkey);
+int sys_pkey_alloc(unsigned long flags, u64 init_val);
+int sys_pkey_free(unsigned long pkey);
#if defined(__i386__) || defined(__x86_64__) /* arch */
#include "pkey-x86.h"
diff --git a/tools/testing/selftests/vm/pkey-powerpc.h b/tools/testing/selftests/vm/pkey-powerpc.h
index b0d1abe..b649e85 100644
--- a/tools/testing/selftests/vm/pkey-powerpc.h
+++ b/tools/testing/selftests/vm/pkey-powerpc.h
@@ -64,9 +64,19 @@ static inline void __write_pkey_reg(pkey_reg_t pkey_reg)
pkey_reg);
}
-static inline int cpu_has_pku(void)
+static inline bool is_pkey_supported(void)
{
- return 1;
+ /*
+ * No simple way to determine this.
+ * Lets try allocating a key and see if it succeeds.
+ */
+ int ret = sys_pkey_alloc(0, 0);
+
+ if (ret > 0) {
+ sys_pkey_free(ret);
+ return true;
+ }
+ return false;
}
static inline int arch_reserved_keys(void)
diff --git a/tools/testing/selftests/vm/pkey-x86.h b/tools/testing/selftests/vm/pkey-x86.h
index f5d0ff2..887acf2 100644
--- a/tools/testing/selftests/vm/pkey-x86.h
+++ b/tools/testing/selftests/vm/pkey-x86.h
@@ -105,7 +105,7 @@ static inline void __cpuid(unsigned int *eax, unsigned int *ebx,
#define X86_FEATURE_PKU (1<<3) /* Protection Keys for Userspace */
#define X86_FEATURE_OSPKE (1<<4) /* OS Protection Keys Enable */
-static inline int cpu_has_pku(void)
+static inline bool is_pkey_supported(void)
{
unsigned int eax;
unsigned int ebx;
@@ -118,13 +118,13 @@ static inline int cpu_has_pku(void)
if (!(ecx & X86_FEATURE_PKU)) {
dprintf2("cpu does not have PKU\n");
- return 0;
+ return false;
}
if (!(ecx & X86_FEATURE_OSPKE)) {
dprintf2("cpu does not have OSPKE\n");
- return 0;
+ return false;
}
- return 1;
+ return true;
}
#define XSTATE_PKEY_BIT (9)
diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c
index 18e1bb7..d27fa5e 100644
--- a/tools/testing/selftests/vm/protection_keys.c
+++ b/tools/testing/selftests/vm/protection_keys.c
@@ -1389,8 +1389,8 @@ void test_mprotect_pkey_on_unsupported_cpu(int *ptr, u16 pkey)
int size = PAGE_SIZE;
int sret;
- if (cpu_has_pku()) {
- dprintf1("SKIP: %s: no CPU support\n", __func__);
+ if (is_pkey_supported()) {
+ dprintf1("SKIP: %s: no CPU/kernel support\n", __func__);
return;
}
@@ -1454,12 +1454,13 @@ void run_tests_once(void)
int main(void)
{
int nr_iterations = 22;
+ int pkey_supported = is_pkey_supported();
setup_handlers();
- printf("has pkey: %d\n", cpu_has_pku());
+ printf("has pkey: %s\n", pkey_supported ? "Yes" : "No");
- if (!cpu_has_pku()) {
+ if (!pkey_supported) {
int size = PAGE_SIZE;
int *ptr;
--
1.7.1
^ permalink raw reply related
* [PATCH v14 14/22] selftests/vm: Introduce generic abstractions
From: Ram Pai @ 2018-07-17 13:49 UTC (permalink / raw)
To: shuahkh, linux-kselftest
Cc: mpe, linuxppc-dev, linux-mm, x86, linux-arch, mingo, dave.hansen,
mhocko, bauerman, linuxram, fweimer, msuchanek, aneesh.kumar
In-Reply-To: <1531835365-32387-1-git-send-email-linuxram@us.ibm.com>
Introduce generic abstractions and provide architecture
specific implementation for the abstractions.
cc: Dave Hansen <dave.hansen@intel.com>
cc: Florian Weimer <fweimer@redhat.com>
Signed-off-by: Ram Pai <linuxram@us.ibm.com>
Signed-off-by: Thiago Jung Bauermann <bauerman@linux.ibm.com>
---
tools/testing/selftests/vm/pkey-helpers.h | 16 ++++-
tools/testing/selftests/vm/pkey-powerpc.h | 93 ++++++++++++++++++++++++++
tools/testing/selftests/vm/pkey-x86.h | 15 ++++
tools/testing/selftests/vm/protection_keys.c | 52 ++++++++------
4 files changed, 153 insertions(+), 23 deletions(-)
create mode 100644 tools/testing/selftests/vm/pkey-powerpc.h
diff --git a/tools/testing/selftests/vm/pkey-helpers.h b/tools/testing/selftests/vm/pkey-helpers.h
index 52a1152..321bbbd 100644
--- a/tools/testing/selftests/vm/pkey-helpers.h
+++ b/tools/testing/selftests/vm/pkey-helpers.h
@@ -74,8 +74,13 @@ static inline void sigsafe_printf(const char *format, ...)
} \
} while (0)
+__attribute__((noinline)) int read_ptr(int *ptr);
+void expected_pkey_fault(int pkey);
+
#if defined(__i386__) || defined(__x86_64__) /* arch */
#include "pkey-x86.h"
+#elif defined(__powerpc64__) /* arch */
+#include "pkey-powerpc.h"
#else /* arch */
#error Architecture not supported
#endif /* arch */
@@ -186,7 +191,16 @@ static inline int open_hugepage_file(int flag)
static inline int get_start_key(void)
{
- return 1;
+ return 0;
+}
+
+static inline u32 *siginfo_get_pkey_ptr(siginfo_t *si)
+{
+#ifdef si_pkey
+ return &si->si_pkey;
+#else
+ return (u32 *)(((u8 *)si) + si_pkey_offset);
+#endif
}
#endif /* _PKEYS_HELPER_H */
diff --git a/tools/testing/selftests/vm/pkey-powerpc.h b/tools/testing/selftests/vm/pkey-powerpc.h
new file mode 100644
index 0000000..b0d1abe
--- /dev/null
+++ b/tools/testing/selftests/vm/pkey-powerpc.h
@@ -0,0 +1,93 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _PKEYS_POWERPC_H
+#define _PKEYS_POWERPC_H
+
+#ifndef SYS_mprotect_key
+# define SYS_mprotect_key 386
+#endif
+#ifndef SYS_pkey_alloc
+# define SYS_pkey_alloc 384
+# define SYS_pkey_free 385
+#endif
+#define REG_IP_IDX PT_NIP
+#define REG_TRAPNO PT_TRAP
+#define gregs gp_regs
+#define fpregs fp_regs
+#define si_pkey_offset 0x20
+
+#ifndef PKEY_DISABLE_ACCESS
+# define PKEY_DISABLE_ACCESS 0x3 /* disable read and write */
+#endif
+
+#ifndef PKEY_DISABLE_WRITE
+# define PKEY_DISABLE_WRITE 0x2
+#endif
+
+#define NR_PKEYS 32
+#define NR_RESERVED_PKEYS_4K 27 /* pkey-0, pkey-1, exec-only-pkey
+ and 24 other keys that cannot be
+ represented in the PTE */
+#define NR_RESERVED_PKEYS_64K 3 /* pkey-0, pkey-1 and exec-only-pkey */
+#define PKEY_BITS_PER_PKEY 2
+#define HPAGE_SIZE (1UL << 24)
+#define PAGE_SIZE (1UL << 16)
+#define pkey_reg_t u64
+#define PKEY_REG_FMT "%016lx"
+#define HUGEPAGE_FILE "/sys/kernel/mm/hugepages/hugepages-16384kB/nr_hugepages"
+
+static inline u32 pkey_bit_position(int pkey)
+{
+ return (NR_PKEYS - pkey - 1) * PKEY_BITS_PER_PKEY;
+}
+
+static inline pkey_reg_t __read_pkey_reg(void)
+{
+ pkey_reg_t pkey_reg;
+
+ asm volatile("mfspr %0, 0xd" : "=r" (pkey_reg));
+
+ return pkey_reg;
+}
+
+static inline void __write_pkey_reg(pkey_reg_t pkey_reg)
+{
+ pkey_reg_t eax = pkey_reg;
+
+ dprintf4("%s() changing "PKEY_REG_FMT" to "PKEY_REG_FMT"\n",
+ __func__, __read_pkey_reg(), pkey_reg);
+
+ asm volatile("mtspr 0xd, %0" : : "r" ((unsigned long)(eax)) : "memory");
+
+ dprintf4("%s() pkey register after changing "PKEY_REG_FMT" to "
+ PKEY_REG_FMT"\n", __func__, __read_pkey_reg(),
+ pkey_reg);
+}
+
+static inline int cpu_has_pku(void)
+{
+ return 1;
+}
+
+static inline int arch_reserved_keys(void)
+{
+ if (sysconf(_SC_PAGESIZE) == 4096)
+ return NR_RESERVED_PKEYS_4K;
+ else
+ return NR_RESERVED_PKEYS_64K;
+}
+
+void expect_fault_on_read_execonly_key(void *p1, int pkey)
+{
+ /* powerpc does not allow userspace to change permissions of exec-only
+ * keys since those keys are not allocated by userspace. The signal
+ * handler wont be able to reset the permissions, which means the code
+ * will infinitely continue to segfault here.
+ */
+ return;
+}
+
+/* 8-bytes of instruction * 16384bytes = 1 page */
+#define __page_o_noops() asm(".rept 16384 ; nop; .endr")
+
+#endif /* _PKEYS_POWERPC_H */
diff --git a/tools/testing/selftests/vm/pkey-x86.h b/tools/testing/selftests/vm/pkey-x86.h
index d5fa299..f5d0ff2 100644
--- a/tools/testing/selftests/vm/pkey-x86.h
+++ b/tools/testing/selftests/vm/pkey-x86.h
@@ -42,6 +42,7 @@
#endif
#define NR_PKEYS 16
+#define NR_RESERVED_PKEYS 2 /* pkey-0 and exec-only-pkey */
#define PKEY_BITS_PER_PKEY 2
#define HPAGE_SIZE (1UL<<21)
#define PAGE_SIZE 4096
@@ -161,4 +162,18 @@ int pkey_reg_xstate_offset(void)
return xstate_offset;
}
+static inline int arch_reserved_keys(void)
+{
+ return NR_RESERVED_PKEYS;
+}
+
+void expect_fault_on_read_execonly_key(void *p1, int pkey)
+{
+ int ptr_contents;
+
+ ptr_contents = read_ptr(p1);
+ dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents);
+ expected_pkey_fault(pkey);
+}
+
#endif /* _PKEYS_X86_H */
diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c
index 304f74f..18e1bb7 100644
--- a/tools/testing/selftests/vm/protection_keys.c
+++ b/tools/testing/selftests/vm/protection_keys.c
@@ -197,17 +197,18 @@ void dump_mem(void *dumpme, int len_bytes)
int pkey_faults;
int last_si_pkey = -1;
+void pkey_access_allow(int pkey);
void signal_handler(int signum, siginfo_t *si, void *vucontext)
{
ucontext_t *uctxt = vucontext;
int trapno;
unsigned long ip;
char *fpregs;
+#if defined(__i386__) || defined(__x86_64__) /* arch */
pkey_reg_t *pkey_reg_ptr;
- u64 siginfo_pkey;
+#endif /* defined(__i386__) || defined(__x86_64__) */
+ u32 siginfo_pkey;
u32 *si_pkey_ptr;
- int pkey_reg_offset;
- fpregset_t fpregset;
dprint_in_signal = 1;
dprintf1(">>>>===============SIGSEGV============================\n");
@@ -217,12 +218,14 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext)
trapno = uctxt->uc_mcontext.gregs[REG_TRAPNO];
ip = uctxt->uc_mcontext.gregs[REG_IP_IDX];
- fpregset = uctxt->uc_mcontext.fpregs;
- fpregs = (void *)fpregset;
+ fpregs = (char *) uctxt->uc_mcontext.fpregs;
dprintf2("%s() trapno: %d ip: 0x%016lx info->si_code: %s/%d\n",
__func__, trapno, ip, si_code_str(si->si_code),
si->si_code);
+
+#if defined(__i386__) || defined(__x86_64__) /* arch */
+
#ifdef __i386__
/*
* 32-bit has some extra padding so that userspace can tell whether
@@ -230,20 +233,21 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext)
* state. We just assume that it is here.
*/
fpregs += 0x70;
-#endif
- pkey_reg_offset = pkey_reg_xstate_offset();
- pkey_reg_ptr = (void *)(&fpregs[pkey_reg_offset]);
+#endif /* __i386__ */
- dprintf1("siginfo: %p\n", si);
- dprintf1(" fpregs: %p\n", fpregs);
+ pkey_reg_ptr = (void *)(&fpregs[pkey_reg_xstate_offset()]);
/*
- * If we got a PKEY fault, we *HAVE* to have at least one bit set in
+ * If we got a key fault, we *HAVE* to have at least one bit set in
* here.
*/
dprintf1("pkey_reg_xstate_offset: %d\n", pkey_reg_xstate_offset());
if (DEBUG_LEVEL > 4)
dump_mem(pkey_reg_ptr - 128, 256);
pkey_assert(*pkey_reg_ptr);
+#endif /* defined(__i386__) || defined(__x86_64__) */
+
+ dprintf1("siginfo: %p\n", si);
+ dprintf1(" fpregs: %p\n", fpregs);
if ((si->si_code == SEGV_MAPERR) ||
(si->si_code == SEGV_ACCERR) ||
@@ -252,22 +256,28 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext)
exit(4);
}
- si_pkey_ptr = (u32 *)(((u8 *)si) + si_pkey_offset);
+ si_pkey_ptr = siginfo_get_pkey_ptr(si);
dprintf1("si_pkey_ptr: %p\n", si_pkey_ptr);
- dump_mem((u8 *)si_pkey_ptr - 8, 24);
+ dump_mem(si_pkey_ptr - 8, 24);
siginfo_pkey = *si_pkey_ptr;
pkey_assert(siginfo_pkey < NR_PKEYS);
last_si_pkey = siginfo_pkey;
- dprintf1("signal pkey_reg from xsave: "PKEY_REG_FMT"\n", *pkey_reg_ptr);
/*
* need __read_pkey_reg() version so we do not do shadow_pkey_reg
* checking
*/
dprintf1("signal pkey_reg from pkey_reg: "PKEY_REG_FMT"\n",
__read_pkey_reg());
- dprintf1("pkey from siginfo: %jx\n", siginfo_pkey);
- *(u64 *)pkey_reg_ptr = 0x00000000;
+#if defined(__i386__) || defined(__x86_64__) /* arch */
+ dprintf1("signal pkey_reg from xsave: "PKEY_REG_FMT"\n", *pkey_reg_ptr);
+ *(u64 *)pkey_reg_ptr &= clear_pkey_flags(siginfo_pkey,
+ PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE);
+#elif __powerpc64__
+ pkey_access_allow(siginfo_pkey);
+#endif
+ shadow_pkey_reg &= clear_pkey_flags(siginfo_pkey,
+ PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE);
dprintf1("WARNING: set PKEY_REG=0 to allow faulting instruction "
"to continue\n");
pkey_faults++;
@@ -1328,9 +1338,8 @@ void test_executing_on_unreadable_memory(int *ptr, u16 pkey)
madvise(p1, PAGE_SIZE, MADV_DONTNEED);
lots_o_noops_around_write(&scratch);
do_not_expect_pkey_fault("executing on PROT_EXEC memory");
- ptr_contents = read_ptr(p1);
- dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents);
- expected_pkey_fault(pkey);
+
+ expect_fault_on_read_execonly_key(p1, pkey);
}
void test_implicit_mprotect_exec_only_memory(int *ptr, u16 pkey)
@@ -1357,9 +1366,8 @@ void test_implicit_mprotect_exec_only_memory(int *ptr, u16 pkey)
madvise(p1, PAGE_SIZE, MADV_DONTNEED);
lots_o_noops_around_write(&scratch);
do_not_expect_pkey_fault("executing on PROT_EXEC memory");
- ptr_contents = read_ptr(p1);
- dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents);
- expected_pkey_fault(UNKNOWN_PKEY);
+
+ expect_fault_on_read_execonly_key(p1, UNKNOWN_PKEY);
/*
* Put the memory back to non-PROT_EXEC. Should clear the
--
1.7.1
^ permalink raw reply related
* [PATCH v14 13/22] selftests/vm: generic cleanup
From: Ram Pai @ 2018-07-17 13:49 UTC (permalink / raw)
To: shuahkh, linux-kselftest
Cc: mpe, linuxppc-dev, linux-mm, x86, linux-arch, mingo, dave.hansen,
mhocko, bauerman, linuxram, fweimer, msuchanek, aneesh.kumar
In-Reply-To: <1531835365-32387-1-git-send-email-linuxram@us.ibm.com>
cleanup the code to satisfy coding styles.
cc: Dave Hansen <dave.hansen@intel.com>
cc: Florian Weimer <fweimer@redhat.com>
Signed-off-by: Ram Pai <linuxram@us.ibm.com>
---
tools/testing/selftests/vm/protection_keys.c | 64 +++++++++++++++++--------
1 files changed, 43 insertions(+), 21 deletions(-)
diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c
index f50cce8..304f74f 100644
--- a/tools/testing/selftests/vm/protection_keys.c
+++ b/tools/testing/selftests/vm/protection_keys.c
@@ -4,7 +4,7 @@
*
* There are examples in here of:
* * how to set protection keys on memory
- * * how to set/clear bits in pkey registers (the rights register)
+ * * how to set/clear bits in Protection Key registers (the rights register)
* * how to handle SEGV_PKUERR signals and extract pkey-relevant
* information from the siginfo
*
@@ -13,13 +13,18 @@
* prefault pages in at malloc, or not
* protect MPX bounds tables with protection keys?
* make sure VMA splitting/merging is working correctly
- * OOMs can destroy mm->mmap (see exit_mmap()), so make sure it is immune to pkeys
- * look for pkey "leaks" where it is still set on a VMA but "freed" back to the kernel
- * do a plain mprotect() to a mprotect_pkey() area and make sure the pkey sticks
+ * OOMs can destroy mm->mmap (see exit_mmap()),
+ * so make sure it is immune to pkeys
+ * look for pkey "leaks" where it is still set on a VMA
+ * but "freed" back to the kernel
+ * do a plain mprotect() to a mprotect_pkey() area and make
+ * sure the pkey sticks
*
* Compile like this:
- * gcc -o protection_keys -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm
- * gcc -m32 -o protection_keys_32 -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm
+ * gcc -o protection_keys -O2 -g -std=gnu99
+ * -pthread -Wall protection_keys.c -lrt -ldl -lm
+ * gcc -m32 -o protection_keys_32 -O2 -g -std=gnu99
+ * -pthread -Wall protection_keys.c -lrt -ldl -lm
*/
#define _GNU_SOURCE
#include <errno.h>
@@ -263,10 +268,12 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext)
__read_pkey_reg());
dprintf1("pkey from siginfo: %jx\n", siginfo_pkey);
*(u64 *)pkey_reg_ptr = 0x00000000;
- dprintf1("WARNING: set PRKU=0 to allow faulting instruction to continue\n");
+ dprintf1("WARNING: set PKEY_REG=0 to allow faulting instruction "
+ "to continue\n");
pkey_faults++;
dprintf1("<<<<==================================================\n");
dprint_in_signal = 0;
+ return;
}
int wait_all_children(void)
@@ -384,7 +391,7 @@ void pkey_disable_set(int pkey, int flags)
{
unsigned long syscall_flags = 0;
int ret;
- int pkey_rights;
+ u32 pkey_rights;
pkey_reg_t orig_pkey_reg = read_pkey_reg();
dprintf1("START->%s(%d, 0x%x)\n", __func__,
@@ -487,9 +494,10 @@ int sys_mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot,
return sret;
}
-int sys_pkey_alloc(unsigned long flags, unsigned long init_val)
+int sys_pkey_alloc(unsigned long flags, u64 init_val)
{
int ret = syscall(SYS_pkey_alloc, flags, init_val);
+
dprintf1("%s(flags=%lx, init_val=%lx) syscall ret: %d errno: %d\n",
__func__, flags, init_val, ret, errno);
return ret;
@@ -513,7 +521,7 @@ void pkey_set_shadow(u32 key, u64 init_val)
int alloc_pkey(void)
{
int ret;
- unsigned long init_val = 0x0;
+ u64 init_val = 0x0;
dprintf1("%s()::%d, pkey_reg: "PKEY_REG_FMT" shadow: "PKEY_REG_FMT"\n",
__func__, __LINE__, __read_pkey_reg(), shadow_pkey_reg);
@@ -669,7 +677,9 @@ void record_pkey_malloc(void *ptr, long size, int prot)
/* every record is full */
size_t old_nr_records = nr_pkey_malloc_records;
size_t new_nr_records = (nr_pkey_malloc_records * 2 + 1);
- size_t new_size = new_nr_records * sizeof(struct pkey_malloc_record);
+ size_t new_size = new_nr_records *
+ sizeof(struct pkey_malloc_record);
+
dprintf2("new_nr_records: %zd\n", new_nr_records);
dprintf2("new_size: %zd\n", new_size);
pkey_malloc_records = realloc(pkey_malloc_records, new_size);
@@ -695,9 +705,11 @@ void free_pkey_malloc(void *ptr)
{
long i;
int ret;
+
dprintf3("%s(%p)\n", __func__, ptr);
for (i = 0; i < nr_pkey_malloc_records; i++) {
struct pkey_malloc_record *rec = &pkey_malloc_records[i];
+
dprintf4("looking for ptr %p at record[%ld/%p]: {%p, %ld}\n",
ptr, i, rec, rec->ptr, rec->size);
if ((ptr < rec->ptr) ||
@@ -778,11 +790,13 @@ void setup_hugetlbfs(void)
char buf[] = "123";
if (geteuid() != 0) {
- fprintf(stderr, "WARNING: not run as root, can not do hugetlb test\n");
+ fprintf(stderr,
+ "WARNING: not run as root, can not do hugetlb test\n");
return;
}
- cat_into_file(__stringify(GET_NR_HUGE_PAGES), "/proc/sys/vm/nr_hugepages");
+ cat_into_file(__stringify(GET_NR_HUGE_PAGES),
+ "/proc/sys/vm/nr_hugepages");
/*
* Now go make sure that we got the pages and that they
@@ -803,7 +817,8 @@ void setup_hugetlbfs(void)
}
if (atoi(buf) != GET_NR_HUGE_PAGES) {
- fprintf(stderr, "could not confirm 2M pages, got: '%s' expected %d\n",
+ fprintf(stderr, "could not confirm 2M pages, got:"
+ " '%s' expected %d\n",
buf, GET_NR_HUGE_PAGES);
return;
}
@@ -945,6 +960,7 @@ void __save_test_fd(int fd)
int get_test_read_fd(void)
{
int test_fd = open("/etc/passwd", O_RDONLY);
+
__save_test_fd(test_fd);
return test_fd;
}
@@ -986,7 +1002,8 @@ void test_read_of_access_disabled_region(int *ptr, u16 pkey)
{
int ptr_contents;
- dprintf1("disabling access to PKEY[%02d], doing read @ %p\n", pkey, ptr);
+ dprintf1("disabling access to PKEY[%02d], doing read @ %p\n",
+ pkey, ptr);
read_pkey_reg();
pkey_access_deny(pkey);
ptr_contents = read_ptr(ptr);
@@ -1108,13 +1125,14 @@ void test_pkey_syscalls_bad_args(int *ptr, u16 pkey)
/* Assumes that all pkeys other than 'pkey' are unallocated */
void test_pkey_alloc_exhaust(int *ptr, u16 pkey)
{
- int err;
+ int err = 0;
int allocated_pkeys[NR_PKEYS] = {0};
int nr_allocated_pkeys = 0;
int i;
for (i = 0; i < NR_PKEYS*2; i++) {
int new_pkey;
+
dprintf1("%s() alloc loop: %d\n", __func__, i);
new_pkey = alloc_pkey();
dprintf4("%s()::%d, err: %d pkey_reg: 0x"PKEY_REG_FMT
@@ -1122,9 +1140,11 @@ void test_pkey_alloc_exhaust(int *ptr, u16 pkey)
__func__, __LINE__, err, __read_pkey_reg(),
shadow_pkey_reg);
read_pkey_reg(); /* for shadow checking */
- dprintf2("%s() errno: %d ENOSPC: %d\n", __func__, errno, ENOSPC);
+ dprintf2("%s() errno: %d ENOSPC: %d\n",
+ __func__, errno, ENOSPC);
if ((new_pkey == -1) && (errno == ENOSPC)) {
- dprintf2("%s() failed to allocate pkey after %d tries\n",
+ dprintf2("%s() failed to allocate pkey "
+ "after %d tries\n",
__func__, nr_allocated_pkeys);
break;
}
@@ -1416,7 +1436,8 @@ void run_tests_once(void)
tracing_off();
close_test_fds();
- printf("test %2d PASSED (iteration %d)\n", test_nr, iteration_nr);
+ printf("test %2d PASSED (iteration %d)\n",
+ test_nr, iteration_nr);
dprintf1("======================\n\n");
}
iteration_nr++;
@@ -1428,7 +1449,7 @@ int main(void)
setup_handlers();
- printf("has pku: %d\n", cpu_has_pku());
+ printf("has pkey: %d\n", cpu_has_pku());
if (!cpu_has_pku()) {
int size = PAGE_SIZE;
@@ -1436,7 +1457,8 @@ int main(void)
printf("running PKEY tests for unsupported CPU/OS\n");
- ptr = mmap(NULL, size, PROT_NONE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
+ ptr = mmap(NULL, size, PROT_NONE,
+ MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
assert(ptr != (void *)-1);
test_mprotect_pkey_on_unsupported_cpu(ptr, 1);
exit(0);
--
1.7.1
^ permalink raw reply related
* [PATCH v14 12/22] selftests/vm: pkey register should match shadow pkey
From: Ram Pai @ 2018-07-17 13:49 UTC (permalink / raw)
To: shuahkh, linux-kselftest
Cc: mpe, linuxppc-dev, linux-mm, x86, linux-arch, mingo, dave.hansen,
mhocko, bauerman, linuxram, fweimer, msuchanek, aneesh.kumar
In-Reply-To: <1531835365-32387-1-git-send-email-linuxram@us.ibm.com>
expected_pkey_fault() is comparing the contents of pkey
register with 0. This may not be true all the time. There
could be bits set by default by the architecture
which can never be changed. Hence compare the value against
shadow pkey register, which is supposed to track the bits
accurately all throughout
cc: Dave Hansen <dave.hansen@intel.com>
cc: Florian Weimer <fweimer@redhat.com>
Signed-off-by: Ram Pai <linuxram@us.ibm.com>
---
tools/testing/selftests/vm/protection_keys.c | 4 ++--
1 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c
index 2e448e0..f50cce8 100644
--- a/tools/testing/selftests/vm/protection_keys.c
+++ b/tools/testing/selftests/vm/protection_keys.c
@@ -913,10 +913,10 @@ void expected_pkey_fault(int pkey)
pkey_assert(last_si_pkey == pkey);
/*
- * The signal handler shold have cleared out PKEY register to let the
+ * The signal handler should have cleared out pkey-register to let the
* test program continue. We now have to restore it.
*/
- if (__read_pkey_reg() != 0)
+ if (__read_pkey_reg() != shadow_pkey_reg)
pkey_assert(0);
__write_pkey_reg(shadow_pkey_reg);
--
1.7.1
^ permalink raw reply related
* [PATCH v14 11/22] selftests/vm: introduce two arch independent abstraction
From: Ram Pai @ 2018-07-17 13:49 UTC (permalink / raw)
To: shuahkh, linux-kselftest
Cc: mpe, linuxppc-dev, linux-mm, x86, linux-arch, mingo, dave.hansen,
mhocko, bauerman, linuxram, fweimer, msuchanek, aneesh.kumar
In-Reply-To: <1531835365-32387-1-git-send-email-linuxram@us.ibm.com>
open_hugepage_file() <- opens the huge page file
get_start_key() <-- provides the first non-reserved key.
cc: Dave Hansen <dave.hansen@intel.com>
cc: Florian Weimer <fweimer@redhat.com>
Signed-off-by: Ram Pai <linuxram@us.ibm.com>
Signed-off-by: Thiago Jung Bauermann <bauerman@linux.ibm.com>
Reviewed-by: Dave Hansen <dave.hansen@intel.com>
---
tools/testing/selftests/vm/pkey-helpers.h | 10 ++++++++++
tools/testing/selftests/vm/pkey-x86.h | 1 +
tools/testing/selftests/vm/protection_keys.c | 6 +++---
3 files changed, 14 insertions(+), 3 deletions(-)
diff --git a/tools/testing/selftests/vm/pkey-helpers.h b/tools/testing/selftests/vm/pkey-helpers.h
index ada0146..52a1152 100644
--- a/tools/testing/selftests/vm/pkey-helpers.h
+++ b/tools/testing/selftests/vm/pkey-helpers.h
@@ -179,4 +179,14 @@ static inline void __pkey_write_allow(int pkey, int do_allow_write)
#define __stringify_1(x...) #x
#define __stringify(x...) __stringify_1(x)
+static inline int open_hugepage_file(int flag)
+{
+ return open(HUGEPAGE_FILE, flag);
+}
+
+static inline int get_start_key(void)
+{
+ return 1;
+}
+
#endif /* _PKEYS_HELPER_H */
diff --git a/tools/testing/selftests/vm/pkey-x86.h b/tools/testing/selftests/vm/pkey-x86.h
index 2b3780d..d5fa299 100644
--- a/tools/testing/selftests/vm/pkey-x86.h
+++ b/tools/testing/selftests/vm/pkey-x86.h
@@ -48,6 +48,7 @@
#define MB (1<<20)
#define pkey_reg_t u32
#define PKEY_REG_FMT "%016x"
+#define HUGEPAGE_FILE "/sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages"
static inline u32 pkey_bit_position(int pkey)
{
diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c
index 2565b4c..2e448e0 100644
--- a/tools/testing/selftests/vm/protection_keys.c
+++ b/tools/testing/selftests/vm/protection_keys.c
@@ -788,7 +788,7 @@ void setup_hugetlbfs(void)
* Now go make sure that we got the pages and that they
* are 2M pages. Someone might have made 1G the default.
*/
- fd = open("/sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages", O_RDONLY);
+ fd = open_hugepage_file(O_RDONLY);
if (fd < 0) {
perror("opening sysfs 2M hugetlb config");
return;
@@ -1075,10 +1075,10 @@ void test_kernel_gup_write_to_write_disabled_region(int *ptr, u16 pkey)
void test_pkey_syscalls_on_non_allocated_pkey(int *ptr, u16 pkey)
{
int err;
- int i;
+ int i = get_start_key();
/* Note: 0 is the default pkey, so don't mess with it */
- for (i = 1; i < NR_PKEYS; i++) {
+ for (; i < NR_PKEYS; i++) {
if (pkey == i)
continue;
--
1.7.1
^ permalink raw reply related
* [PATCH v14 10/22] selftests/vm: fix alloc_random_pkey() to make it really random
From: Ram Pai @ 2018-07-17 13:49 UTC (permalink / raw)
To: shuahkh, linux-kselftest
Cc: mpe, linuxppc-dev, linux-mm, x86, linux-arch, mingo, dave.hansen,
mhocko, bauerman, linuxram, fweimer, msuchanek, aneesh.kumar
In-Reply-To: <1531835365-32387-1-git-send-email-linuxram@us.ibm.com>
alloc_random_pkey() was allocating the same pkey every time.
Not all pkeys were geting tested. fixed it.
cc: Dave Hansen <dave.hansen@intel.com>
cc: Florian Weimer <fweimer@redhat.com>
Signed-off-by: Ram Pai <linuxram@us.ibm.com>
---
tools/testing/selftests/vm/protection_keys.c | 6 +++++-
1 files changed, 5 insertions(+), 1 deletions(-)
diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c
index 8fa4f74..2565b4c 100644
--- a/tools/testing/selftests/vm/protection_keys.c
+++ b/tools/testing/selftests/vm/protection_keys.c
@@ -24,6 +24,7 @@
#define _GNU_SOURCE
#include <errno.h>
#include <linux/futex.h>
+#include <time.h>
#include <sys/time.h>
#include <sys/syscall.h>
#include <string.h>
@@ -573,13 +574,15 @@ int alloc_random_pkey(void)
int alloced_pkeys[NR_PKEYS];
int nr_alloced = 0;
int random_index;
+
memset(alloced_pkeys, 0, sizeof(alloced_pkeys));
+ srand((unsigned int)time(NULL));
/* allocate every possible key and make a note of which ones we got */
max_nr_pkey_allocs = NR_PKEYS;
- max_nr_pkey_allocs = 1;
for (i = 0; i < max_nr_pkey_allocs; i++) {
int new_pkey = alloc_pkey();
+
if (new_pkey < 0)
break;
alloced_pkeys[nr_alloced++] = new_pkey;
@@ -595,6 +598,7 @@ int alloc_random_pkey(void)
/* go through the allocated ones that we did not want and free them */
for (i = 0; i < nr_alloced; i++) {
int free_ret;
+
if (!alloced_pkeys[i])
continue;
free_ret = sys_pkey_free(alloced_pkeys[i]);
--
1.7.1
^ permalink raw reply related
* [PATCH v14 09/22] selftests/vm: fixed bugs in pkey_disable_clear()
From: Ram Pai @ 2018-07-17 13:49 UTC (permalink / raw)
To: shuahkh, linux-kselftest
Cc: mpe, linuxppc-dev, linux-mm, x86, linux-arch, mingo, dave.hansen,
mhocko, bauerman, linuxram, fweimer, msuchanek, aneesh.kumar
In-Reply-To: <1531835365-32387-1-git-send-email-linuxram@us.ibm.com>
instead of clearing the bits, pkey_disable_clear() was setting
the bits. Fixed it.
Also fixed a wrong assertion in that function. When bits are
cleared, the resulting bit value will be less than the original.
This hasn't been a problem so far because this code isn't currently
used.
cc: Dave Hansen <dave.hansen@intel.com>
cc: Florian Weimer <fweimer@redhat.com>
Signed-off-by: Ram Pai <linuxram@us.ibm.com>
---
tools/testing/selftests/vm/protection_keys.c | 4 ++--
1 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c
index 2dd94c3..8fa4f74 100644
--- a/tools/testing/selftests/vm/protection_keys.c
+++ b/tools/testing/selftests/vm/protection_keys.c
@@ -433,7 +433,7 @@ void pkey_disable_clear(int pkey, int flags)
pkey, pkey, pkey_rights);
pkey_assert(pkey_rights >= 0);
- pkey_rights |= flags;
+ pkey_rights &= ~flags;
ret = hw_pkey_set(pkey, pkey_rights, 0);
shadow_pkey_reg &= clear_pkey_flags(pkey, flags);
@@ -446,7 +446,7 @@ void pkey_disable_clear(int pkey, int flags)
dprintf1("%s(%d) pkey_reg: 0x"PKEY_REG_FMT"\n", __func__,
pkey, read_pkey_reg());
if (flags)
- assert(read_pkey_reg() > orig_pkey_reg);
+ assert(read_pkey_reg() <= orig_pkey_reg);
}
void pkey_write_allow(int pkey)
--
1.7.1
^ permalink raw reply related
* [PATCH v14 08/22] selftests/vm: fix the wrong assert in pkey_disable_set()
From: Ram Pai @ 2018-07-17 13:49 UTC (permalink / raw)
To: shuahkh, linux-kselftest
Cc: mpe, linuxppc-dev, linux-mm, x86, linux-arch, mingo, dave.hansen,
mhocko, bauerman, linuxram, fweimer, msuchanek, aneesh.kumar
In-Reply-To: <1531835365-32387-1-git-send-email-linuxram@us.ibm.com>
If the flag is 0, no bits will be set. Hence we cant expect
the resulting bitmap to have a higher value than what it
was earlier.
cc: Dave Hansen <dave.hansen@intel.com>
cc: Florian Weimer <fweimer@redhat.com>
Signed-off-by: Ram Pai <linuxram@us.ibm.com>
---
tools/testing/selftests/vm/protection_keys.c | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)
diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c
index fb7dd32..2dd94c3 100644
--- a/tools/testing/selftests/vm/protection_keys.c
+++ b/tools/testing/selftests/vm/protection_keys.c
@@ -415,7 +415,7 @@ void pkey_disable_set(int pkey, int flags)
dprintf1("%s(%d) pkey_reg: 0x"PKEY_REG_FMT"\n",
__func__, pkey, read_pkey_reg());
if (flags)
- pkey_assert(read_pkey_reg() > orig_pkey_reg);
+ pkey_assert(read_pkey_reg() >= orig_pkey_reg);
dprintf1("END<---%s(%d, 0x%x)\n", __func__,
pkey, flags);
}
--
1.7.1
^ permalink raw reply related
* [PATCH v14 07/22] selftests/vm: generic function to handle shadow key register
From: Ram Pai @ 2018-07-17 13:49 UTC (permalink / raw)
To: shuahkh, linux-kselftest
Cc: mpe, linuxppc-dev, linux-mm, x86, linux-arch, mingo, dave.hansen,
mhocko, bauerman, linuxram, fweimer, msuchanek, aneesh.kumar
In-Reply-To: <1531835365-32387-1-git-send-email-linuxram@us.ibm.com>
helper functions to handler shadow pkey register
cc: Dave Hansen <dave.hansen@intel.com>
cc: Florian Weimer <fweimer@redhat.com>
Signed-off-by: Ram Pai <linuxram@us.ibm.com>
Signed-off-by: Thiago Jung Bauermann <bauerman@linux.ibm.com>
---
tools/testing/selftests/vm/pkey-helpers.h | 29 ++++++++++++++++++++++
tools/testing/selftests/vm/pkey-x86.h | 5 ++++
tools/testing/selftests/vm/protection_keys.c | 34 ++++++++++++++++---------
3 files changed, 56 insertions(+), 12 deletions(-)
diff --git a/tools/testing/selftests/vm/pkey-helpers.h b/tools/testing/selftests/vm/pkey-helpers.h
index 2a1a024..ada0146 100644
--- a/tools/testing/selftests/vm/pkey-helpers.h
+++ b/tools/testing/selftests/vm/pkey-helpers.h
@@ -80,6 +80,35 @@ static inline void sigsafe_printf(const char *format, ...)
#error Architecture not supported
#endif /* arch */
+static inline pkey_reg_t clear_pkey_flags(int pkey, pkey_reg_t flags)
+{
+ u32 shift = pkey_bit_position(pkey);
+
+ return ~(flags << shift);
+}
+
+/*
+ * Takes pkey flags and puts them at the right bit position for the given key so
+ * that the result can be ORed into the register.
+ */
+static inline pkey_reg_t left_shift_bits(int pkey, pkey_reg_t bits)
+{
+ u32 shift = pkey_bit_position(pkey);
+
+ return (bits << shift);
+}
+
+/*
+ * Takes pkey register values and puts the flags for the given pkey at the least
+ * significant bits of the returned value.
+ */
+static inline pkey_reg_t right_shift_bits(int pkey, pkey_reg_t bits)
+{
+ u32 shift = pkey_bit_position(pkey);
+
+ return (bits >> shift);
+}
+
extern pkey_reg_t shadow_pkey_reg;
static inline pkey_reg_t _read_pkey_reg(int line)
diff --git a/tools/testing/selftests/vm/pkey-x86.h b/tools/testing/selftests/vm/pkey-x86.h
index 5f40901..2b3780d 100644
--- a/tools/testing/selftests/vm/pkey-x86.h
+++ b/tools/testing/selftests/vm/pkey-x86.h
@@ -49,6 +49,11 @@
#define pkey_reg_t u32
#define PKEY_REG_FMT "%016x"
+static inline u32 pkey_bit_position(int pkey)
+{
+ return pkey * PKEY_BITS_PER_PKEY;
+}
+
static inline void __page_o_noops(void)
{
/* 8-bytes of instruction * 512 bytes = 1 page */
diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c
index d95fe20..fb7dd32 100644
--- a/tools/testing/selftests/vm/protection_keys.c
+++ b/tools/testing/selftests/vm/protection_keys.c
@@ -342,7 +342,7 @@ static u32 hw_pkey_get(int pkey, unsigned long flags)
__func__, pkey, flags, 0, 0);
dprintf2("%s() raw pkey_reg: "PKEY_REG_FMT"\n", __func__, pkey_reg);
- shifted_pkey_reg = (pkey_reg >> (pkey * PKEY_BITS_PER_PKEY));
+ shifted_pkey_reg = right_shift_bits(pkey, pkey_reg);
dprintf2("%s() shifted_pkey_reg: "PKEY_REG_FMT"\n", __func__,
shifted_pkey_reg);
masked_pkey_reg = shifted_pkey_reg & mask;
@@ -366,9 +366,9 @@ static int hw_pkey_set(int pkey, unsigned long rights, unsigned long flags)
/* copy old pkey_reg */
new_pkey_reg = old_pkey_reg;
/* mask out bits from pkey in old value: */
- new_pkey_reg &= ~(mask << (pkey * PKEY_BITS_PER_PKEY));
+ new_pkey_reg &= clear_pkey_flags(pkey, mask);
/* OR in new bits for pkey: */
- new_pkey_reg |= (rights << (pkey * PKEY_BITS_PER_PKEY));
+ new_pkey_reg |= left_shift_bits(pkey, rights);
__write_pkey_reg(new_pkey_reg);
@@ -402,7 +402,7 @@ void pkey_disable_set(int pkey, int flags)
ret = hw_pkey_set(pkey, pkey_rights, syscall_flags);
assert(!ret);
/*pkey_reg and flags have the same format */
- shadow_pkey_reg |= flags << (pkey * 2);
+ shadow_pkey_reg |= left_shift_bits(pkey, flags);
dprintf1("%s(%d) shadow: 0x"PKEY_REG_FMT"\n",
__func__, pkey, shadow_pkey_reg);
@@ -436,7 +436,7 @@ void pkey_disable_clear(int pkey, int flags)
pkey_rights |= flags;
ret = hw_pkey_set(pkey, pkey_rights, 0);
- shadow_pkey_reg &= ~(flags << (pkey * 2));
+ shadow_pkey_reg &= clear_pkey_flags(pkey, flags);
pkey_assert(ret >= 0);
pkey_rights = hw_pkey_get(pkey, syscall_flags);
@@ -494,6 +494,21 @@ int sys_pkey_alloc(unsigned long flags, unsigned long init_val)
return ret;
}
+void pkey_setup_shadow(void)
+{
+ shadow_pkey_reg = __read_pkey_reg();
+}
+
+void pkey_reset_shadow(u32 key)
+{
+ shadow_pkey_reg &= clear_pkey_flags(key, 0x3);
+}
+
+void pkey_set_shadow(u32 key, u64 init_val)
+{
+ shadow_pkey_reg |= left_shift_bits(key, init_val);
+}
+
int alloc_pkey(void)
{
int ret;
@@ -512,7 +527,7 @@ int alloc_pkey(void)
shadow_pkey_reg);
if (ret) {
/* clear both the bits: */
- shadow_pkey_reg &= ~(0x3 << (ret * 2));
+ pkey_reset_shadow(ret);
dprintf4("%s()::%d, ret: %d pkey_reg: 0x"PKEY_REG_FMT
" shadow: 0x"PKEY_REG_FMT"\n",
__func__,
@@ -522,7 +537,7 @@ int alloc_pkey(void)
* move the new state in from init_val
* (remember, we cheated and init_val == pkey_reg format)
*/
- shadow_pkey_reg |= (init_val << (ret * 2));
+ pkey_set_shadow(ret, init_val);
}
dprintf4("%s()::%d, ret: %d pkey_reg: 0x"PKEY_REG_FMT
" shadow: 0x"PKEY_REG_FMT"\n",
@@ -1403,11 +1418,6 @@ void run_tests_once(void)
iteration_nr++;
}
-void pkey_setup_shadow(void)
-{
- shadow_pkey_reg = __read_pkey_reg();
-}
-
int main(void)
{
int nr_iterations = 22;
--
1.7.1
^ permalink raw reply related
* [PATCH v14 06/22] selftests/vm: typecast the pkey register
From: Ram Pai @ 2018-07-17 13:49 UTC (permalink / raw)
To: shuahkh, linux-kselftest
Cc: mpe, linuxppc-dev, linux-mm, x86, linux-arch, mingo, dave.hansen,
mhocko, bauerman, linuxram, fweimer, msuchanek, aneesh.kumar
In-Reply-To: <1531835365-32387-1-git-send-email-linuxram@us.ibm.com>
This is in preparation to accomadate a differing size register
across architectures.
cc: Dave Hansen <dave.hansen@intel.com>
cc: Florian Weimer <fweimer@redhat.com>
Signed-off-by: Ram Pai <linuxram@us.ibm.com>
Signed-off-by: Thiago Jung Bauermann <bauerman@linux.ibm.com>
---
tools/testing/selftests/vm/pkey-helpers.h | 23 ++++---
tools/testing/selftests/vm/pkey-x86.h | 16 +++--
tools/testing/selftests/vm/protection_keys.c | 87 +++++++++++++++----------
3 files changed, 73 insertions(+), 53 deletions(-)
diff --git a/tools/testing/selftests/vm/pkey-helpers.h b/tools/testing/selftests/vm/pkey-helpers.h
index 7f18a82..2a1a024 100644
--- a/tools/testing/selftests/vm/pkey-helpers.h
+++ b/tools/testing/selftests/vm/pkey-helpers.h
@@ -80,13 +80,14 @@ static inline void sigsafe_printf(const char *format, ...)
#error Architecture not supported
#endif /* arch */
-extern unsigned int shadow_pkey_reg;
+extern pkey_reg_t shadow_pkey_reg;
-static inline unsigned int _read_pkey_reg(int line)
+static inline pkey_reg_t _read_pkey_reg(int line)
{
- unsigned int pkey_reg = __read_pkey_reg();
+ pkey_reg_t pkey_reg = __read_pkey_reg();
- dprintf4("read_pkey_reg(line=%d) pkey_reg: %x shadow: %x\n",
+ dprintf4("read_pkey_reg(line=%d) pkey_reg: "PKEY_REG_FMT
+ " shadow: "PKEY_REG_FMT"\n",
line, pkey_reg, shadow_pkey_reg);
assert(pkey_reg == shadow_pkey_reg);
@@ -95,15 +96,15 @@ static inline unsigned int _read_pkey_reg(int line)
#define read_pkey_reg() _read_pkey_reg(__LINE__)
-static inline void write_pkey_reg(unsigned int pkey_reg)
+static inline void write_pkey_reg(pkey_reg_t pkey_reg)
{
- dprintf4("%s() changing %08x to %08x\n", __func__,
+ dprintf4("%s() changing "PKEY_REG_FMT" to "PKEY_REG_FMT"\n", __func__,
__read_pkey_reg(), pkey_reg);
/* will do the shadow check for us: */
read_pkey_reg();
__write_pkey_reg(pkey_reg);
shadow_pkey_reg = pkey_reg;
- dprintf4("%s(%08x) pkey_reg: %08x\n", __func__,
+ dprintf4("%s("PKEY_REG_FMT") pkey_reg: "PKEY_REG_FMT"\n", __func__,
pkey_reg, __read_pkey_reg());
}
@@ -113,7 +114,7 @@ static inline void write_pkey_reg(unsigned int pkey_reg)
*/
static inline void __pkey_access_allow(int pkey, int do_allow)
{
- unsigned int pkey_reg = read_pkey_reg();
+ pkey_reg_t pkey_reg = read_pkey_reg();
int bit = pkey * 2;
if (do_allow)
@@ -121,13 +122,13 @@ static inline void __pkey_access_allow(int pkey, int do_allow)
else
pkey_reg |= (1<<bit);
- dprintf4("pkey_reg now: %08x\n", read_pkey_reg());
+ dprintf4("pkey_reg now: "PKEY_REG_FMT"\n", read_pkey_reg());
write_pkey_reg(pkey_reg);
}
static inline void __pkey_write_allow(int pkey, int do_allow_write)
{
- long pkey_reg = read_pkey_reg();
+ pkey_reg_t pkey_reg = read_pkey_reg();
int bit = pkey * 2 + 1;
if (do_allow_write)
@@ -136,7 +137,7 @@ static inline void __pkey_write_allow(int pkey, int do_allow_write)
pkey_reg |= (1<<bit);
write_pkey_reg(pkey_reg);
- dprintf4("pkey_reg now: %08x\n", read_pkey_reg());
+ dprintf4("pkey_reg now: "PKEY_REG_FMT"\n", read_pkey_reg());
}
#define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x)))
diff --git a/tools/testing/selftests/vm/pkey-x86.h b/tools/testing/selftests/vm/pkey-x86.h
index 2f04ade..5f40901 100644
--- a/tools/testing/selftests/vm/pkey-x86.h
+++ b/tools/testing/selftests/vm/pkey-x86.h
@@ -46,6 +46,8 @@
#define HPAGE_SIZE (1UL<<21)
#define PAGE_SIZE 4096
#define MB (1<<20)
+#define pkey_reg_t u32
+#define PKEY_REG_FMT "%016x"
static inline void __page_o_noops(void)
{
@@ -53,11 +55,11 @@ static inline void __page_o_noops(void)
asm(".rept 512 ; nopl 0x7eeeeeee(%eax) ; .endr");
}
-static inline unsigned int __read_pkey_reg(void)
+static inline pkey_reg_t __read_pkey_reg(void)
{
unsigned int eax, edx;
unsigned int ecx = 0;
- unsigned int pkey_reg;
+ pkey_reg_t pkey_reg;
asm volatile(".byte 0x0f,0x01,0xee\n\t"
: "=a" (eax), "=d" (edx)
@@ -66,13 +68,13 @@ static inline unsigned int __read_pkey_reg(void)
return pkey_reg;
}
-static inline void __write_pkey_reg(unsigned int pkey_reg)
+static inline void __write_pkey_reg(pkey_reg_t pkey_reg)
{
- unsigned int eax = pkey_reg;
- unsigned int ecx = 0;
- unsigned int edx = 0;
+ pkey_reg_t eax = pkey_reg;
+ pkey_reg_t ecx = 0;
+ pkey_reg_t edx = 0;
- dprintf4("%s() changing %08x to %08x\n", __func__,
+ dprintf4("%s() changing "PKEY_REG_FMT" to "PKEY_REG_FMT"\n", __func__,
__read_pkey_reg(), pkey_reg);
asm volatile(".byte 0x0f,0x01,0xef\n\t"
: : "a" (eax), "c" (ecx), "d" (edx));
diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c
index 99e4e1e..d95fe20 100644
--- a/tools/testing/selftests/vm/protection_keys.c
+++ b/tools/testing/selftests/vm/protection_keys.c
@@ -48,7 +48,7 @@
int iteration_nr = 1;
int test_nr;
-unsigned int shadow_pkey_reg;
+pkey_reg_t shadow_pkey_reg;
int dprint_in_signal;
char dprint_in_signal_buffer[DPRINT_IN_SIGNAL_BUF_SIZE];
@@ -163,7 +163,7 @@ void dump_mem(void *dumpme, int len_bytes)
for (i = 0; i < len_bytes; i += sizeof(u64)) {
u64 *ptr = (u64 *)(c + i);
- dprintf1("dump[%03d][@%p]: %016jx\n", i, ptr, *ptr);
+ dprintf1("dump[%03d][@%p]: %016lx\n", i, ptr, *ptr);
}
}
@@ -197,7 +197,7 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext)
int trapno;
unsigned long ip;
char *fpregs;
- u32 *pkey_reg_ptr;
+ pkey_reg_t *pkey_reg_ptr;
u64 siginfo_pkey;
u32 *si_pkey_ptr;
int pkey_reg_offset;
@@ -205,7 +205,8 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext)
dprint_in_signal = 1;
dprintf1(">>>>===============SIGSEGV============================\n");
- dprintf1("%s()::%d, pkey_reg: 0x%x shadow: %x\n", __func__, __LINE__,
+ dprintf1("%s()::%d, pkey_reg: "PKEY_REG_FMT" shadow: "PKEY_REG_FMT"\n",
+ __func__, __LINE__,
__read_pkey_reg(), shadow_pkey_reg);
trapno = uctxt->uc_mcontext.gregs[REG_TRAPNO];
@@ -213,8 +214,9 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext)
fpregset = uctxt->uc_mcontext.fpregs;
fpregs = (void *)fpregset;
- dprintf2("%s() trapno: %d ip: 0x%lx info->si_code: %s/%d\n", __func__,
- trapno, ip, si_code_str(si->si_code), si->si_code);
+ dprintf2("%s() trapno: %d ip: 0x%016lx info->si_code: %s/%d\n",
+ __func__, trapno, ip, si_code_str(si->si_code),
+ si->si_code);
#ifdef __i386__
/*
* 32-bit has some extra padding so that userspace can tell whether
@@ -251,12 +253,13 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext)
pkey_assert(siginfo_pkey < NR_PKEYS);
last_si_pkey = siginfo_pkey;
- dprintf1("signal pkey_reg from xsave: %08x\n", *pkey_reg_ptr);
+ dprintf1("signal pkey_reg from xsave: "PKEY_REG_FMT"\n", *pkey_reg_ptr);
/*
* need __read_pkey_reg() version so we do not do shadow_pkey_reg
* checking
*/
- dprintf1("signal pkey_reg from pkey_reg: %08x\n", __read_pkey_reg());
+ dprintf1("signal pkey_reg from pkey_reg: "PKEY_REG_FMT"\n",
+ __read_pkey_reg());
dprintf1("pkey from siginfo: %jx\n", siginfo_pkey);
*(u64 *)pkey_reg_ptr = 0x00000000;
dprintf1("WARNING: set PRKU=0 to allow faulting instruction to continue\n");
@@ -331,16 +334,17 @@ pid_t fork_lazy_child(void)
static u32 hw_pkey_get(int pkey, unsigned long flags)
{
u32 mask = (PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE);
- u32 pkey_reg = __read_pkey_reg();
- u32 shifted_pkey_reg;
+ pkey_reg_t pkey_reg = __read_pkey_reg();
+ pkey_reg_t shifted_pkey_reg;
u32 masked_pkey_reg;
dprintf1("%s(pkey=%d, flags=%lx) = %x / %d\n",
__func__, pkey, flags, 0, 0);
- dprintf2("%s() raw pkey_reg: %x\n", __func__, pkey_reg);
+ dprintf2("%s() raw pkey_reg: "PKEY_REG_FMT"\n", __func__, pkey_reg);
shifted_pkey_reg = (pkey_reg >> (pkey * PKEY_BITS_PER_PKEY));
- dprintf2("%s() shifted_pkey_reg: %x\n", __func__, shifted_pkey_reg);
+ dprintf2("%s() shifted_pkey_reg: "PKEY_REG_FMT"\n", __func__,
+ shifted_pkey_reg);
masked_pkey_reg = shifted_pkey_reg & mask;
dprintf2("%s() masked pkey_reg: %x\n", __func__, masked_pkey_reg);
/*
@@ -353,8 +357,8 @@ static u32 hw_pkey_get(int pkey, unsigned long flags)
static int hw_pkey_set(int pkey, unsigned long rights, unsigned long flags)
{
u32 mask = (PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE);
- u32 old_pkey_reg = __read_pkey_reg();
- u32 new_pkey_reg;
+ pkey_reg_t old_pkey_reg = __read_pkey_reg();
+ pkey_reg_t new_pkey_reg;
/* make sure that 'rights' only contains the bits we expect: */
assert(!(rights & ~mask));
@@ -369,7 +373,7 @@ static int hw_pkey_set(int pkey, unsigned long rights, unsigned long flags)
__write_pkey_reg(new_pkey_reg);
dprintf3("%s(pkey=%d, rights=%lx, flags=%lx) = %x"
- " pkey_reg now: %x old_pkey_reg: %x\n",
+ " pkey_reg now: "PKEY_REG_FMT" old_pkey_reg: "PKEY_REG_FMT"\n",
__func__, pkey, rights, flags, 0, __read_pkey_reg(),
old_pkey_reg);
return 0;
@@ -380,7 +384,7 @@ void pkey_disable_set(int pkey, int flags)
unsigned long syscall_flags = 0;
int ret;
int pkey_rights;
- u32 orig_pkey_reg = read_pkey_reg();
+ pkey_reg_t orig_pkey_reg = read_pkey_reg();
dprintf1("START->%s(%d, 0x%x)\n", __func__,
pkey, flags);
@@ -390,6 +394,7 @@ void pkey_disable_set(int pkey, int flags)
dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__,
pkey, pkey, pkey_rights);
+
pkey_assert(pkey_rights >= 0);
pkey_rights |= flags;
@@ -398,7 +403,8 @@ void pkey_disable_set(int pkey, int flags)
assert(!ret);
/*pkey_reg and flags have the same format */
shadow_pkey_reg |= flags << (pkey * 2);
- dprintf1("%s(%d) shadow: 0x%x\n", __func__, pkey, shadow_pkey_reg);
+ dprintf1("%s(%d) shadow: 0x"PKEY_REG_FMT"\n",
+ __func__, pkey, shadow_pkey_reg);
pkey_assert(ret >= 0);
@@ -406,7 +412,8 @@ void pkey_disable_set(int pkey, int flags)
dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__,
pkey, pkey, pkey_rights);
- dprintf1("%s(%d) pkey_reg: 0x%x\n", __func__, pkey, read_pkey_reg());
+ dprintf1("%s(%d) pkey_reg: 0x"PKEY_REG_FMT"\n",
+ __func__, pkey, read_pkey_reg());
if (flags)
pkey_assert(read_pkey_reg() > orig_pkey_reg);
dprintf1("END<---%s(%d, 0x%x)\n", __func__,
@@ -418,7 +425,7 @@ void pkey_disable_clear(int pkey, int flags)
unsigned long syscall_flags = 0;
int ret;
int pkey_rights = hw_pkey_get(pkey, syscall_flags);
- u32 orig_pkey_reg = read_pkey_reg();
+ pkey_reg_t orig_pkey_reg = read_pkey_reg();
pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE));
@@ -436,7 +443,8 @@ void pkey_disable_clear(int pkey, int flags)
dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__,
pkey, pkey, pkey_rights);
- dprintf1("%s(%d) pkey_reg: 0x%x\n", __func__, pkey, read_pkey_reg());
+ dprintf1("%s(%d) pkey_reg: 0x"PKEY_REG_FMT"\n", __func__,
+ pkey, read_pkey_reg());
if (flags)
assert(read_pkey_reg() > orig_pkey_reg);
}
@@ -491,20 +499,22 @@ int alloc_pkey(void)
int ret;
unsigned long init_val = 0x0;
- dprintf1("%s()::%d, pkey_reg: 0x%x shadow: %x\n", __func__,
- __LINE__, __read_pkey_reg(), shadow_pkey_reg);
+ dprintf1("%s()::%d, pkey_reg: "PKEY_REG_FMT" shadow: "PKEY_REG_FMT"\n",
+ __func__, __LINE__, __read_pkey_reg(), shadow_pkey_reg);
ret = sys_pkey_alloc(0, init_val);
/*
* pkey_alloc() sets PKEY register, so we need to reflect it in
* shadow_pkey_reg:
*/
- dprintf4("%s()::%d, ret: %d pkey_reg: 0x%x shadow: 0x%x\n",
+ dprintf4("%s()::%d, ret: %d pkey_reg: 0x"PKEY_REG_FMT
+ " shadow: 0x"PKEY_REG_FMT"\n",
__func__, __LINE__, ret, __read_pkey_reg(),
shadow_pkey_reg);
if (ret) {
/* clear both the bits: */
shadow_pkey_reg &= ~(0x3 << (ret * 2));
- dprintf4("%s()::%d, ret: %d pkey_reg: 0x%x shadow: 0x%x\n",
+ dprintf4("%s()::%d, ret: %d pkey_reg: 0x"PKEY_REG_FMT
+ " shadow: 0x"PKEY_REG_FMT"\n",
__func__,
__LINE__, ret, __read_pkey_reg(),
shadow_pkey_reg);
@@ -514,13 +524,15 @@ int alloc_pkey(void)
*/
shadow_pkey_reg |= (init_val << (ret * 2));
}
- dprintf4("%s()::%d, ret: %d pkey_reg: 0x%x shadow: 0x%x\n",
+ dprintf4("%s()::%d, ret: %d pkey_reg: 0x"PKEY_REG_FMT
+ " shadow: 0x"PKEY_REG_FMT"\n",
__func__, __LINE__, ret, __read_pkey_reg(),
shadow_pkey_reg);
dprintf1("%s()::%d errno: %d\n", __func__, __LINE__, errno);
/* for shadow checking: */
read_pkey_reg();
- dprintf4("%s()::%d, ret: %d pkey_reg: 0x%x shadow: 0x%x\n",
+ dprintf4("%s()::%d, ret: %d pkey_reg: 0x"PKEY_REG_FMT
+ " shadow: 0x"PKEY_REG_FMT"\n",
__func__, __LINE__, ret, __read_pkey_reg(),
shadow_pkey_reg);
return ret;
@@ -573,7 +585,8 @@ int alloc_random_pkey(void)
free_ret = sys_pkey_free(alloced_pkeys[i]);
pkey_assert(!free_ret);
}
- dprintf1("%s()::%d, ret: %d pkey_reg: 0x%x shadow: 0x%x\n", __func__,
+ dprintf1("%s()::%d, ret: %d pkey_reg: 0x"PKEY_REG_FMT
+ " shadow: 0x"PKEY_REG_FMT"\n", __func__,
__LINE__, ret, __read_pkey_reg(), shadow_pkey_reg);
return ret;
}
@@ -592,11 +605,13 @@ int mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot,
if (nr_iterations-- < 0)
break;
- dprintf1("%s()::%d, ret: %d pkey_reg: 0x%x shadow: 0x%x\n",
+ dprintf1("%s()::%d, ret: %d pkey_reg: 0x"PKEY_REG_FMT
+ " shadow: 0x"PKEY_REG_FMT"\n",
__func__, __LINE__, ret, __read_pkey_reg(),
shadow_pkey_reg);
sys_pkey_free(rpkey);
- dprintf1("%s()::%d, ret: %d pkey_reg: 0x%x shadow: 0x%x\n",
+ dprintf1("%s()::%d, ret: %d pkey_reg: 0x"PKEY_REG_FMT
+ " shadow: 0x"PKEY_REG_FMT"\n",
__func__, __LINE__, ret, __read_pkey_reg(),
shadow_pkey_reg);
}
@@ -606,7 +621,8 @@ int mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot,
dprintf1("mprotect_pkey(%p, %zx, prot=0x%lx, pkey=%ld) ret: %d\n",
ptr, size, orig_prot, pkey, ret);
pkey_assert(!ret);
- dprintf1("%s()::%d, ret: %d pkey_reg: 0x%x shadow: 0x%x\n", __func__,
+ dprintf1("%s()::%d, ret: %d pkey_reg: 0x"PKEY_REG_FMT
+ " shadow: 0x"PKEY_REG_FMT"\n", __func__,
__LINE__, ret, __read_pkey_reg(), shadow_pkey_reg);
return ret;
}
@@ -885,7 +901,7 @@ void expected_pkey_fault(int pkey)
pkey_assert(0);
__write_pkey_reg(shadow_pkey_reg);
- dprintf1("%s() set pkey_reg=%x to restore state after signal "
+ dprintf1("%s() set pkey_reg="PKEY_REG_FMT" to restore state after signal "
"nuked it\n", __func__, shadow_pkey_reg);
last_pkey_faults = pkey_faults;
last_si_pkey = -1;
@@ -1082,7 +1098,8 @@ void test_pkey_alloc_exhaust(int *ptr, u16 pkey)
int new_pkey;
dprintf1("%s() alloc loop: %d\n", __func__, i);
new_pkey = alloc_pkey();
- dprintf4("%s()::%d, err: %d pkey_reg: 0x%x shadow: 0x%x\n",
+ dprintf4("%s()::%d, err: %d pkey_reg: 0x"PKEY_REG_FMT
+ " shadow: 0x"PKEY_REG_FMT"\n",
__func__, __LINE__, err, __read_pkey_reg(),
shadow_pkey_reg);
read_pkey_reg(); /* for shadow checking */
@@ -1264,7 +1281,7 @@ void test_executing_on_unreadable_memory(int *ptr, u16 pkey)
pkey_assert(!ret);
pkey_access_deny(pkey);
- dprintf2("pkey_reg: %x\n", read_pkey_reg());
+ dprintf2("pkey_reg: "PKEY_REG_FMT"\n", read_pkey_reg());
/*
* Make sure this is an *instruction* fault
@@ -1295,7 +1312,7 @@ void test_implicit_mprotect_exec_only_memory(int *ptr, u16 pkey)
ret = mprotect(p1, PAGE_SIZE, PROT_EXEC);
pkey_assert(!ret);
- dprintf2("pkru: %x\n", read_pkey_reg());
+ dprintf2("pkey_reg: "PKEY_REG_FMT"\n", read_pkey_reg());
/* Make sure this is an *instruction* fault */
madvise(p1, PAGE_SIZE, MADV_DONTNEED);
@@ -1412,7 +1429,7 @@ int main(void)
}
pkey_setup_shadow();
- printf("startup pkey_reg: %x\n", read_pkey_reg());
+ printf("startup pkey_reg: 0x"PKEY_REG_FMT"\n", read_pkey_reg());
setup_hugetlbfs();
while (nr_iterations-- > 0)
--
1.7.1
^ permalink raw reply related
* [PATCH v14 05/22] selftests/vm: Make gcc check arguments of sigsafe_printf()
From: Ram Pai @ 2018-07-17 13:49 UTC (permalink / raw)
To: shuahkh, linux-kselftest
Cc: mpe, linuxppc-dev, linux-mm, x86, linux-arch, mingo, dave.hansen,
mhocko, bauerman, linuxram, fweimer, msuchanek, aneesh.kumar
In-Reply-To: <1531835365-32387-1-git-send-email-linuxram@us.ibm.com>
From: Thiago Jung Bauermann <bauerman@linux.ibm.com>
This will help us ensure we print pkey_reg_t values correctly in different
architectures.
Signed-off-by: Thiago Jung Bauermann <bauerman@linux.ibm.com>
---
tools/testing/selftests/vm/pkey-helpers.h | 4 ++++
1 files changed, 4 insertions(+), 0 deletions(-)
diff --git a/tools/testing/selftests/vm/pkey-helpers.h b/tools/testing/selftests/vm/pkey-helpers.h
index 3ed2f02..7f18a82 100644
--- a/tools/testing/selftests/vm/pkey-helpers.h
+++ b/tools/testing/selftests/vm/pkey-helpers.h
@@ -27,6 +27,10 @@
#define DPRINT_IN_SIGNAL_BUF_SIZE 4096
extern int dprint_in_signal;
extern char dprint_in_signal_buffer[DPRINT_IN_SIGNAL_BUF_SIZE];
+
+#ifdef __GNUC__
+__attribute__((format(printf, 1, 2)))
+#endif
static inline void sigsafe_printf(const char *format, ...)
{
va_list ap;
--
1.7.1
^ permalink raw reply related
* [PATCH v14 04/22] selftests/vm: move arch-specific definitions to arch-specific header
From: Ram Pai @ 2018-07-17 13:49 UTC (permalink / raw)
To: shuahkh, linux-kselftest
Cc: mpe, linuxppc-dev, linux-mm, x86, linux-arch, mingo, dave.hansen,
mhocko, bauerman, linuxram, fweimer, msuchanek, aneesh.kumar
In-Reply-To: <1531835365-32387-1-git-send-email-linuxram@us.ibm.com>
From: Thiago Jung Bauermann <bauerman@linux.ibm.com>
In preparation for multi-arch support, move definitions which have
arch-specific values to x86-specific header.
cc: Dave Hansen <dave.hansen@intel.com>
cc: Florian Weimer <fweimer@redhat.com>
Signed-off-by: Ram Pai <linuxram@us.ibm.com>
Signed-off-by: Thiago Jung Bauermann <bauerman@linux.ibm.com>
---
tools/testing/selftests/vm/pkey-helpers.h | 111 +-----------------
tools/testing/selftests/vm/pkey-x86.h | 156 ++++++++++++++++++++++++++
tools/testing/selftests/vm/protection_keys.c | 47 --------
3 files changed, 162 insertions(+), 152 deletions(-)
create mode 100644 tools/testing/selftests/vm/pkey-x86.h
diff --git a/tools/testing/selftests/vm/pkey-helpers.h b/tools/testing/selftests/vm/pkey-helpers.h
index 6ad1bd5..3ed2f02 100644
--- a/tools/testing/selftests/vm/pkey-helpers.h
+++ b/tools/testing/selftests/vm/pkey-helpers.h
@@ -21,9 +21,6 @@
#define PTR_ERR_ENOTSUP ((void *)-ENOTSUP)
-#define NR_PKEYS 16
-#define PKEY_BITS_PER_PKEY 2
-
#ifndef DEBUG_LEVEL
#define DEBUG_LEVEL 0
#endif
@@ -73,19 +70,13 @@ static inline void sigsafe_printf(const char *format, ...)
} \
} while (0)
+#if defined(__i386__) || defined(__x86_64__) /* arch */
+#include "pkey-x86.h"
+#else /* arch */
+#error Architecture not supported
+#endif /* arch */
+
extern unsigned int shadow_pkey_reg;
-static inline unsigned int __read_pkey_reg(void)
-{
- unsigned int eax, edx;
- unsigned int ecx = 0;
- unsigned int pkey_reg;
-
- asm volatile(".byte 0x0f,0x01,0xee\n\t"
- : "=a" (eax), "=d" (edx)
- : "c" (ecx));
- pkey_reg = eax;
- return pkey_reg;
-}
static inline unsigned int _read_pkey_reg(int line)
{
@@ -100,19 +91,6 @@ static inline unsigned int _read_pkey_reg(int line)
#define read_pkey_reg() _read_pkey_reg(__LINE__)
-static inline void __write_pkey_reg(unsigned int pkey_reg)
-{
- unsigned int eax = pkey_reg;
- unsigned int ecx = 0;
- unsigned int edx = 0;
-
- dprintf4("%s() changing %08x to %08x\n", __func__,
- __read_pkey_reg(), pkey_reg);
- asm volatile(".byte 0x0f,0x01,0xef\n\t"
- : : "a" (eax), "c" (ecx), "d" (edx));
- assert(pkey_reg == __read_pkey_reg());
-}
-
static inline void write_pkey_reg(unsigned int pkey_reg)
{
dprintf4("%s() changing %08x to %08x\n", __func__,
@@ -157,83 +135,6 @@ static inline void __pkey_write_allow(int pkey, int do_allow_write)
dprintf4("pkey_reg now: %08x\n", read_pkey_reg());
}
-#define PAGE_SIZE 4096
-#define MB (1<<20)
-
-static inline void __cpuid(unsigned int *eax, unsigned int *ebx,
- unsigned int *ecx, unsigned int *edx)
-{
- /* ecx is often an input as well as an output. */
- asm volatile(
- "cpuid;"
- : "=a" (*eax),
- "=b" (*ebx),
- "=c" (*ecx),
- "=d" (*edx)
- : "0" (*eax), "2" (*ecx));
-}
-
-/* Intel-defined CPU features, CPUID level 0x00000007:0 (ecx) */
-#define X86_FEATURE_PKU (1<<3) /* Protection Keys for Userspace */
-#define X86_FEATURE_OSPKE (1<<4) /* OS Protection Keys Enable */
-
-static inline int cpu_has_pku(void)
-{
- unsigned int eax;
- unsigned int ebx;
- unsigned int ecx;
- unsigned int edx;
-
- eax = 0x7;
- ecx = 0x0;
- __cpuid(&eax, &ebx, &ecx, &edx);
-
- if (!(ecx & X86_FEATURE_PKU)) {
- dprintf2("cpu does not have PKU\n");
- return 0;
- }
- if (!(ecx & X86_FEATURE_OSPKE)) {
- dprintf2("cpu does not have OSPKE\n");
- return 0;
- }
- return 1;
-}
-
-#define XSTATE_PKEY_BIT (9)
-#define XSTATE_PKEY 0x200
-
-int pkey_reg_xstate_offset(void)
-{
- unsigned int eax;
- unsigned int ebx;
- unsigned int ecx;
- unsigned int edx;
- int xstate_offset;
- int xstate_size;
- unsigned long XSTATE_CPUID = 0xd;
- int leaf;
-
- /* assume that XSTATE_PKEY is set in XCR0 */
- leaf = XSTATE_PKEY_BIT;
- {
- eax = XSTATE_CPUID;
- ecx = leaf;
- __cpuid(&eax, &ebx, &ecx, &edx);
-
- if (leaf == XSTATE_PKEY_BIT) {
- xstate_offset = ebx;
- xstate_size = eax;
- }
- }
-
- if (xstate_size == 0) {
- printf("could not find size/offset of PKEY in xsave state\n");
- return 0;
- }
-
- return xstate_offset;
-}
-
#define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x)))
#define ALIGN_UP(x, align_to) (((x) + ((align_to)-1)) & ~((align_to)-1))
#define ALIGN_DOWN(x, align_to) ((x) & ~((align_to)-1))
diff --git a/tools/testing/selftests/vm/pkey-x86.h b/tools/testing/selftests/vm/pkey-x86.h
new file mode 100644
index 0000000..2f04ade
--- /dev/null
+++ b/tools/testing/selftests/vm/pkey-x86.h
@@ -0,0 +1,156 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _PKEYS_X86_H
+#define _PKEYS_X86_H
+
+#ifdef __i386__
+
+#ifndef SYS_mprotect_key
+# define SYS_mprotect_key 380
+#endif
+
+#ifndef SYS_pkey_alloc
+# define SYS_pkey_alloc 381
+# define SYS_pkey_free 382
+#endif
+
+#define REG_IP_IDX REG_EIP
+#define si_pkey_offset 0x14
+
+#else
+
+#ifndef SYS_mprotect_key
+# define SYS_mprotect_key 329
+#endif
+
+#ifndef SYS_pkey_alloc
+# define SYS_pkey_alloc 330
+# define SYS_pkey_free 331
+#endif
+
+#define REG_IP_IDX REG_RIP
+#define si_pkey_offset 0x20
+
+#endif
+
+#ifndef PKEY_DISABLE_ACCESS
+# define PKEY_DISABLE_ACCESS 0x1
+#endif
+
+#ifndef PKEY_DISABLE_WRITE
+# define PKEY_DISABLE_WRITE 0x2
+#endif
+
+#define NR_PKEYS 16
+#define PKEY_BITS_PER_PKEY 2
+#define HPAGE_SIZE (1UL<<21)
+#define PAGE_SIZE 4096
+#define MB (1<<20)
+
+static inline void __page_o_noops(void)
+{
+ /* 8-bytes of instruction * 512 bytes = 1 page */
+ asm(".rept 512 ; nopl 0x7eeeeeee(%eax) ; .endr");
+}
+
+static inline unsigned int __read_pkey_reg(void)
+{
+ unsigned int eax, edx;
+ unsigned int ecx = 0;
+ unsigned int pkey_reg;
+
+ asm volatile(".byte 0x0f,0x01,0xee\n\t"
+ : "=a" (eax), "=d" (edx)
+ : "c" (ecx));
+ pkey_reg = eax;
+ return pkey_reg;
+}
+
+static inline void __write_pkey_reg(unsigned int pkey_reg)
+{
+ unsigned int eax = pkey_reg;
+ unsigned int ecx = 0;
+ unsigned int edx = 0;
+
+ dprintf4("%s() changing %08x to %08x\n", __func__,
+ __read_pkey_reg(), pkey_reg);
+ asm volatile(".byte 0x0f,0x01,0xef\n\t"
+ : : "a" (eax), "c" (ecx), "d" (edx));
+ assert(pkey_reg == __read_pkey_reg());
+}
+
+static inline void __cpuid(unsigned int *eax, unsigned int *ebx,
+ unsigned int *ecx, unsigned int *edx)
+{
+ /* ecx is often an input as well as an output. */
+ asm volatile(
+ "cpuid;"
+ : "=a" (*eax),
+ "=b" (*ebx),
+ "=c" (*ecx),
+ "=d" (*edx)
+ : "0" (*eax), "2" (*ecx));
+}
+
+/* Intel-defined CPU features, CPUID level 0x00000007:0 (ecx) */
+#define X86_FEATURE_PKU (1<<3) /* Protection Keys for Userspace */
+#define X86_FEATURE_OSPKE (1<<4) /* OS Protection Keys Enable */
+
+static inline int cpu_has_pku(void)
+{
+ unsigned int eax;
+ unsigned int ebx;
+ unsigned int ecx;
+ unsigned int edx;
+
+ eax = 0x7;
+ ecx = 0x0;
+ __cpuid(&eax, &ebx, &ecx, &edx);
+
+ if (!(ecx & X86_FEATURE_PKU)) {
+ dprintf2("cpu does not have PKU\n");
+ return 0;
+ }
+ if (!(ecx & X86_FEATURE_OSPKE)) {
+ dprintf2("cpu does not have OSPKE\n");
+ return 0;
+ }
+ return 1;
+}
+
+#define XSTATE_PKEY_BIT (9)
+#define XSTATE_PKEY 0x200
+
+int pkey_reg_xstate_offset(void)
+{
+ unsigned int eax;
+ unsigned int ebx;
+ unsigned int ecx;
+ unsigned int edx;
+ int xstate_offset;
+ int xstate_size;
+ unsigned long XSTATE_CPUID = 0xd;
+ int leaf;
+
+ /* assume that XSTATE_PKEY is set in XCR0 */
+ leaf = XSTATE_PKEY_BIT;
+ {
+ eax = XSTATE_CPUID;
+ ecx = leaf;
+ __cpuid(&eax, &ebx, &ecx, &edx);
+
+ if (leaf == XSTATE_PKEY_BIT) {
+ xstate_offset = ebx;
+ xstate_size = eax;
+ }
+ }
+
+ if (xstate_size == 0) {
+ printf("could not find size/offset of PKEY in xsave state\n");
+ return 0;
+ }
+
+ return xstate_offset;
+}
+
+#endif /* _PKEYS_X86_H */
diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c
index cad52dc..99e4e1e 100644
--- a/tools/testing/selftests/vm/protection_keys.c
+++ b/tools/testing/selftests/vm/protection_keys.c
@@ -49,9 +49,6 @@
int test_nr;
unsigned int shadow_pkey_reg;
-
-#define HPAGE_SIZE (1UL<<21)
-
int dprint_in_signal;
char dprint_in_signal_buffer[DPRINT_IN_SIGNAL_BUF_SIZE];
@@ -137,12 +134,6 @@ void abort_hooks(void)
#endif
}
-static inline void __page_o_noops(void)
-{
- /* 8-bytes of instruction * 512 bytes = 1 page */
- asm(".rept 512 ; nopl 0x7eeeeeee(%eax) ; .endr");
-}
-
/*
* This attempts to have roughly a page of instructions followed by a few
* instructions that do a write, and another page of instructions. That
@@ -165,36 +156,6 @@ void lots_o_noops_around_write(int *write_to_me)
dprintf3("%s() done\n", __func__);
}
-#ifdef __i386__
-
-#ifndef SYS_mprotect_key
-# define SYS_mprotect_key 380
-#endif
-
-#ifndef SYS_pkey_alloc
-# define SYS_pkey_alloc 381
-# define SYS_pkey_free 382
-#endif
-
-#define REG_IP_IDX REG_EIP
-#define si_pkey_offset 0x14
-
-#else
-
-#ifndef SYS_mprotect_key
-# define SYS_mprotect_key 329
-#endif
-
-#ifndef SYS_pkey_alloc
-# define SYS_pkey_alloc 330
-# define SYS_pkey_free 331
-#endif
-
-#define REG_IP_IDX REG_RIP
-#define si_pkey_offset 0x20
-
-#endif
-
void dump_mem(void *dumpme, int len_bytes)
{
char *c = (void *)dumpme;
@@ -367,14 +328,6 @@ pid_t fork_lazy_child(void)
return forkret;
}
-#ifndef PKEY_DISABLE_ACCESS
-# define PKEY_DISABLE_ACCESS 0x1
-#endif
-
-#ifndef PKEY_DISABLE_WRITE
-# define PKEY_DISABLE_WRITE 0x2
-#endif
-
static u32 hw_pkey_get(int pkey, unsigned long flags)
{
u32 mask = (PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE);
--
1.7.1
^ permalink raw reply related
* [PATCH v14 03/22] selftests/vm: move generic definitions to header file
From: Ram Pai @ 2018-07-17 13:49 UTC (permalink / raw)
To: shuahkh, linux-kselftest
Cc: mpe, linuxppc-dev, linux-mm, x86, linux-arch, mingo, dave.hansen,
mhocko, bauerman, linuxram, fweimer, msuchanek, aneesh.kumar
In-Reply-To: <1531835365-32387-1-git-send-email-linuxram@us.ibm.com>
Moved all the generic definition and helper functions to the header file.
cc: Dave Hansen <dave.hansen@intel.com>
cc: Florian Weimer <fweimer@redhat.com>
Signed-off-by: Ram Pai <linuxram@us.ibm.com>
Signed-off-by: Thiago Jung Bauermann <bauerman@linux.ibm.com>
---
tools/testing/selftests/vm/pkey-helpers.h | 35 ++++++++++++++++++++++---
tools/testing/selftests/vm/protection_keys.c | 27 --------------------
2 files changed, 30 insertions(+), 32 deletions(-)
diff --git a/tools/testing/selftests/vm/pkey-helpers.h b/tools/testing/selftests/vm/pkey-helpers.h
index d5779be..6ad1bd5 100644
--- a/tools/testing/selftests/vm/pkey-helpers.h
+++ b/tools/testing/selftests/vm/pkey-helpers.h
@@ -13,6 +13,14 @@
#include <ucontext.h>
#include <sys/mman.h>
+/* Define some kernel-like types */
+#define u8 uint8_t
+#define u16 uint16_t
+#define u32 uint32_t
+#define u64 uint64_t
+
+#define PTR_ERR_ENOTSUP ((void *)-ENOTSUP)
+
#define NR_PKEYS 16
#define PKEY_BITS_PER_PKEY 2
@@ -53,6 +61,18 @@ static inline void sigsafe_printf(const char *format, ...)
#define dprintf3(args...) dprintf_level(3, args)
#define dprintf4(args...) dprintf_level(4, args)
+extern void abort_hooks(void);
+#define pkey_assert(condition) do { \
+ if (!(condition)) { \
+ dprintf0("assert() at %s::%d test_nr: %d iteration: %d\n", \
+ __FILE__, __LINE__, \
+ test_nr, iteration_nr); \
+ dprintf0("errno at assert: %d", errno); \
+ abort_hooks(); \
+ exit(__LINE__); \
+ } \
+} while (0)
+
extern unsigned int shadow_pkey_reg;
static inline unsigned int __read_pkey_reg(void)
{
@@ -137,11 +157,6 @@ static inline void __pkey_write_allow(int pkey, int do_allow_write)
dprintf4("pkey_reg now: %08x\n", read_pkey_reg());
}
-#define PROT_PKEY0 0x10 /* protection key value (bit 0) */
-#define PROT_PKEY1 0x20 /* protection key value (bit 1) */
-#define PROT_PKEY2 0x40 /* protection key value (bit 2) */
-#define PROT_PKEY3 0x80 /* protection key value (bit 3) */
-
#define PAGE_SIZE 4096
#define MB (1<<20)
@@ -219,4 +234,14 @@ int pkey_reg_xstate_offset(void)
return xstate_offset;
}
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x)))
+#define ALIGN_UP(x, align_to) (((x) + ((align_to)-1)) & ~((align_to)-1))
+#define ALIGN_DOWN(x, align_to) ((x) & ~((align_to)-1))
+#define ALIGN_PTR_UP(p, ptr_align_to) \
+ ((typeof(p))ALIGN_UP((unsigned long)(p), ptr_align_to))
+#define ALIGN_PTR_DOWN(p, ptr_align_to) \
+ ((typeof(p))ALIGN_DOWN((unsigned long)(p), ptr_align_to))
+#define __stringify_1(x...) #x
+#define __stringify(x...) __stringify_1(x)
+
#endif /* _PKEYS_HELPER_H */
diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c
index 9f373cc..cad52dc 100644
--- a/tools/testing/selftests/vm/protection_keys.c
+++ b/tools/testing/selftests/vm/protection_keys.c
@@ -51,31 +51,10 @@
unsigned int shadow_pkey_reg;
#define HPAGE_SIZE (1UL<<21)
-#define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x)))
-#define ALIGN_UP(x, align_to) (((x) + ((align_to)-1)) & ~((align_to)-1))
-#define ALIGN_DOWN(x, align_to) ((x) & ~((align_to)-1))
-#define ALIGN_PTR_UP(p, ptr_align_to) ((typeof(p))ALIGN_UP((unsigned long)(p), ptr_align_to))
-#define ALIGN_PTR_DOWN(p, ptr_align_to) ((typeof(p))ALIGN_DOWN((unsigned long)(p), ptr_align_to))
-#define __stringify_1(x...) #x
-#define __stringify(x...) __stringify_1(x)
-
-#define PTR_ERR_ENOTSUP ((void *)-ENOTSUP)
int dprint_in_signal;
char dprint_in_signal_buffer[DPRINT_IN_SIGNAL_BUF_SIZE];
-extern void abort_hooks(void);
-#define pkey_assert(condition) do { \
- if (!(condition)) { \
- dprintf0("assert() at %s::%d test_nr: %d iteration: %d\n", \
- __FILE__, __LINE__, \
- test_nr, iteration_nr); \
- dprintf0("errno at assert: %d", errno); \
- abort_hooks(); \
- exit(__LINE__); \
- } \
-} while (0)
-
void cat_into_file(char *str, char *file)
{
int fd = open(file, O_RDWR);
@@ -186,12 +165,6 @@ void lots_o_noops_around_write(int *write_to_me)
dprintf3("%s() done\n", __func__);
}
-/* Define some kernel-like types */
-#define u8 uint8_t
-#define u16 uint16_t
-#define u32 uint32_t
-#define u64 uint64_t
-
#ifdef __i386__
#ifndef SYS_mprotect_key
--
1.7.1
^ permalink raw reply related
* [PATCH v14 02/22] selftests/vm: rename all references to pkru to a generic name
From: Ram Pai @ 2018-07-17 13:49 UTC (permalink / raw)
To: shuahkh, linux-kselftest
Cc: mpe, linuxppc-dev, linux-mm, x86, linux-arch, mingo, dave.hansen,
mhocko, bauerman, linuxram, fweimer, msuchanek, aneesh.kumar
In-Reply-To: <1531835365-32387-1-git-send-email-linuxram@us.ibm.com>
some pkru references are named to pkey_reg
and some prku references are renamed to pkey
cc: Dave Hansen <dave.hansen@intel.com>
cc: Florian Weimer <fweimer@redhat.com>
Signed-off-by: Ram Pai <linuxram@us.ibm.com>
Signed-off-by: Thiago Jung Bauermann <bauerman@linux.ibm.com>
Reviewed-by: Dave Hansen <dave.hansen@intel.com>
---
tools/testing/selftests/vm/pkey-helpers.h | 85 +++++-----
tools/testing/selftests/vm/protection_keys.c | 238 ++++++++++++++------------
2 files changed, 169 insertions(+), 154 deletions(-)
diff --git a/tools/testing/selftests/vm/pkey-helpers.h b/tools/testing/selftests/vm/pkey-helpers.h
index 254e543..d5779be 100644
--- a/tools/testing/selftests/vm/pkey-helpers.h
+++ b/tools/testing/selftests/vm/pkey-helpers.h
@@ -14,7 +14,7 @@
#include <sys/mman.h>
#define NR_PKEYS 16
-#define PKRU_BITS_PER_PKEY 2
+#define PKEY_BITS_PER_PKEY 2
#ifndef DEBUG_LEVEL
#define DEBUG_LEVEL 0
@@ -53,85 +53,88 @@ static inline void sigsafe_printf(const char *format, ...)
#define dprintf3(args...) dprintf_level(3, args)
#define dprintf4(args...) dprintf_level(4, args)
-extern unsigned int shadow_pkru;
-static inline unsigned int __rdpkru(void)
+extern unsigned int shadow_pkey_reg;
+static inline unsigned int __read_pkey_reg(void)
{
unsigned int eax, edx;
unsigned int ecx = 0;
- unsigned int pkru;
+ unsigned int pkey_reg;
asm volatile(".byte 0x0f,0x01,0xee\n\t"
: "=a" (eax), "=d" (edx)
: "c" (ecx));
- pkru = eax;
- return pkru;
+ pkey_reg = eax;
+ return pkey_reg;
}
-static inline unsigned int _rdpkru(int line)
+static inline unsigned int _read_pkey_reg(int line)
{
- unsigned int pkru = __rdpkru();
+ unsigned int pkey_reg = __read_pkey_reg();
- dprintf4("rdpkru(line=%d) pkru: %x shadow: %x\n",
- line, pkru, shadow_pkru);
- assert(pkru == shadow_pkru);
+ dprintf4("read_pkey_reg(line=%d) pkey_reg: %x shadow: %x\n",
+ line, pkey_reg, shadow_pkey_reg);
+ assert(pkey_reg == shadow_pkey_reg);
- return pkru;
+ return pkey_reg;
}
-#define rdpkru() _rdpkru(__LINE__)
+#define read_pkey_reg() _read_pkey_reg(__LINE__)
-static inline void __wrpkru(unsigned int pkru)
+static inline void __write_pkey_reg(unsigned int pkey_reg)
{
- unsigned int eax = pkru;
+ unsigned int eax = pkey_reg;
unsigned int ecx = 0;
unsigned int edx = 0;
- dprintf4("%s() changing %08x to %08x\n", __func__, __rdpkru(), pkru);
+ dprintf4("%s() changing %08x to %08x\n", __func__,
+ __read_pkey_reg(), pkey_reg);
asm volatile(".byte 0x0f,0x01,0xef\n\t"
: : "a" (eax), "c" (ecx), "d" (edx));
- assert(pkru == __rdpkru());
+ assert(pkey_reg == __read_pkey_reg());
}
-static inline void wrpkru(unsigned int pkru)
+static inline void write_pkey_reg(unsigned int pkey_reg)
{
- dprintf4("%s() changing %08x to %08x\n", __func__, __rdpkru(), pkru);
+ dprintf4("%s() changing %08x to %08x\n", __func__,
+ __read_pkey_reg(), pkey_reg);
/* will do the shadow check for us: */
- rdpkru();
- __wrpkru(pkru);
- shadow_pkru = pkru;
- dprintf4("%s(%08x) pkru: %08x\n", __func__, pkru, __rdpkru());
+ read_pkey_reg();
+ __write_pkey_reg(pkey_reg);
+ shadow_pkey_reg = pkey_reg;
+ dprintf4("%s(%08x) pkey_reg: %08x\n", __func__,
+ pkey_reg, __read_pkey_reg());
}
/*
* These are technically racy. since something could
- * change PKRU between the read and the write.
+ * change PKEY register between the read and the write.
*/
static inline void __pkey_access_allow(int pkey, int do_allow)
{
- unsigned int pkru = rdpkru();
+ unsigned int pkey_reg = read_pkey_reg();
int bit = pkey * 2;
if (do_allow)
- pkru &= (1<<bit);
+ pkey_reg &= (1<<bit);
else
- pkru |= (1<<bit);
+ pkey_reg |= (1<<bit);
- dprintf4("pkru now: %08x\n", rdpkru());
- wrpkru(pkru);
+ dprintf4("pkey_reg now: %08x\n", read_pkey_reg());
+ write_pkey_reg(pkey_reg);
}
static inline void __pkey_write_allow(int pkey, int do_allow_write)
{
- long pkru = rdpkru();
+ long pkey_reg = read_pkey_reg();
int bit = pkey * 2 + 1;
if (do_allow_write)
- pkru &= (1<<bit);
+ pkey_reg &= (1<<bit);
else
- pkru |= (1<<bit);
+ pkey_reg |= (1<<bit);
- wrpkru(pkru);
- dprintf4("pkru now: %08x\n", rdpkru());
+ write_pkey_reg(pkey_reg);
+ dprintf4("pkey_reg now: %08x\n", read_pkey_reg());
}
#define PROT_PKEY0 0x10 /* protection key value (bit 0) */
@@ -181,10 +184,10 @@ static inline int cpu_has_pku(void)
return 1;
}
-#define XSTATE_PKRU_BIT (9)
-#define XSTATE_PKRU 0x200
+#define XSTATE_PKEY_BIT (9)
+#define XSTATE_PKEY 0x200
-int pkru_xstate_offset(void)
+int pkey_reg_xstate_offset(void)
{
unsigned int eax;
unsigned int ebx;
@@ -195,21 +198,21 @@ int pkru_xstate_offset(void)
unsigned long XSTATE_CPUID = 0xd;
int leaf;
- /* assume that XSTATE_PKRU is set in XCR0 */
- leaf = XSTATE_PKRU_BIT;
+ /* assume that XSTATE_PKEY is set in XCR0 */
+ leaf = XSTATE_PKEY_BIT;
{
eax = XSTATE_CPUID;
ecx = leaf;
__cpuid(&eax, &ebx, &ecx, &edx);
- if (leaf == XSTATE_PKRU_BIT) {
+ if (leaf == XSTATE_PKEY_BIT) {
xstate_offset = ebx;
xstate_size = eax;
}
}
if (xstate_size == 0) {
- printf("could not find size/offset of PKRU in xsave state\n");
+ printf("could not find size/offset of PKEY in xsave state\n");
return 0;
}
diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c
index 460b4bd..9f373cc 100644
--- a/tools/testing/selftests/vm/protection_keys.c
+++ b/tools/testing/selftests/vm/protection_keys.c
@@ -1,11 +1,11 @@
// SPDX-License-Identifier: GPL-2.0
/*
- * Tests x86 Memory Protection Keys (see Documentation/x86/protection-keys.txt)
+ * Tests Memory Protection Keys (see Documentation/vm/protection-keys.txt)
*
* There are examples in here of:
* * how to set protection keys on memory
- * * how to set/clear bits in PKRU (the rights register)
- * * how to handle SEGV_PKRU signals and extract pkey-relevant
+ * * how to set/clear bits in pkey registers (the rights register)
+ * * how to handle SEGV_PKUERR signals and extract pkey-relevant
* information from the siginfo
*
* Things to add:
@@ -48,7 +48,7 @@
int iteration_nr = 1;
int test_nr;
-unsigned int shadow_pkru;
+unsigned int shadow_pkey_reg;
#define HPAGE_SIZE (1UL<<21)
#define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x)))
@@ -255,7 +255,7 @@ void dump_mem(void *dumpme, int len_bytes)
return "UNKNOWN";
}
-int pkru_faults;
+int pkey_faults;
int last_si_pkey = -1;
void signal_handler(int signum, siginfo_t *si, void *vucontext)
{
@@ -263,16 +263,16 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext)
int trapno;
unsigned long ip;
char *fpregs;
- u32 *pkru_ptr;
+ u32 *pkey_reg_ptr;
u64 siginfo_pkey;
u32 *si_pkey_ptr;
- int pkru_offset;
+ int pkey_reg_offset;
fpregset_t fpregset;
dprint_in_signal = 1;
dprintf1(">>>>===============SIGSEGV============================\n");
- dprintf1("%s()::%d, pkru: 0x%x shadow: %x\n", __func__, __LINE__,
- __rdpkru(), shadow_pkru);
+ dprintf1("%s()::%d, pkey_reg: 0x%x shadow: %x\n", __func__, __LINE__,
+ __read_pkey_reg(), shadow_pkey_reg);
trapno = uctxt->uc_mcontext.gregs[REG_TRAPNO];
ip = uctxt->uc_mcontext.gregs[REG_IP_IDX];
@@ -289,19 +289,19 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext)
*/
fpregs += 0x70;
#endif
- pkru_offset = pkru_xstate_offset();
- pkru_ptr = (void *)(&fpregs[pkru_offset]);
+ pkey_reg_offset = pkey_reg_xstate_offset();
+ pkey_reg_ptr = (void *)(&fpregs[pkey_reg_offset]);
dprintf1("siginfo: %p\n", si);
dprintf1(" fpregs: %p\n", fpregs);
/*
- * If we got a PKRU fault, we *HAVE* to have at least one bit set in
+ * If we got a PKEY fault, we *HAVE* to have at least one bit set in
* here.
*/
- dprintf1("pkru_xstate_offset: %d\n", pkru_xstate_offset());
+ dprintf1("pkey_reg_xstate_offset: %d\n", pkey_reg_xstate_offset());
if (DEBUG_LEVEL > 4)
- dump_mem(pkru_ptr - 128, 256);
- pkey_assert(*pkru_ptr);
+ dump_mem(pkey_reg_ptr - 128, 256);
+ pkey_assert(*pkey_reg_ptr);
if ((si->si_code == SEGV_MAPERR) ||
(si->si_code == SEGV_ACCERR) ||
@@ -317,13 +317,16 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext)
pkey_assert(siginfo_pkey < NR_PKEYS);
last_si_pkey = siginfo_pkey;
- dprintf1("signal pkru from xsave: %08x\n", *pkru_ptr);
- /* need __rdpkru() version so we do not do shadow_pkru checking */
- dprintf1("signal pkru from pkru: %08x\n", __rdpkru());
+ dprintf1("signal pkey_reg from xsave: %08x\n", *pkey_reg_ptr);
+ /*
+ * need __read_pkey_reg() version so we do not do shadow_pkey_reg
+ * checking
+ */
+ dprintf1("signal pkey_reg from pkey_reg: %08x\n", __read_pkey_reg());
dprintf1("pkey from siginfo: %jx\n", siginfo_pkey);
- *(u64 *)pkru_ptr = 0x00000000;
+ *(u64 *)pkey_reg_ptr = 0x00000000;
dprintf1("WARNING: set PRKU=0 to allow faulting instruction to continue\n");
- pkru_faults++;
+ pkey_faults++;
dprintf1("<<<<==================================================\n");
dprint_in_signal = 0;
}
@@ -402,45 +405,47 @@ pid_t fork_lazy_child(void)
static u32 hw_pkey_get(int pkey, unsigned long flags)
{
u32 mask = (PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE);
- u32 pkru = __rdpkru();
- u32 shifted_pkru;
- u32 masked_pkru;
+ u32 pkey_reg = __read_pkey_reg();
+ u32 shifted_pkey_reg;
+ u32 masked_pkey_reg;
dprintf1("%s(pkey=%d, flags=%lx) = %x / %d\n",
__func__, pkey, flags, 0, 0);
- dprintf2("%s() raw pkru: %x\n", __func__, pkru);
+ dprintf2("%s() raw pkey_reg: %x\n", __func__, pkey_reg);
- shifted_pkru = (pkru >> (pkey * PKRU_BITS_PER_PKEY));
- dprintf2("%s() shifted_pkru: %x\n", __func__, shifted_pkru);
- masked_pkru = shifted_pkru & mask;
- dprintf2("%s() masked pkru: %x\n", __func__, masked_pkru);
+ shifted_pkey_reg = (pkey_reg >> (pkey * PKEY_BITS_PER_PKEY));
+ dprintf2("%s() shifted_pkey_reg: %x\n", __func__, shifted_pkey_reg);
+ masked_pkey_reg = shifted_pkey_reg & mask;
+ dprintf2("%s() masked pkey_reg: %x\n", __func__, masked_pkey_reg);
/*
* shift down the relevant bits to the lowest two, then
* mask off all the other high bits.
*/
- return masked_pkru;
+ return masked_pkey_reg;
}
static int hw_pkey_set(int pkey, unsigned long rights, unsigned long flags)
{
u32 mask = (PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE);
- u32 old_pkru = __rdpkru();
- u32 new_pkru;
+ u32 old_pkey_reg = __read_pkey_reg();
+ u32 new_pkey_reg;
/* make sure that 'rights' only contains the bits we expect: */
assert(!(rights & ~mask));
- /* copy old pkru */
- new_pkru = old_pkru;
+ /* copy old pkey_reg */
+ new_pkey_reg = old_pkey_reg;
/* mask out bits from pkey in old value: */
- new_pkru &= ~(mask << (pkey * PKRU_BITS_PER_PKEY));
+ new_pkey_reg &= ~(mask << (pkey * PKEY_BITS_PER_PKEY));
/* OR in new bits for pkey: */
- new_pkru |= (rights << (pkey * PKRU_BITS_PER_PKEY));
+ new_pkey_reg |= (rights << (pkey * PKEY_BITS_PER_PKEY));
- __wrpkru(new_pkru);
+ __write_pkey_reg(new_pkey_reg);
- dprintf3("%s(pkey=%d, rights=%lx, flags=%lx) = %x pkru now: %x old_pkru: %x\n",
- __func__, pkey, rights, flags, 0, __rdpkru(), old_pkru);
+ dprintf3("%s(pkey=%d, rights=%lx, flags=%lx) = %x"
+ " pkey_reg now: %x old_pkey_reg: %x\n",
+ __func__, pkey, rights, flags, 0, __read_pkey_reg(),
+ old_pkey_reg);
return 0;
}
@@ -449,7 +454,7 @@ void pkey_disable_set(int pkey, int flags)
unsigned long syscall_flags = 0;
int ret;
int pkey_rights;
- u32 orig_pkru = rdpkru();
+ u32 orig_pkey_reg = read_pkey_reg();
dprintf1("START->%s(%d, 0x%x)\n", __func__,
pkey, flags);
@@ -465,9 +470,9 @@ void pkey_disable_set(int pkey, int flags)
ret = hw_pkey_set(pkey, pkey_rights, syscall_flags);
assert(!ret);
- /*pkru and flags have the same format */
- shadow_pkru |= flags << (pkey * 2);
- dprintf1("%s(%d) shadow: 0x%x\n", __func__, pkey, shadow_pkru);
+ /*pkey_reg and flags have the same format */
+ shadow_pkey_reg |= flags << (pkey * 2);
+ dprintf1("%s(%d) shadow: 0x%x\n", __func__, pkey, shadow_pkey_reg);
pkey_assert(ret >= 0);
@@ -475,9 +480,9 @@ void pkey_disable_set(int pkey, int flags)
dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__,
pkey, pkey, pkey_rights);
- dprintf1("%s(%d) pkru: 0x%x\n", __func__, pkey, rdpkru());
+ dprintf1("%s(%d) pkey_reg: 0x%x\n", __func__, pkey, read_pkey_reg());
if (flags)
- pkey_assert(rdpkru() > orig_pkru);
+ pkey_assert(read_pkey_reg() > orig_pkey_reg);
dprintf1("END<---%s(%d, 0x%x)\n", __func__,
pkey, flags);
}
@@ -487,7 +492,7 @@ void pkey_disable_clear(int pkey, int flags)
unsigned long syscall_flags = 0;
int ret;
int pkey_rights = hw_pkey_get(pkey, syscall_flags);
- u32 orig_pkru = rdpkru();
+ u32 orig_pkey_reg = read_pkey_reg();
pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE));
@@ -498,17 +503,16 @@ void pkey_disable_clear(int pkey, int flags)
pkey_rights |= flags;
ret = hw_pkey_set(pkey, pkey_rights, 0);
- /* pkru and flags have the same format */
- shadow_pkru &= ~(flags << (pkey * 2));
+ shadow_pkey_reg &= ~(flags << (pkey * 2));
pkey_assert(ret >= 0);
pkey_rights = hw_pkey_get(pkey, syscall_flags);
dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__,
pkey, pkey, pkey_rights);
- dprintf1("%s(%d) pkru: 0x%x\n", __func__, pkey, rdpkru());
+ dprintf1("%s(%d) pkey_reg: 0x%x\n", __func__, pkey, read_pkey_reg());
if (flags)
- assert(rdpkru() > orig_pkru);
+ assert(read_pkey_reg() > orig_pkey_reg);
}
void pkey_write_allow(int pkey)
@@ -561,33 +565,38 @@ int alloc_pkey(void)
int ret;
unsigned long init_val = 0x0;
- dprintf1("alloc_pkey()::%d, pkru: 0x%x shadow: %x\n",
- __LINE__, __rdpkru(), shadow_pkru);
+ dprintf1("%s()::%d, pkey_reg: 0x%x shadow: %x\n", __func__,
+ __LINE__, __read_pkey_reg(), shadow_pkey_reg);
ret = sys_pkey_alloc(0, init_val);
/*
- * pkey_alloc() sets PKRU, so we need to reflect it in
- * shadow_pkru:
+ * pkey_alloc() sets PKEY register, so we need to reflect it in
+ * shadow_pkey_reg:
*/
- dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n",
- __LINE__, ret, __rdpkru(), shadow_pkru);
+ dprintf4("%s()::%d, ret: %d pkey_reg: 0x%x shadow: 0x%x\n",
+ __func__, __LINE__, ret, __read_pkey_reg(),
+ shadow_pkey_reg);
if (ret) {
/* clear both the bits: */
- shadow_pkru &= ~(0x3 << (ret * 2));
- dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n",
- __LINE__, ret, __rdpkru(), shadow_pkru);
+ shadow_pkey_reg &= ~(0x3 << (ret * 2));
+ dprintf4("%s()::%d, ret: %d pkey_reg: 0x%x shadow: 0x%x\n",
+ __func__,
+ __LINE__, ret, __read_pkey_reg(),
+ shadow_pkey_reg);
/*
* move the new state in from init_val
- * (remember, we cheated and init_val == pkru format)
+ * (remember, we cheated and init_val == pkey_reg format)
*/
- shadow_pkru |= (init_val << (ret * 2));
+ shadow_pkey_reg |= (init_val << (ret * 2));
}
- dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n",
- __LINE__, ret, __rdpkru(), shadow_pkru);
- dprintf1("alloc_pkey()::%d errno: %d\n", __LINE__, errno);
+ dprintf4("%s()::%d, ret: %d pkey_reg: 0x%x shadow: 0x%x\n",
+ __func__, __LINE__, ret, __read_pkey_reg(),
+ shadow_pkey_reg);
+ dprintf1("%s()::%d errno: %d\n", __func__, __LINE__, errno);
/* for shadow checking: */
- rdpkru();
- dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n",
- __LINE__, ret, __rdpkru(), shadow_pkru);
+ read_pkey_reg();
+ dprintf4("%s()::%d, ret: %d pkey_reg: 0x%x shadow: 0x%x\n",
+ __func__, __LINE__, ret, __read_pkey_reg(),
+ shadow_pkey_reg);
return ret;
}
@@ -638,8 +647,8 @@ int alloc_random_pkey(void)
free_ret = sys_pkey_free(alloced_pkeys[i]);
pkey_assert(!free_ret);
}
- dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__,
- __LINE__, ret, __rdpkru(), shadow_pkru);
+ dprintf1("%s()::%d, ret: %d pkey_reg: 0x%x shadow: 0x%x\n", __func__,
+ __LINE__, ret, __read_pkey_reg(), shadow_pkey_reg);
return ret;
}
@@ -657,11 +666,13 @@ int mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot,
if (nr_iterations-- < 0)
break;
- dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__,
- __LINE__, ret, __rdpkru(), shadow_pkru);
+ dprintf1("%s()::%d, ret: %d pkey_reg: 0x%x shadow: 0x%x\n",
+ __func__, __LINE__, ret, __read_pkey_reg(),
+ shadow_pkey_reg);
sys_pkey_free(rpkey);
- dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__,
- __LINE__, ret, __rdpkru(), shadow_pkru);
+ dprintf1("%s()::%d, ret: %d pkey_reg: 0x%x shadow: 0x%x\n",
+ __func__, __LINE__, ret, __read_pkey_reg(),
+ shadow_pkey_reg);
}
pkey_assert(pkey < NR_PKEYS);
@@ -669,8 +680,8 @@ int mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot,
dprintf1("mprotect_pkey(%p, %zx, prot=0x%lx, pkey=%ld) ret: %d\n",
ptr, size, orig_prot, pkey, ret);
pkey_assert(!ret);
- dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__,
- __LINE__, ret, __rdpkru(), shadow_pkru);
+ dprintf1("%s()::%d, ret: %d pkey_reg: 0x%x shadow: 0x%x\n", __func__,
+ __LINE__, ret, __read_pkey_reg(), shadow_pkey_reg);
return ret;
}
@@ -752,7 +763,7 @@ void free_pkey_malloc(void *ptr)
void *ptr;
int ret;
- rdpkru();
+ read_pkey_reg();
dprintf1("doing %s(size=%ld, prot=0x%x, pkey=%d)\n", __func__,
size, prot, pkey);
pkey_assert(pkey < NR_PKEYS);
@@ -761,7 +772,7 @@ void free_pkey_malloc(void *ptr)
ret = mprotect_pkey((void *)ptr, PAGE_SIZE, prot, pkey);
pkey_assert(!ret);
record_pkey_malloc(ptr, size, prot);
- rdpkru();
+ read_pkey_reg();
dprintf1("%s() for pkey %d @ %p\n", __func__, pkey, ptr);
return ptr;
@@ -924,15 +935,15 @@ void setup_hugetlbfs(void)
return ret;
}
-int last_pkru_faults;
+int last_pkey_faults;
#define UNKNOWN_PKEY -2
-void expected_pk_fault(int pkey)
+void expected_pkey_fault(int pkey)
{
- dprintf2("%s(): last_pkru_faults: %d pkru_faults: %d\n",
- __func__, last_pkru_faults, pkru_faults);
+ dprintf2("%s(): last_pkey_faults: %d pkey_faults: %d\n",
+ __func__, last_pkey_faults, pkey_faults);
dprintf2("%s(%d): last_si_pkey: %d\n", __func__, pkey, last_si_pkey);
- pkey_assert(last_pkru_faults + 1 == pkru_faults);
+ pkey_assert(last_pkey_faults + 1 == pkey_faults);
/*
* For exec-only memory, we do not know the pkey in
* advance, so skip this check.
@@ -941,23 +952,23 @@ void expected_pk_fault(int pkey)
pkey_assert(last_si_pkey == pkey);
/*
- * The signal handler shold have cleared out PKRU to let the
+ * The signal handler shold have cleared out PKEY register to let the
* test program continue. We now have to restore it.
*/
- if (__rdpkru() != 0)
+ if (__read_pkey_reg() != 0)
pkey_assert(0);
- __wrpkru(shadow_pkru);
- dprintf1("%s() set PKRU=%x to restore state after signal nuked it\n",
- __func__, shadow_pkru);
- last_pkru_faults = pkru_faults;
+ __write_pkey_reg(shadow_pkey_reg);
+ dprintf1("%s() set pkey_reg=%x to restore state after signal "
+ "nuked it\n", __func__, shadow_pkey_reg);
+ last_pkey_faults = pkey_faults;
last_si_pkey = -1;
}
-#define do_not_expect_pk_fault(msg) do { \
- if (last_pkru_faults != pkru_faults) \
- dprintf0("unexpected PK fault: %s\n", msg); \
- pkey_assert(last_pkru_faults == pkru_faults); \
+#define do_not_expect_pkey_fault(msg) do { \
+ if (last_pkey_faults != pkey_faults) \
+ dprintf0("unexpected PKey fault: %s\n", msg); \
+ pkey_assert(last_pkey_faults == pkey_faults); \
} while (0)
int test_fds[10] = { -1 };
@@ -1015,25 +1026,25 @@ void test_read_of_access_disabled_region(int *ptr, u16 pkey)
int ptr_contents;
dprintf1("disabling access to PKEY[%02d], doing read @ %p\n", pkey, ptr);
- rdpkru();
+ read_pkey_reg();
pkey_access_deny(pkey);
ptr_contents = read_ptr(ptr);
dprintf1("*ptr: %d\n", ptr_contents);
- expected_pk_fault(pkey);
+ expected_pkey_fault(pkey);
}
void test_write_of_write_disabled_region(int *ptr, u16 pkey)
{
dprintf1("disabling write access to PKEY[%02d], doing write\n", pkey);
pkey_write_deny(pkey);
*ptr = __LINE__;
- expected_pk_fault(pkey);
+ expected_pkey_fault(pkey);
}
void test_write_of_access_disabled_region(int *ptr, u16 pkey)
{
dprintf1("disabling access to PKEY[%02d], doing write\n", pkey);
pkey_access_deny(pkey);
*ptr = __LINE__;
- expected_pk_fault(pkey);
+ expected_pkey_fault(pkey);
}
void test_kernel_write_of_access_disabled_region(int *ptr, u16 pkey)
{
@@ -1145,9 +1156,10 @@ void test_pkey_alloc_exhaust(int *ptr, u16 pkey)
int new_pkey;
dprintf1("%s() alloc loop: %d\n", __func__, i);
new_pkey = alloc_pkey();
- dprintf4("%s()::%d, err: %d pkru: 0x%x shadow: 0x%x\n", __func__,
- __LINE__, err, __rdpkru(), shadow_pkru);
- rdpkru(); /* for shadow checking */
+ dprintf4("%s()::%d, err: %d pkey_reg: 0x%x shadow: 0x%x\n",
+ __func__, __LINE__, err, __read_pkey_reg(),
+ shadow_pkey_reg);
+ read_pkey_reg(); /* for shadow checking */
dprintf2("%s() errno: %d ENOSPC: %d\n", __func__, errno, ENOSPC);
if ((new_pkey == -1) && (errno == ENOSPC)) {
dprintf2("%s() failed to allocate pkey after %d tries\n",
@@ -1180,7 +1192,7 @@ void test_pkey_alloc_exhaust(int *ptr, u16 pkey)
for (i = 0; i < nr_allocated_pkeys; i++) {
err = sys_pkey_free(allocated_pkeys[i]);
pkey_assert(!err);
- rdpkru(); /* for shadow checking */
+ read_pkey_reg(); /* for shadow checking */
}
}
@@ -1266,7 +1278,7 @@ void test_ptrace_of_child(int *ptr, u16 pkey)
pkey_assert(ret != -1);
/* Now access from the current task, and expect an exception: */
peek_result = read_ptr(ptr);
- expected_pk_fault(pkey);
+ expected_pkey_fault(pkey);
/*
* Try to access the NON-pkey-protected "plain_ptr" via ptrace:
@@ -1276,7 +1288,7 @@ void test_ptrace_of_child(int *ptr, u16 pkey)
pkey_assert(ret != -1);
/* Now access from the current task, and expect NO exception: */
peek_result = read_ptr(plain_ptr);
- do_not_expect_pk_fault("read plain pointer after ptrace");
+ do_not_expect_pkey_fault("read plain pointer after ptrace");
ret = ptrace(PTRACE_DETACH, child_pid, ignored, 0);
pkey_assert(ret != -1);
@@ -1326,17 +1338,17 @@ void test_executing_on_unreadable_memory(int *ptr, u16 pkey)
pkey_assert(!ret);
pkey_access_deny(pkey);
- dprintf2("pkru: %x\n", rdpkru());
+ dprintf2("pkey_reg: %x\n", read_pkey_reg());
/*
* Make sure this is an *instruction* fault
*/
madvise(p1, PAGE_SIZE, MADV_DONTNEED);
lots_o_noops_around_write(&scratch);
- do_not_expect_pk_fault("executing on PROT_EXEC memory");
+ do_not_expect_pkey_fault("executing on PROT_EXEC memory");
ptr_contents = read_ptr(p1);
dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents);
- expected_pk_fault(pkey);
+ expected_pkey_fault(pkey);
}
void test_implicit_mprotect_exec_only_memory(int *ptr, u16 pkey)
@@ -1357,15 +1369,15 @@ void test_implicit_mprotect_exec_only_memory(int *ptr, u16 pkey)
ret = mprotect(p1, PAGE_SIZE, PROT_EXEC);
pkey_assert(!ret);
- dprintf2("pkru: %x\n", rdpkru());
+ dprintf2("pkru: %x\n", read_pkey_reg());
/* Make sure this is an *instruction* fault */
madvise(p1, PAGE_SIZE, MADV_DONTNEED);
lots_o_noops_around_write(&scratch);
- do_not_expect_pk_fault("executing on PROT_EXEC memory");
+ do_not_expect_pkey_fault("executing on PROT_EXEC memory");
ptr_contents = read_ptr(p1);
dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents);
- expected_pk_fault(UNKNOWN_PKEY);
+ expected_pkey_fault(UNKNOWN_PKEY);
/*
* Put the memory back to non-PROT_EXEC. Should clear the
@@ -1379,7 +1391,7 @@ void test_implicit_mprotect_exec_only_memory(int *ptr, u16 pkey)
ret = mprotect(p1, PAGE_SIZE, PROT_READ|PROT_EXEC);
pkey_assert(!ret);
ptr_contents = read_ptr(p1);
- do_not_expect_pk_fault("plain read on recently PROT_EXEC area");
+ do_not_expect_pkey_fault("plain read on recently PROT_EXEC area");
}
void test_mprotect_pkey_on_unsupported_cpu(int *ptr, u16 pkey)
@@ -1421,7 +1433,7 @@ void run_tests_once(void)
for (test_nr = 0; test_nr < ARRAY_SIZE(pkey_tests); test_nr++) {
int pkey;
- int orig_pkru_faults = pkru_faults;
+ int orig_pkey_faults = pkey_faults;
dprintf1("======================\n");
dprintf1("test %d preparing...\n", test_nr);
@@ -1436,8 +1448,8 @@ void run_tests_once(void)
free_pkey_malloc(ptr);
sys_pkey_free(pkey);
- dprintf1("pkru_faults: %d\n", pkru_faults);
- dprintf1("orig_pkru_faults: %d\n", orig_pkru_faults);
+ dprintf1("pkey_faults: %d\n", pkey_faults);
+ dprintf1("orig_pkey_faults: %d\n", orig_pkey_faults);
tracing_off();
close_test_fds();
@@ -1450,7 +1462,7 @@ void run_tests_once(void)
void pkey_setup_shadow(void)
{
- shadow_pkru = __rdpkru();
+ shadow_pkey_reg = __read_pkey_reg();
}
int main(void)
@@ -1474,7 +1486,7 @@ int main(void)
}
pkey_setup_shadow();
- printf("startup pkru: %x\n", rdpkru());
+ printf("startup pkey_reg: %x\n", read_pkey_reg());
setup_hugetlbfs();
while (nr_iterations-- > 0)
--
1.7.1
^ permalink raw reply related
* [PATCH v14 01/22] selftests/x86: Move protecton key selftest to arch neutral directory
From: Ram Pai @ 2018-07-17 13:49 UTC (permalink / raw)
To: shuahkh, linux-kselftest
Cc: mpe, linuxppc-dev, linux-mm, x86, linux-arch, mingo, dave.hansen,
mhocko, bauerman, linuxram, fweimer, msuchanek, aneesh.kumar
In-Reply-To: <1531835365-32387-1-git-send-email-linuxram@us.ibm.com>
cc: Dave Hansen <dave.hansen@intel.com>
cc: Florian Weimer <fweimer@redhat.com>
Signed-off-by: Ram Pai <linuxram@us.ibm.com>
Signed-off-by: Thiago Jung Bauermann <bauerman@linux.ibm.com>
Acked-by: Ingo Molnar <mingo@kernel.org>
---
tools/testing/selftests/vm/.gitignore | 1 +
tools/testing/selftests/vm/Makefile | 1 +
tools/testing/selftests/vm/pkey-helpers.h | 219 ++++
tools/testing/selftests/vm/protection_keys.c | 1485 +++++++++++++++++++++++++
tools/testing/selftests/x86/.gitignore | 1 -
tools/testing/selftests/x86/pkey-helpers.h | 219 ----
tools/testing/selftests/x86/protection_keys.c | 1485 -------------------------
7 files changed, 1706 insertions(+), 1705 deletions(-)
create mode 100644 tools/testing/selftests/vm/pkey-helpers.h
create mode 100644 tools/testing/selftests/vm/protection_keys.c
delete mode 100644 tools/testing/selftests/x86/pkey-helpers.h
delete mode 100644 tools/testing/selftests/x86/protection_keys.c
diff --git a/tools/testing/selftests/vm/.gitignore b/tools/testing/selftests/vm/.gitignore
index 342c7bc..0214fbf 100644
--- a/tools/testing/selftests/vm/.gitignore
+++ b/tools/testing/selftests/vm/.gitignore
@@ -12,3 +12,4 @@ mlock-random-test
virtual_address_range
gup_benchmark
va_128TBswitch
+protection_keys
diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile
index fdefa22..9788a58 100644
--- a/tools/testing/selftests/vm/Makefile
+++ b/tools/testing/selftests/vm/Makefile
@@ -20,6 +20,7 @@ TEST_GEN_FILES += transhuge-stress
TEST_GEN_FILES += userfaultfd
TEST_GEN_FILES += va_128TBswitch
TEST_GEN_FILES += virtual_address_range
+TEST_GEN_FILES += protection_keys
TEST_PROGS := run_vmtests
diff --git a/tools/testing/selftests/vm/pkey-helpers.h b/tools/testing/selftests/vm/pkey-helpers.h
new file mode 100644
index 0000000..254e543
--- /dev/null
+++ b/tools/testing/selftests/vm/pkey-helpers.h
@@ -0,0 +1,219 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PKEYS_HELPER_H
+#define _PKEYS_HELPER_H
+#define _GNU_SOURCE
+#include <string.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <signal.h>
+#include <assert.h>
+#include <stdlib.h>
+#include <ucontext.h>
+#include <sys/mman.h>
+
+#define NR_PKEYS 16
+#define PKRU_BITS_PER_PKEY 2
+
+#ifndef DEBUG_LEVEL
+#define DEBUG_LEVEL 0
+#endif
+#define DPRINT_IN_SIGNAL_BUF_SIZE 4096
+extern int dprint_in_signal;
+extern char dprint_in_signal_buffer[DPRINT_IN_SIGNAL_BUF_SIZE];
+static inline void sigsafe_printf(const char *format, ...)
+{
+ va_list ap;
+
+ if (!dprint_in_signal) {
+ va_start(ap, format);
+ vprintf(format, ap);
+ va_end(ap);
+ } else {
+ int ret;
+ /*
+ * No printf() functions are signal-safe.
+ * They deadlock easily. Write the format
+ * string to get some output, even if
+ * incomplete.
+ */
+ ret = write(1, format, strlen(format));
+ if (ret < 0)
+ exit(1);
+ }
+}
+#define dprintf_level(level, args...) do { \
+ if (level <= DEBUG_LEVEL) \
+ sigsafe_printf(args); \
+} while (0)
+#define dprintf0(args...) dprintf_level(0, args)
+#define dprintf1(args...) dprintf_level(1, args)
+#define dprintf2(args...) dprintf_level(2, args)
+#define dprintf3(args...) dprintf_level(3, args)
+#define dprintf4(args...) dprintf_level(4, args)
+
+extern unsigned int shadow_pkru;
+static inline unsigned int __rdpkru(void)
+{
+ unsigned int eax, edx;
+ unsigned int ecx = 0;
+ unsigned int pkru;
+
+ asm volatile(".byte 0x0f,0x01,0xee\n\t"
+ : "=a" (eax), "=d" (edx)
+ : "c" (ecx));
+ pkru = eax;
+ return pkru;
+}
+
+static inline unsigned int _rdpkru(int line)
+{
+ unsigned int pkru = __rdpkru();
+
+ dprintf4("rdpkru(line=%d) pkru: %x shadow: %x\n",
+ line, pkru, shadow_pkru);
+ assert(pkru == shadow_pkru);
+
+ return pkru;
+}
+
+#define rdpkru() _rdpkru(__LINE__)
+
+static inline void __wrpkru(unsigned int pkru)
+{
+ unsigned int eax = pkru;
+ unsigned int ecx = 0;
+ unsigned int edx = 0;
+
+ dprintf4("%s() changing %08x to %08x\n", __func__, __rdpkru(), pkru);
+ asm volatile(".byte 0x0f,0x01,0xef\n\t"
+ : : "a" (eax), "c" (ecx), "d" (edx));
+ assert(pkru == __rdpkru());
+}
+
+static inline void wrpkru(unsigned int pkru)
+{
+ dprintf4("%s() changing %08x to %08x\n", __func__, __rdpkru(), pkru);
+ /* will do the shadow check for us: */
+ rdpkru();
+ __wrpkru(pkru);
+ shadow_pkru = pkru;
+ dprintf4("%s(%08x) pkru: %08x\n", __func__, pkru, __rdpkru());
+}
+
+/*
+ * These are technically racy. since something could
+ * change PKRU between the read and the write.
+ */
+static inline void __pkey_access_allow(int pkey, int do_allow)
+{
+ unsigned int pkru = rdpkru();
+ int bit = pkey * 2;
+
+ if (do_allow)
+ pkru &= (1<<bit);
+ else
+ pkru |= (1<<bit);
+
+ dprintf4("pkru now: %08x\n", rdpkru());
+ wrpkru(pkru);
+}
+
+static inline void __pkey_write_allow(int pkey, int do_allow_write)
+{
+ long pkru = rdpkru();
+ int bit = pkey * 2 + 1;
+
+ if (do_allow_write)
+ pkru &= (1<<bit);
+ else
+ pkru |= (1<<bit);
+
+ wrpkru(pkru);
+ dprintf4("pkru now: %08x\n", rdpkru());
+}
+
+#define PROT_PKEY0 0x10 /* protection key value (bit 0) */
+#define PROT_PKEY1 0x20 /* protection key value (bit 1) */
+#define PROT_PKEY2 0x40 /* protection key value (bit 2) */
+#define PROT_PKEY3 0x80 /* protection key value (bit 3) */
+
+#define PAGE_SIZE 4096
+#define MB (1<<20)
+
+static inline void __cpuid(unsigned int *eax, unsigned int *ebx,
+ unsigned int *ecx, unsigned int *edx)
+{
+ /* ecx is often an input as well as an output. */
+ asm volatile(
+ "cpuid;"
+ : "=a" (*eax),
+ "=b" (*ebx),
+ "=c" (*ecx),
+ "=d" (*edx)
+ : "0" (*eax), "2" (*ecx));
+}
+
+/* Intel-defined CPU features, CPUID level 0x00000007:0 (ecx) */
+#define X86_FEATURE_PKU (1<<3) /* Protection Keys for Userspace */
+#define X86_FEATURE_OSPKE (1<<4) /* OS Protection Keys Enable */
+
+static inline int cpu_has_pku(void)
+{
+ unsigned int eax;
+ unsigned int ebx;
+ unsigned int ecx;
+ unsigned int edx;
+
+ eax = 0x7;
+ ecx = 0x0;
+ __cpuid(&eax, &ebx, &ecx, &edx);
+
+ if (!(ecx & X86_FEATURE_PKU)) {
+ dprintf2("cpu does not have PKU\n");
+ return 0;
+ }
+ if (!(ecx & X86_FEATURE_OSPKE)) {
+ dprintf2("cpu does not have OSPKE\n");
+ return 0;
+ }
+ return 1;
+}
+
+#define XSTATE_PKRU_BIT (9)
+#define XSTATE_PKRU 0x200
+
+int pkru_xstate_offset(void)
+{
+ unsigned int eax;
+ unsigned int ebx;
+ unsigned int ecx;
+ unsigned int edx;
+ int xstate_offset;
+ int xstate_size;
+ unsigned long XSTATE_CPUID = 0xd;
+ int leaf;
+
+ /* assume that XSTATE_PKRU is set in XCR0 */
+ leaf = XSTATE_PKRU_BIT;
+ {
+ eax = XSTATE_CPUID;
+ ecx = leaf;
+ __cpuid(&eax, &ebx, &ecx, &edx);
+
+ if (leaf == XSTATE_PKRU_BIT) {
+ xstate_offset = ebx;
+ xstate_size = eax;
+ }
+ }
+
+ if (xstate_size == 0) {
+ printf("could not find size/offset of PKRU in xsave state\n");
+ return 0;
+ }
+
+ return xstate_offset;
+}
+
+#endif /* _PKEYS_HELPER_H */
diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c
new file mode 100644
index 0000000..460b4bd
--- /dev/null
+++ b/tools/testing/selftests/vm/protection_keys.c
@@ -0,0 +1,1485 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Tests x86 Memory Protection Keys (see Documentation/x86/protection-keys.txt)
+ *
+ * There are examples in here of:
+ * * how to set protection keys on memory
+ * * how to set/clear bits in PKRU (the rights register)
+ * * how to handle SEGV_PKRU signals and extract pkey-relevant
+ * information from the siginfo
+ *
+ * Things to add:
+ * make sure KSM and KSM COW breaking works
+ * prefault pages in at malloc, or not
+ * protect MPX bounds tables with protection keys?
+ * make sure VMA splitting/merging is working correctly
+ * OOMs can destroy mm->mmap (see exit_mmap()), so make sure it is immune to pkeys
+ * look for pkey "leaks" where it is still set on a VMA but "freed" back to the kernel
+ * do a plain mprotect() to a mprotect_pkey() area and make sure the pkey sticks
+ *
+ * Compile like this:
+ * gcc -o protection_keys -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm
+ * gcc -m32 -o protection_keys_32 -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm
+ */
+#define _GNU_SOURCE
+#include <errno.h>
+#include <linux/futex.h>
+#include <sys/time.h>
+#include <sys/syscall.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <signal.h>
+#include <assert.h>
+#include <stdlib.h>
+#include <ucontext.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/ptrace.h>
+#include <setjmp.h>
+
+#include "pkey-helpers.h"
+
+int iteration_nr = 1;
+int test_nr;
+
+unsigned int shadow_pkru;
+
+#define HPAGE_SIZE (1UL<<21)
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x)))
+#define ALIGN_UP(x, align_to) (((x) + ((align_to)-1)) & ~((align_to)-1))
+#define ALIGN_DOWN(x, align_to) ((x) & ~((align_to)-1))
+#define ALIGN_PTR_UP(p, ptr_align_to) ((typeof(p))ALIGN_UP((unsigned long)(p), ptr_align_to))
+#define ALIGN_PTR_DOWN(p, ptr_align_to) ((typeof(p))ALIGN_DOWN((unsigned long)(p), ptr_align_to))
+#define __stringify_1(x...) #x
+#define __stringify(x...) __stringify_1(x)
+
+#define PTR_ERR_ENOTSUP ((void *)-ENOTSUP)
+
+int dprint_in_signal;
+char dprint_in_signal_buffer[DPRINT_IN_SIGNAL_BUF_SIZE];
+
+extern void abort_hooks(void);
+#define pkey_assert(condition) do { \
+ if (!(condition)) { \
+ dprintf0("assert() at %s::%d test_nr: %d iteration: %d\n", \
+ __FILE__, __LINE__, \
+ test_nr, iteration_nr); \
+ dprintf0("errno at assert: %d", errno); \
+ abort_hooks(); \
+ exit(__LINE__); \
+ } \
+} while (0)
+
+void cat_into_file(char *str, char *file)
+{
+ int fd = open(file, O_RDWR);
+ int ret;
+
+ dprintf2("%s(): writing '%s' to '%s'\n", __func__, str, file);
+ /*
+ * these need to be raw because they are called under
+ * pkey_assert()
+ */
+ if (fd < 0) {
+ fprintf(stderr, "error opening '%s'\n", str);
+ perror("error: ");
+ exit(__LINE__);
+ }
+
+ ret = write(fd, str, strlen(str));
+ if (ret != strlen(str)) {
+ perror("write to file failed");
+ fprintf(stderr, "filename: '%s' str: '%s'\n", file, str);
+ exit(__LINE__);
+ }
+ close(fd);
+}
+
+#if CONTROL_TRACING > 0
+static int warned_tracing;
+int tracing_root_ok(void)
+{
+ if (geteuid() != 0) {
+ if (!warned_tracing)
+ fprintf(stderr, "WARNING: not run as root, "
+ "can not do tracing control\n");
+ warned_tracing = 1;
+ return 0;
+ }
+ return 1;
+}
+#endif
+
+void tracing_on(void)
+{
+#if CONTROL_TRACING > 0
+#define TRACEDIR "/sys/kernel/debug/tracing"
+ char pidstr[32];
+
+ if (!tracing_root_ok())
+ return;
+
+ sprintf(pidstr, "%d", getpid());
+ cat_into_file("0", TRACEDIR "/tracing_on");
+ cat_into_file("\n", TRACEDIR "/trace");
+ if (1) {
+ cat_into_file("function_graph", TRACEDIR "/current_tracer");
+ cat_into_file("1", TRACEDIR "/options/funcgraph-proc");
+ } else {
+ cat_into_file("nop", TRACEDIR "/current_tracer");
+ }
+ cat_into_file(pidstr, TRACEDIR "/set_ftrace_pid");
+ cat_into_file("1", TRACEDIR "/tracing_on");
+ dprintf1("enabled tracing\n");
+#endif
+}
+
+void tracing_off(void)
+{
+#if CONTROL_TRACING > 0
+ if (!tracing_root_ok())
+ return;
+ cat_into_file("0", "/sys/kernel/debug/tracing/tracing_on");
+#endif
+}
+
+void abort_hooks(void)
+{
+ fprintf(stderr, "running %s()...\n", __func__);
+ tracing_off();
+#ifdef SLEEP_ON_ABORT
+ sleep(SLEEP_ON_ABORT);
+#endif
+}
+
+static inline void __page_o_noops(void)
+{
+ /* 8-bytes of instruction * 512 bytes = 1 page */
+ asm(".rept 512 ; nopl 0x7eeeeeee(%eax) ; .endr");
+}
+
+/*
+ * This attempts to have roughly a page of instructions followed by a few
+ * instructions that do a write, and another page of instructions. That
+ * way, we are pretty sure that the write is in the second page of
+ * instructions and has at least a page of padding behind it.
+ *
+ * *That* lets us be sure to madvise() away the write instruction, which
+ * will then fault, which makes sure that the fault code handles
+ * execute-only memory properly.
+ */
+__attribute__((__aligned__(PAGE_SIZE)))
+void lots_o_noops_around_write(int *write_to_me)
+{
+ dprintf3("running %s()\n", __func__);
+ __page_o_noops();
+ /* Assume this happens in the second page of instructions: */
+ *write_to_me = __LINE__;
+ /* pad out by another page: */
+ __page_o_noops();
+ dprintf3("%s() done\n", __func__);
+}
+
+/* Define some kernel-like types */
+#define u8 uint8_t
+#define u16 uint16_t
+#define u32 uint32_t
+#define u64 uint64_t
+
+#ifdef __i386__
+
+#ifndef SYS_mprotect_key
+# define SYS_mprotect_key 380
+#endif
+
+#ifndef SYS_pkey_alloc
+# define SYS_pkey_alloc 381
+# define SYS_pkey_free 382
+#endif
+
+#define REG_IP_IDX REG_EIP
+#define si_pkey_offset 0x14
+
+#else
+
+#ifndef SYS_mprotect_key
+# define SYS_mprotect_key 329
+#endif
+
+#ifndef SYS_pkey_alloc
+# define SYS_pkey_alloc 330
+# define SYS_pkey_free 331
+#endif
+
+#define REG_IP_IDX REG_RIP
+#define si_pkey_offset 0x20
+
+#endif
+
+void dump_mem(void *dumpme, int len_bytes)
+{
+ char *c = (void *)dumpme;
+ int i;
+
+ for (i = 0; i < len_bytes; i += sizeof(u64)) {
+ u64 *ptr = (u64 *)(c + i);
+ dprintf1("dump[%03d][@%p]: %016jx\n", i, ptr, *ptr);
+ }
+}
+
+/* Failed address bound checks: */
+#ifndef SEGV_BNDERR
+# define SEGV_BNDERR 3
+#endif
+
+#ifndef SEGV_PKUERR
+# define SEGV_PKUERR 4
+#endif
+
+static char *si_code_str(int si_code)
+{
+ if (si_code == SEGV_MAPERR)
+ return "SEGV_MAPERR";
+ if (si_code == SEGV_ACCERR)
+ return "SEGV_ACCERR";
+ if (si_code == SEGV_BNDERR)
+ return "SEGV_BNDERR";
+ if (si_code == SEGV_PKUERR)
+ return "SEGV_PKUERR";
+ return "UNKNOWN";
+}
+
+int pkru_faults;
+int last_si_pkey = -1;
+void signal_handler(int signum, siginfo_t *si, void *vucontext)
+{
+ ucontext_t *uctxt = vucontext;
+ int trapno;
+ unsigned long ip;
+ char *fpregs;
+ u32 *pkru_ptr;
+ u64 siginfo_pkey;
+ u32 *si_pkey_ptr;
+ int pkru_offset;
+ fpregset_t fpregset;
+
+ dprint_in_signal = 1;
+ dprintf1(">>>>===============SIGSEGV============================\n");
+ dprintf1("%s()::%d, pkru: 0x%x shadow: %x\n", __func__, __LINE__,
+ __rdpkru(), shadow_pkru);
+
+ trapno = uctxt->uc_mcontext.gregs[REG_TRAPNO];
+ ip = uctxt->uc_mcontext.gregs[REG_IP_IDX];
+ fpregset = uctxt->uc_mcontext.fpregs;
+ fpregs = (void *)fpregset;
+
+ dprintf2("%s() trapno: %d ip: 0x%lx info->si_code: %s/%d\n", __func__,
+ trapno, ip, si_code_str(si->si_code), si->si_code);
+#ifdef __i386__
+ /*
+ * 32-bit has some extra padding so that userspace can tell whether
+ * the XSTATE header is present in addition to the "legacy" FPU
+ * state. We just assume that it is here.
+ */
+ fpregs += 0x70;
+#endif
+ pkru_offset = pkru_xstate_offset();
+ pkru_ptr = (void *)(&fpregs[pkru_offset]);
+
+ dprintf1("siginfo: %p\n", si);
+ dprintf1(" fpregs: %p\n", fpregs);
+ /*
+ * If we got a PKRU fault, we *HAVE* to have at least one bit set in
+ * here.
+ */
+ dprintf1("pkru_xstate_offset: %d\n", pkru_xstate_offset());
+ if (DEBUG_LEVEL > 4)
+ dump_mem(pkru_ptr - 128, 256);
+ pkey_assert(*pkru_ptr);
+
+ if ((si->si_code == SEGV_MAPERR) ||
+ (si->si_code == SEGV_ACCERR) ||
+ (si->si_code == SEGV_BNDERR)) {
+ printf("non-PK si_code, exiting...\n");
+ exit(4);
+ }
+
+ si_pkey_ptr = (u32 *)(((u8 *)si) + si_pkey_offset);
+ dprintf1("si_pkey_ptr: %p\n", si_pkey_ptr);
+ dump_mem((u8 *)si_pkey_ptr - 8, 24);
+ siginfo_pkey = *si_pkey_ptr;
+ pkey_assert(siginfo_pkey < NR_PKEYS);
+ last_si_pkey = siginfo_pkey;
+
+ dprintf1("signal pkru from xsave: %08x\n", *pkru_ptr);
+ /* need __rdpkru() version so we do not do shadow_pkru checking */
+ dprintf1("signal pkru from pkru: %08x\n", __rdpkru());
+ dprintf1("pkey from siginfo: %jx\n", siginfo_pkey);
+ *(u64 *)pkru_ptr = 0x00000000;
+ dprintf1("WARNING: set PRKU=0 to allow faulting instruction to continue\n");
+ pkru_faults++;
+ dprintf1("<<<<==================================================\n");
+ dprint_in_signal = 0;
+}
+
+int wait_all_children(void)
+{
+ int status;
+ return waitpid(-1, &status, 0);
+}
+
+void sig_chld(int x)
+{
+ dprint_in_signal = 1;
+ dprintf2("[%d] SIGCHLD: %d\n", getpid(), x);
+ dprint_in_signal = 0;
+}
+
+void setup_sigsegv_handler(void)
+{
+ int r, rs;
+ struct sigaction newact;
+ struct sigaction oldact;
+
+ /* #PF is mapped to sigsegv */
+ int signum = SIGSEGV;
+
+ newact.sa_handler = 0;
+ newact.sa_sigaction = signal_handler;
+
+ /*sigset_t - signals to block while in the handler */
+ /* get the old signal mask. */
+ rs = sigprocmask(SIG_SETMASK, 0, &newact.sa_mask);
+ pkey_assert(rs == 0);
+
+ /* call sa_sigaction, not sa_handler*/
+ newact.sa_flags = SA_SIGINFO;
+
+ newact.sa_restorer = 0; /* void(*)(), obsolete */
+ r = sigaction(signum, &newact, &oldact);
+ r = sigaction(SIGALRM, &newact, &oldact);
+ pkey_assert(r == 0);
+}
+
+void setup_handlers(void)
+{
+ signal(SIGCHLD, &sig_chld);
+ setup_sigsegv_handler();
+}
+
+pid_t fork_lazy_child(void)
+{
+ pid_t forkret;
+
+ forkret = fork();
+ pkey_assert(forkret >= 0);
+ dprintf3("[%d] fork() ret: %d\n", getpid(), forkret);
+
+ if (!forkret) {
+ /* in the child */
+ while (1) {
+ dprintf1("child sleeping...\n");
+ sleep(30);
+ }
+ }
+ return forkret;
+}
+
+#ifndef PKEY_DISABLE_ACCESS
+# define PKEY_DISABLE_ACCESS 0x1
+#endif
+
+#ifndef PKEY_DISABLE_WRITE
+# define PKEY_DISABLE_WRITE 0x2
+#endif
+
+static u32 hw_pkey_get(int pkey, unsigned long flags)
+{
+ u32 mask = (PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE);
+ u32 pkru = __rdpkru();
+ u32 shifted_pkru;
+ u32 masked_pkru;
+
+ dprintf1("%s(pkey=%d, flags=%lx) = %x / %d\n",
+ __func__, pkey, flags, 0, 0);
+ dprintf2("%s() raw pkru: %x\n", __func__, pkru);
+
+ shifted_pkru = (pkru >> (pkey * PKRU_BITS_PER_PKEY));
+ dprintf2("%s() shifted_pkru: %x\n", __func__, shifted_pkru);
+ masked_pkru = shifted_pkru & mask;
+ dprintf2("%s() masked pkru: %x\n", __func__, masked_pkru);
+ /*
+ * shift down the relevant bits to the lowest two, then
+ * mask off all the other high bits.
+ */
+ return masked_pkru;
+}
+
+static int hw_pkey_set(int pkey, unsigned long rights, unsigned long flags)
+{
+ u32 mask = (PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE);
+ u32 old_pkru = __rdpkru();
+ u32 new_pkru;
+
+ /* make sure that 'rights' only contains the bits we expect: */
+ assert(!(rights & ~mask));
+
+ /* copy old pkru */
+ new_pkru = old_pkru;
+ /* mask out bits from pkey in old value: */
+ new_pkru &= ~(mask << (pkey * PKRU_BITS_PER_PKEY));
+ /* OR in new bits for pkey: */
+ new_pkru |= (rights << (pkey * PKRU_BITS_PER_PKEY));
+
+ __wrpkru(new_pkru);
+
+ dprintf3("%s(pkey=%d, rights=%lx, flags=%lx) = %x pkru now: %x old_pkru: %x\n",
+ __func__, pkey, rights, flags, 0, __rdpkru(), old_pkru);
+ return 0;
+}
+
+void pkey_disable_set(int pkey, int flags)
+{
+ unsigned long syscall_flags = 0;
+ int ret;
+ int pkey_rights;
+ u32 orig_pkru = rdpkru();
+
+ dprintf1("START->%s(%d, 0x%x)\n", __func__,
+ pkey, flags);
+ pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE));
+
+ pkey_rights = hw_pkey_get(pkey, syscall_flags);
+
+ dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__,
+ pkey, pkey, pkey_rights);
+ pkey_assert(pkey_rights >= 0);
+
+ pkey_rights |= flags;
+
+ ret = hw_pkey_set(pkey, pkey_rights, syscall_flags);
+ assert(!ret);
+ /*pkru and flags have the same format */
+ shadow_pkru |= flags << (pkey * 2);
+ dprintf1("%s(%d) shadow: 0x%x\n", __func__, pkey, shadow_pkru);
+
+ pkey_assert(ret >= 0);
+
+ pkey_rights = hw_pkey_get(pkey, syscall_flags);
+ dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__,
+ pkey, pkey, pkey_rights);
+
+ dprintf1("%s(%d) pkru: 0x%x\n", __func__, pkey, rdpkru());
+ if (flags)
+ pkey_assert(rdpkru() > orig_pkru);
+ dprintf1("END<---%s(%d, 0x%x)\n", __func__,
+ pkey, flags);
+}
+
+void pkey_disable_clear(int pkey, int flags)
+{
+ unsigned long syscall_flags = 0;
+ int ret;
+ int pkey_rights = hw_pkey_get(pkey, syscall_flags);
+ u32 orig_pkru = rdpkru();
+
+ pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE));
+
+ dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__,
+ pkey, pkey, pkey_rights);
+ pkey_assert(pkey_rights >= 0);
+
+ pkey_rights |= flags;
+
+ ret = hw_pkey_set(pkey, pkey_rights, 0);
+ /* pkru and flags have the same format */
+ shadow_pkru &= ~(flags << (pkey * 2));
+ pkey_assert(ret >= 0);
+
+ pkey_rights = hw_pkey_get(pkey, syscall_flags);
+ dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__,
+ pkey, pkey, pkey_rights);
+
+ dprintf1("%s(%d) pkru: 0x%x\n", __func__, pkey, rdpkru());
+ if (flags)
+ assert(rdpkru() > orig_pkru);
+}
+
+void pkey_write_allow(int pkey)
+{
+ pkey_disable_clear(pkey, PKEY_DISABLE_WRITE);
+}
+void pkey_write_deny(int pkey)
+{
+ pkey_disable_set(pkey, PKEY_DISABLE_WRITE);
+}
+void pkey_access_allow(int pkey)
+{
+ pkey_disable_clear(pkey, PKEY_DISABLE_ACCESS);
+}
+void pkey_access_deny(int pkey)
+{
+ pkey_disable_set(pkey, PKEY_DISABLE_ACCESS);
+}
+
+int sys_mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot,
+ unsigned long pkey)
+{
+ int sret;
+
+ dprintf2("%s(0x%p, %zx, prot=%lx, pkey=%lx)\n", __func__,
+ ptr, size, orig_prot, pkey);
+
+ errno = 0;
+ sret = syscall(SYS_mprotect_key, ptr, size, orig_prot, pkey);
+ if (errno) {
+ dprintf2("SYS_mprotect_key sret: %d\n", sret);
+ dprintf2("SYS_mprotect_key prot: 0x%lx\n", orig_prot);
+ dprintf2("SYS_mprotect_key failed, errno: %d\n", errno);
+ if (DEBUG_LEVEL >= 2)
+ perror("SYS_mprotect_pkey");
+ }
+ return sret;
+}
+
+int sys_pkey_alloc(unsigned long flags, unsigned long init_val)
+{
+ int ret = syscall(SYS_pkey_alloc, flags, init_val);
+ dprintf1("%s(flags=%lx, init_val=%lx) syscall ret: %d errno: %d\n",
+ __func__, flags, init_val, ret, errno);
+ return ret;
+}
+
+int alloc_pkey(void)
+{
+ int ret;
+ unsigned long init_val = 0x0;
+
+ dprintf1("alloc_pkey()::%d, pkru: 0x%x shadow: %x\n",
+ __LINE__, __rdpkru(), shadow_pkru);
+ ret = sys_pkey_alloc(0, init_val);
+ /*
+ * pkey_alloc() sets PKRU, so we need to reflect it in
+ * shadow_pkru:
+ */
+ dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n",
+ __LINE__, ret, __rdpkru(), shadow_pkru);
+ if (ret) {
+ /* clear both the bits: */
+ shadow_pkru &= ~(0x3 << (ret * 2));
+ dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n",
+ __LINE__, ret, __rdpkru(), shadow_pkru);
+ /*
+ * move the new state in from init_val
+ * (remember, we cheated and init_val == pkru format)
+ */
+ shadow_pkru |= (init_val << (ret * 2));
+ }
+ dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n",
+ __LINE__, ret, __rdpkru(), shadow_pkru);
+ dprintf1("alloc_pkey()::%d errno: %d\n", __LINE__, errno);
+ /* for shadow checking: */
+ rdpkru();
+ dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n",
+ __LINE__, ret, __rdpkru(), shadow_pkru);
+ return ret;
+}
+
+int sys_pkey_free(unsigned long pkey)
+{
+ int ret = syscall(SYS_pkey_free, pkey);
+ dprintf1("%s(pkey=%ld) syscall ret: %d\n", __func__, pkey, ret);
+ return ret;
+}
+
+/*
+ * I had a bug where pkey bits could be set by mprotect() but
+ * not cleared. This ensures we get lots of random bit sets
+ * and clears on the vma and pte pkey bits.
+ */
+int alloc_random_pkey(void)
+{
+ int max_nr_pkey_allocs;
+ int ret;
+ int i;
+ int alloced_pkeys[NR_PKEYS];
+ int nr_alloced = 0;
+ int random_index;
+ memset(alloced_pkeys, 0, sizeof(alloced_pkeys));
+
+ /* allocate every possible key and make a note of which ones we got */
+ max_nr_pkey_allocs = NR_PKEYS;
+ max_nr_pkey_allocs = 1;
+ for (i = 0; i < max_nr_pkey_allocs; i++) {
+ int new_pkey = alloc_pkey();
+ if (new_pkey < 0)
+ break;
+ alloced_pkeys[nr_alloced++] = new_pkey;
+ }
+
+ pkey_assert(nr_alloced > 0);
+ /* select a random one out of the allocated ones */
+ random_index = rand() % nr_alloced;
+ ret = alloced_pkeys[random_index];
+ /* now zero it out so we don't free it next */
+ alloced_pkeys[random_index] = 0;
+
+ /* go through the allocated ones that we did not want and free them */
+ for (i = 0; i < nr_alloced; i++) {
+ int free_ret;
+ if (!alloced_pkeys[i])
+ continue;
+ free_ret = sys_pkey_free(alloced_pkeys[i]);
+ pkey_assert(!free_ret);
+ }
+ dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__,
+ __LINE__, ret, __rdpkru(), shadow_pkru);
+ return ret;
+}
+
+int mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot,
+ unsigned long pkey)
+{
+ int nr_iterations = random() % 100;
+ int ret;
+
+ while (0) {
+ int rpkey = alloc_random_pkey();
+ ret = sys_mprotect_pkey(ptr, size, orig_prot, pkey);
+ dprintf1("sys_mprotect_pkey(%p, %zx, prot=0x%lx, pkey=%ld) ret: %d\n",
+ ptr, size, orig_prot, pkey, ret);
+ if (nr_iterations-- < 0)
+ break;
+
+ dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__,
+ __LINE__, ret, __rdpkru(), shadow_pkru);
+ sys_pkey_free(rpkey);
+ dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__,
+ __LINE__, ret, __rdpkru(), shadow_pkru);
+ }
+ pkey_assert(pkey < NR_PKEYS);
+
+ ret = sys_mprotect_pkey(ptr, size, orig_prot, pkey);
+ dprintf1("mprotect_pkey(%p, %zx, prot=0x%lx, pkey=%ld) ret: %d\n",
+ ptr, size, orig_prot, pkey, ret);
+ pkey_assert(!ret);
+ dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__,
+ __LINE__, ret, __rdpkru(), shadow_pkru);
+ return ret;
+}
+
+struct pkey_malloc_record {
+ void *ptr;
+ long size;
+ int prot;
+};
+struct pkey_malloc_record *pkey_malloc_records;
+struct pkey_malloc_record *pkey_last_malloc_record;
+long nr_pkey_malloc_records;
+void record_pkey_malloc(void *ptr, long size, int prot)
+{
+ long i;
+ struct pkey_malloc_record *rec = NULL;
+
+ for (i = 0; i < nr_pkey_malloc_records; i++) {
+ rec = &pkey_malloc_records[i];
+ /* find a free record */
+ if (rec)
+ break;
+ }
+ if (!rec) {
+ /* every record is full */
+ size_t old_nr_records = nr_pkey_malloc_records;
+ size_t new_nr_records = (nr_pkey_malloc_records * 2 + 1);
+ size_t new_size = new_nr_records * sizeof(struct pkey_malloc_record);
+ dprintf2("new_nr_records: %zd\n", new_nr_records);
+ dprintf2("new_size: %zd\n", new_size);
+ pkey_malloc_records = realloc(pkey_malloc_records, new_size);
+ pkey_assert(pkey_malloc_records != NULL);
+ rec = &pkey_malloc_records[nr_pkey_malloc_records];
+ /*
+ * realloc() does not initialize memory, so zero it from
+ * the first new record all the way to the end.
+ */
+ for (i = 0; i < new_nr_records - old_nr_records; i++)
+ memset(rec + i, 0, sizeof(*rec));
+ }
+ dprintf3("filling malloc record[%d/%p]: {%p, %ld}\n",
+ (int)(rec - pkey_malloc_records), rec, ptr, size);
+ rec->ptr = ptr;
+ rec->size = size;
+ rec->prot = prot;
+ pkey_last_malloc_record = rec;
+ nr_pkey_malloc_records++;
+}
+
+void free_pkey_malloc(void *ptr)
+{
+ long i;
+ int ret;
+ dprintf3("%s(%p)\n", __func__, ptr);
+ for (i = 0; i < nr_pkey_malloc_records; i++) {
+ struct pkey_malloc_record *rec = &pkey_malloc_records[i];
+ dprintf4("looking for ptr %p at record[%ld/%p]: {%p, %ld}\n",
+ ptr, i, rec, rec->ptr, rec->size);
+ if ((ptr < rec->ptr) ||
+ (ptr >= rec->ptr + rec->size))
+ continue;
+
+ dprintf3("found ptr %p at record[%ld/%p]: {%p, %ld}\n",
+ ptr, i, rec, rec->ptr, rec->size);
+ nr_pkey_malloc_records--;
+ ret = munmap(rec->ptr, rec->size);
+ dprintf3("munmap ret: %d\n", ret);
+ pkey_assert(!ret);
+ dprintf3("clearing rec->ptr, rec: %p\n", rec);
+ rec->ptr = NULL;
+ dprintf3("done clearing rec->ptr, rec: %p\n", rec);
+ return;
+ }
+ pkey_assert(false);
+}
+
+
+void *malloc_pkey_with_mprotect(long size, int prot, u16 pkey)
+{
+ void *ptr;
+ int ret;
+
+ rdpkru();
+ dprintf1("doing %s(size=%ld, prot=0x%x, pkey=%d)\n", __func__,
+ size, prot, pkey);
+ pkey_assert(pkey < NR_PKEYS);
+ ptr = mmap(NULL, size, prot, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
+ pkey_assert(ptr != (void *)-1);
+ ret = mprotect_pkey((void *)ptr, PAGE_SIZE, prot, pkey);
+ pkey_assert(!ret);
+ record_pkey_malloc(ptr, size, prot);
+ rdpkru();
+
+ dprintf1("%s() for pkey %d @ %p\n", __func__, pkey, ptr);
+ return ptr;
+}
+
+void *malloc_pkey_anon_huge(long size, int prot, u16 pkey)
+{
+ int ret;
+ void *ptr;
+
+ dprintf1("doing %s(size=%ld, prot=0x%x, pkey=%d)\n", __func__,
+ size, prot, pkey);
+ /*
+ * Guarantee we can fit at least one huge page in the resulting
+ * allocation by allocating space for 2:
+ */
+ size = ALIGN_UP(size, HPAGE_SIZE * 2);
+ ptr = mmap(NULL, size, PROT_NONE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
+ pkey_assert(ptr != (void *)-1);
+ record_pkey_malloc(ptr, size, prot);
+ mprotect_pkey(ptr, size, prot, pkey);
+
+ dprintf1("unaligned ptr: %p\n", ptr);
+ ptr = ALIGN_PTR_UP(ptr, HPAGE_SIZE);
+ dprintf1(" aligned ptr: %p\n", ptr);
+ ret = madvise(ptr, HPAGE_SIZE, MADV_HUGEPAGE);
+ dprintf1("MADV_HUGEPAGE ret: %d\n", ret);
+ ret = madvise(ptr, HPAGE_SIZE, MADV_WILLNEED);
+ dprintf1("MADV_WILLNEED ret: %d\n", ret);
+ memset(ptr, 0, HPAGE_SIZE);
+
+ dprintf1("mmap()'d thp for pkey %d @ %p\n", pkey, ptr);
+ return ptr;
+}
+
+int hugetlb_setup_ok;
+#define GET_NR_HUGE_PAGES 10
+void setup_hugetlbfs(void)
+{
+ int err;
+ int fd;
+ char buf[] = "123";
+
+ if (geteuid() != 0) {
+ fprintf(stderr, "WARNING: not run as root, can not do hugetlb test\n");
+ return;
+ }
+
+ cat_into_file(__stringify(GET_NR_HUGE_PAGES), "/proc/sys/vm/nr_hugepages");
+
+ /*
+ * Now go make sure that we got the pages and that they
+ * are 2M pages. Someone might have made 1G the default.
+ */
+ fd = open("/sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages", O_RDONLY);
+ if (fd < 0) {
+ perror("opening sysfs 2M hugetlb config");
+ return;
+ }
+
+ /* -1 to guarantee leaving the trailing \0 */
+ err = read(fd, buf, sizeof(buf)-1);
+ close(fd);
+ if (err <= 0) {
+ perror("reading sysfs 2M hugetlb config");
+ return;
+ }
+
+ if (atoi(buf) != GET_NR_HUGE_PAGES) {
+ fprintf(stderr, "could not confirm 2M pages, got: '%s' expected %d\n",
+ buf, GET_NR_HUGE_PAGES);
+ return;
+ }
+
+ hugetlb_setup_ok = 1;
+}
+
+void *malloc_pkey_hugetlb(long size, int prot, u16 pkey)
+{
+ void *ptr;
+ int flags = MAP_ANONYMOUS|MAP_PRIVATE|MAP_HUGETLB;
+
+ if (!hugetlb_setup_ok)
+ return PTR_ERR_ENOTSUP;
+
+ dprintf1("doing %s(%ld, %x, %x)\n", __func__, size, prot, pkey);
+ size = ALIGN_UP(size, HPAGE_SIZE * 2);
+ pkey_assert(pkey < NR_PKEYS);
+ ptr = mmap(NULL, size, PROT_NONE, flags, -1, 0);
+ pkey_assert(ptr != (void *)-1);
+ mprotect_pkey(ptr, size, prot, pkey);
+
+ record_pkey_malloc(ptr, size, prot);
+
+ dprintf1("mmap()'d hugetlbfs for pkey %d @ %p\n", pkey, ptr);
+ return ptr;
+}
+
+void *malloc_pkey_mmap_dax(long size, int prot, u16 pkey)
+{
+ void *ptr;
+ int fd;
+
+ dprintf1("doing %s(size=%ld, prot=0x%x, pkey=%d)\n", __func__,
+ size, prot, pkey);
+ pkey_assert(pkey < NR_PKEYS);
+ fd = open("/dax/foo", O_RDWR);
+ pkey_assert(fd >= 0);
+
+ ptr = mmap(0, size, prot, MAP_SHARED, fd, 0);
+ pkey_assert(ptr != (void *)-1);
+
+ mprotect_pkey(ptr, size, prot, pkey);
+
+ record_pkey_malloc(ptr, size, prot);
+
+ dprintf1("mmap()'d for pkey %d @ %p\n", pkey, ptr);
+ close(fd);
+ return ptr;
+}
+
+void *(*pkey_malloc[])(long size, int prot, u16 pkey) = {
+
+ malloc_pkey_with_mprotect,
+ malloc_pkey_anon_huge,
+ malloc_pkey_hugetlb
+/* can not do direct with the pkey_mprotect() API:
+ malloc_pkey_mmap_direct,
+ malloc_pkey_mmap_dax,
+*/
+};
+
+void *malloc_pkey(long size, int prot, u16 pkey)
+{
+ void *ret;
+ static int malloc_type;
+ int nr_malloc_types = ARRAY_SIZE(pkey_malloc);
+
+ pkey_assert(pkey < NR_PKEYS);
+
+ while (1) {
+ pkey_assert(malloc_type < nr_malloc_types);
+
+ ret = pkey_malloc[malloc_type](size, prot, pkey);
+ pkey_assert(ret != (void *)-1);
+
+ malloc_type++;
+ if (malloc_type >= nr_malloc_types)
+ malloc_type = (random()%nr_malloc_types);
+
+ /* try again if the malloc_type we tried is unsupported */
+ if (ret == PTR_ERR_ENOTSUP)
+ continue;
+
+ break;
+ }
+
+ dprintf3("%s(%ld, prot=%x, pkey=%x) returning: %p\n", __func__,
+ size, prot, pkey, ret);
+ return ret;
+}
+
+int last_pkru_faults;
+#define UNKNOWN_PKEY -2
+void expected_pk_fault(int pkey)
+{
+ dprintf2("%s(): last_pkru_faults: %d pkru_faults: %d\n",
+ __func__, last_pkru_faults, pkru_faults);
+ dprintf2("%s(%d): last_si_pkey: %d\n", __func__, pkey, last_si_pkey);
+ pkey_assert(last_pkru_faults + 1 == pkru_faults);
+
+ /*
+ * For exec-only memory, we do not know the pkey in
+ * advance, so skip this check.
+ */
+ if (pkey != UNKNOWN_PKEY)
+ pkey_assert(last_si_pkey == pkey);
+
+ /*
+ * The signal handler shold have cleared out PKRU to let the
+ * test program continue. We now have to restore it.
+ */
+ if (__rdpkru() != 0)
+ pkey_assert(0);
+
+ __wrpkru(shadow_pkru);
+ dprintf1("%s() set PKRU=%x to restore state after signal nuked it\n",
+ __func__, shadow_pkru);
+ last_pkru_faults = pkru_faults;
+ last_si_pkey = -1;
+}
+
+#define do_not_expect_pk_fault(msg) do { \
+ if (last_pkru_faults != pkru_faults) \
+ dprintf0("unexpected PK fault: %s\n", msg); \
+ pkey_assert(last_pkru_faults == pkru_faults); \
+} while (0)
+
+int test_fds[10] = { -1 };
+int nr_test_fds;
+void __save_test_fd(int fd)
+{
+ pkey_assert(fd >= 0);
+ pkey_assert(nr_test_fds < ARRAY_SIZE(test_fds));
+ test_fds[nr_test_fds] = fd;
+ nr_test_fds++;
+}
+
+int get_test_read_fd(void)
+{
+ int test_fd = open("/etc/passwd", O_RDONLY);
+ __save_test_fd(test_fd);
+ return test_fd;
+}
+
+void close_test_fds(void)
+{
+ int i;
+
+ for (i = 0; i < nr_test_fds; i++) {
+ if (test_fds[i] < 0)
+ continue;
+ close(test_fds[i]);
+ test_fds[i] = -1;
+ }
+ nr_test_fds = 0;
+}
+
+#define barrier() __asm__ __volatile__("": : :"memory")
+__attribute__((noinline)) int read_ptr(int *ptr)
+{
+ /*
+ * Keep GCC from optimizing this away somehow
+ */
+ barrier();
+ return *ptr;
+}
+
+void test_read_of_write_disabled_region(int *ptr, u16 pkey)
+{
+ int ptr_contents;
+
+ dprintf1("disabling write access to PKEY[1], doing read\n");
+ pkey_write_deny(pkey);
+ ptr_contents = read_ptr(ptr);
+ dprintf1("*ptr: %d\n", ptr_contents);
+ dprintf1("\n");
+}
+void test_read_of_access_disabled_region(int *ptr, u16 pkey)
+{
+ int ptr_contents;
+
+ dprintf1("disabling access to PKEY[%02d], doing read @ %p\n", pkey, ptr);
+ rdpkru();
+ pkey_access_deny(pkey);
+ ptr_contents = read_ptr(ptr);
+ dprintf1("*ptr: %d\n", ptr_contents);
+ expected_pk_fault(pkey);
+}
+void test_write_of_write_disabled_region(int *ptr, u16 pkey)
+{
+ dprintf1("disabling write access to PKEY[%02d], doing write\n", pkey);
+ pkey_write_deny(pkey);
+ *ptr = __LINE__;
+ expected_pk_fault(pkey);
+}
+void test_write_of_access_disabled_region(int *ptr, u16 pkey)
+{
+ dprintf1("disabling access to PKEY[%02d], doing write\n", pkey);
+ pkey_access_deny(pkey);
+ *ptr = __LINE__;
+ expected_pk_fault(pkey);
+}
+void test_kernel_write_of_access_disabled_region(int *ptr, u16 pkey)
+{
+ int ret;
+ int test_fd = get_test_read_fd();
+
+ dprintf1("disabling access to PKEY[%02d], "
+ "having kernel read() to buffer\n", pkey);
+ pkey_access_deny(pkey);
+ ret = read(test_fd, ptr, 1);
+ dprintf1("read ret: %d\n", ret);
+ pkey_assert(ret);
+}
+void test_kernel_write_of_write_disabled_region(int *ptr, u16 pkey)
+{
+ int ret;
+ int test_fd = get_test_read_fd();
+
+ pkey_write_deny(pkey);
+ ret = read(test_fd, ptr, 100);
+ dprintf1("read ret: %d\n", ret);
+ if (ret < 0 && (DEBUG_LEVEL > 0))
+ perror("verbose read result (OK for this to be bad)");
+ pkey_assert(ret);
+}
+
+void test_kernel_gup_of_access_disabled_region(int *ptr, u16 pkey)
+{
+ int pipe_ret, vmsplice_ret;
+ struct iovec iov;
+ int pipe_fds[2];
+
+ pipe_ret = pipe(pipe_fds);
+
+ pkey_assert(pipe_ret == 0);
+ dprintf1("disabling access to PKEY[%02d], "
+ "having kernel vmsplice from buffer\n", pkey);
+ pkey_access_deny(pkey);
+ iov.iov_base = ptr;
+ iov.iov_len = PAGE_SIZE;
+ vmsplice_ret = vmsplice(pipe_fds[1], &iov, 1, SPLICE_F_GIFT);
+ dprintf1("vmsplice() ret: %d\n", vmsplice_ret);
+ pkey_assert(vmsplice_ret == -1);
+
+ close(pipe_fds[0]);
+ close(pipe_fds[1]);
+}
+
+void test_kernel_gup_write_to_write_disabled_region(int *ptr, u16 pkey)
+{
+ int ignored = 0xdada;
+ int futex_ret;
+ int some_int = __LINE__;
+
+ dprintf1("disabling write to PKEY[%02d], "
+ "doing futex gunk in buffer\n", pkey);
+ *ptr = some_int;
+ pkey_write_deny(pkey);
+ futex_ret = syscall(SYS_futex, ptr, FUTEX_WAIT, some_int-1, NULL,
+ &ignored, ignored);
+ if (DEBUG_LEVEL > 0)
+ perror("futex");
+ dprintf1("futex() ret: %d\n", futex_ret);
+}
+
+/* Assumes that all pkeys other than 'pkey' are unallocated */
+void test_pkey_syscalls_on_non_allocated_pkey(int *ptr, u16 pkey)
+{
+ int err;
+ int i;
+
+ /* Note: 0 is the default pkey, so don't mess with it */
+ for (i = 1; i < NR_PKEYS; i++) {
+ if (pkey == i)
+ continue;
+
+ dprintf1("trying get/set/free to non-allocated pkey: %2d\n", i);
+ err = sys_pkey_free(i);
+ pkey_assert(err);
+
+ err = sys_pkey_free(i);
+ pkey_assert(err);
+
+ err = sys_mprotect_pkey(ptr, PAGE_SIZE, PROT_READ, i);
+ pkey_assert(err);
+ }
+}
+
+/* Assumes that all pkeys other than 'pkey' are unallocated */
+void test_pkey_syscalls_bad_args(int *ptr, u16 pkey)
+{
+ int err;
+ int bad_pkey = NR_PKEYS+99;
+
+ /* pass a known-invalid pkey in: */
+ err = sys_mprotect_pkey(ptr, PAGE_SIZE, PROT_READ, bad_pkey);
+ pkey_assert(err);
+}
+
+/* Assumes that all pkeys other than 'pkey' are unallocated */
+void test_pkey_alloc_exhaust(int *ptr, u16 pkey)
+{
+ int err;
+ int allocated_pkeys[NR_PKEYS] = {0};
+ int nr_allocated_pkeys = 0;
+ int i;
+
+ for (i = 0; i < NR_PKEYS*2; i++) {
+ int new_pkey;
+ dprintf1("%s() alloc loop: %d\n", __func__, i);
+ new_pkey = alloc_pkey();
+ dprintf4("%s()::%d, err: %d pkru: 0x%x shadow: 0x%x\n", __func__,
+ __LINE__, err, __rdpkru(), shadow_pkru);
+ rdpkru(); /* for shadow checking */
+ dprintf2("%s() errno: %d ENOSPC: %d\n", __func__, errno, ENOSPC);
+ if ((new_pkey == -1) && (errno == ENOSPC)) {
+ dprintf2("%s() failed to allocate pkey after %d tries\n",
+ __func__, nr_allocated_pkeys);
+ break;
+ }
+ pkey_assert(nr_allocated_pkeys < NR_PKEYS);
+ allocated_pkeys[nr_allocated_pkeys++] = new_pkey;
+ }
+
+ dprintf3("%s()::%d\n", __func__, __LINE__);
+
+ /*
+ * ensure it did not reach the end of the loop without
+ * failure:
+ */
+ pkey_assert(i < NR_PKEYS*2);
+
+ /*
+ * There are 16 pkeys supported in hardware. Three are
+ * allocated by the time we get here:
+ * 1. The default key (0)
+ * 2. One possibly consumed by an execute-only mapping.
+ * 3. One allocated by the test code and passed in via
+ * 'pkey' to this function.
+ * Ensure that we can allocate at least another 13 (16-3).
+ */
+ pkey_assert(i >= NR_PKEYS-3);
+
+ for (i = 0; i < nr_allocated_pkeys; i++) {
+ err = sys_pkey_free(allocated_pkeys[i]);
+ pkey_assert(!err);
+ rdpkru(); /* for shadow checking */
+ }
+}
+
+/*
+ * pkey 0 is special. It is allocated by default, so you do not
+ * have to call pkey_alloc() to use it first. Make sure that it
+ * is usable.
+ */
+void test_mprotect_with_pkey_0(int *ptr, u16 pkey)
+{
+ long size;
+ int prot;
+
+ assert(pkey_last_malloc_record);
+ size = pkey_last_malloc_record->size;
+ /*
+ * This is a bit of a hack. But mprotect() requires
+ * huge-page-aligned sizes when operating on hugetlbfs.
+ * So, make sure that we use something that's a multiple
+ * of a huge page when we can.
+ */
+ if (size >= HPAGE_SIZE)
+ size = HPAGE_SIZE;
+ prot = pkey_last_malloc_record->prot;
+
+ /* Use pkey 0 */
+ mprotect_pkey(ptr, size, prot, 0);
+
+ /* Make sure that we can set it back to the original pkey. */
+ mprotect_pkey(ptr, size, prot, pkey);
+}
+
+void test_ptrace_of_child(int *ptr, u16 pkey)
+{
+ __attribute__((__unused__)) int peek_result;
+ pid_t child_pid;
+ void *ignored = 0;
+ long ret;
+ int status;
+ /*
+ * This is the "control" for our little expermient. Make sure
+ * we can always access it when ptracing.
+ */
+ int *plain_ptr_unaligned = malloc(HPAGE_SIZE);
+ int *plain_ptr = ALIGN_PTR_UP(plain_ptr_unaligned, PAGE_SIZE);
+
+ /*
+ * Fork a child which is an exact copy of this process, of course.
+ * That means we can do all of our tests via ptrace() and then plain
+ * memory access and ensure they work differently.
+ */
+ child_pid = fork_lazy_child();
+ dprintf1("[%d] child pid: %d\n", getpid(), child_pid);
+
+ ret = ptrace(PTRACE_ATTACH, child_pid, ignored, ignored);
+ if (ret)
+ perror("attach");
+ dprintf1("[%d] attach ret: %ld %d\n", getpid(), ret, __LINE__);
+ pkey_assert(ret != -1);
+ ret = waitpid(child_pid, &status, WUNTRACED);
+ if ((ret != child_pid) || !(WIFSTOPPED(status))) {
+ fprintf(stderr, "weird waitpid result %ld stat %x\n",
+ ret, status);
+ pkey_assert(0);
+ }
+ dprintf2("waitpid ret: %ld\n", ret);
+ dprintf2("waitpid status: %d\n", status);
+
+ pkey_access_deny(pkey);
+ pkey_write_deny(pkey);
+
+ /* Write access, untested for now:
+ ret = ptrace(PTRACE_POKEDATA, child_pid, peek_at, data);
+ pkey_assert(ret != -1);
+ dprintf1("poke at %p: %ld\n", peek_at, ret);
+ */
+
+ /*
+ * Try to access the pkey-protected "ptr" via ptrace:
+ */
+ ret = ptrace(PTRACE_PEEKDATA, child_pid, ptr, ignored);
+ /* expect it to work, without an error: */
+ pkey_assert(ret != -1);
+ /* Now access from the current task, and expect an exception: */
+ peek_result = read_ptr(ptr);
+ expected_pk_fault(pkey);
+
+ /*
+ * Try to access the NON-pkey-protected "plain_ptr" via ptrace:
+ */
+ ret = ptrace(PTRACE_PEEKDATA, child_pid, plain_ptr, ignored);
+ /* expect it to work, without an error: */
+ pkey_assert(ret != -1);
+ /* Now access from the current task, and expect NO exception: */
+ peek_result = read_ptr(plain_ptr);
+ do_not_expect_pk_fault("read plain pointer after ptrace");
+
+ ret = ptrace(PTRACE_DETACH, child_pid, ignored, 0);
+ pkey_assert(ret != -1);
+
+ ret = kill(child_pid, SIGKILL);
+ pkey_assert(ret != -1);
+
+ wait(&status);
+
+ free(plain_ptr_unaligned);
+}
+
+void *get_pointer_to_instructions(void)
+{
+ void *p1;
+
+ p1 = ALIGN_PTR_UP(&lots_o_noops_around_write, PAGE_SIZE);
+ dprintf3("&lots_o_noops: %p\n", &lots_o_noops_around_write);
+ /* lots_o_noops_around_write should be page-aligned already */
+ assert(p1 == &lots_o_noops_around_write);
+
+ /* Point 'p1' at the *second* page of the function: */
+ p1 += PAGE_SIZE;
+
+ /*
+ * Try to ensure we fault this in on next touch to ensure
+ * we get an instruction fault as opposed to a data one
+ */
+ madvise(p1, PAGE_SIZE, MADV_DONTNEED);
+
+ return p1;
+}
+
+void test_executing_on_unreadable_memory(int *ptr, u16 pkey)
+{
+ void *p1;
+ int scratch;
+ int ptr_contents;
+ int ret;
+
+ p1 = get_pointer_to_instructions();
+ lots_o_noops_around_write(&scratch);
+ ptr_contents = read_ptr(p1);
+ dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents);
+
+ ret = mprotect_pkey(p1, PAGE_SIZE, PROT_EXEC, (u64)pkey);
+ pkey_assert(!ret);
+ pkey_access_deny(pkey);
+
+ dprintf2("pkru: %x\n", rdpkru());
+
+ /*
+ * Make sure this is an *instruction* fault
+ */
+ madvise(p1, PAGE_SIZE, MADV_DONTNEED);
+ lots_o_noops_around_write(&scratch);
+ do_not_expect_pk_fault("executing on PROT_EXEC memory");
+ ptr_contents = read_ptr(p1);
+ dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents);
+ expected_pk_fault(pkey);
+}
+
+void test_implicit_mprotect_exec_only_memory(int *ptr, u16 pkey)
+{
+ void *p1;
+ int scratch;
+ int ptr_contents;
+ int ret;
+
+ dprintf1("%s() start\n", __func__);
+
+ p1 = get_pointer_to_instructions();
+ lots_o_noops_around_write(&scratch);
+ ptr_contents = read_ptr(p1);
+ dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents);
+
+ /* Use a *normal* mprotect(), not mprotect_pkey(): */
+ ret = mprotect(p1, PAGE_SIZE, PROT_EXEC);
+ pkey_assert(!ret);
+
+ dprintf2("pkru: %x\n", rdpkru());
+
+ /* Make sure this is an *instruction* fault */
+ madvise(p1, PAGE_SIZE, MADV_DONTNEED);
+ lots_o_noops_around_write(&scratch);
+ do_not_expect_pk_fault("executing on PROT_EXEC memory");
+ ptr_contents = read_ptr(p1);
+ dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents);
+ expected_pk_fault(UNKNOWN_PKEY);
+
+ /*
+ * Put the memory back to non-PROT_EXEC. Should clear the
+ * exec-only pkey off the VMA and allow it to be readable
+ * again. Go to PROT_NONE first to check for a kernel bug
+ * that did not clear the pkey when doing PROT_NONE.
+ */
+ ret = mprotect(p1, PAGE_SIZE, PROT_NONE);
+ pkey_assert(!ret);
+
+ ret = mprotect(p1, PAGE_SIZE, PROT_READ|PROT_EXEC);
+ pkey_assert(!ret);
+ ptr_contents = read_ptr(p1);
+ do_not_expect_pk_fault("plain read on recently PROT_EXEC area");
+}
+
+void test_mprotect_pkey_on_unsupported_cpu(int *ptr, u16 pkey)
+{
+ int size = PAGE_SIZE;
+ int sret;
+
+ if (cpu_has_pku()) {
+ dprintf1("SKIP: %s: no CPU support\n", __func__);
+ return;
+ }
+
+ sret = syscall(SYS_mprotect_key, ptr, size, PROT_READ, pkey);
+ pkey_assert(sret < 0);
+}
+
+void (*pkey_tests[])(int *ptr, u16 pkey) = {
+ test_read_of_write_disabled_region,
+ test_read_of_access_disabled_region,
+ test_write_of_write_disabled_region,
+ test_write_of_access_disabled_region,
+ test_kernel_write_of_access_disabled_region,
+ test_kernel_write_of_write_disabled_region,
+ test_kernel_gup_of_access_disabled_region,
+ test_kernel_gup_write_to_write_disabled_region,
+ test_executing_on_unreadable_memory,
+ test_implicit_mprotect_exec_only_memory,
+ test_mprotect_with_pkey_0,
+ test_ptrace_of_child,
+ test_pkey_syscalls_on_non_allocated_pkey,
+ test_pkey_syscalls_bad_args,
+ test_pkey_alloc_exhaust,
+};
+
+void run_tests_once(void)
+{
+ int *ptr;
+ int prot = PROT_READ|PROT_WRITE;
+
+ for (test_nr = 0; test_nr < ARRAY_SIZE(pkey_tests); test_nr++) {
+ int pkey;
+ int orig_pkru_faults = pkru_faults;
+
+ dprintf1("======================\n");
+ dprintf1("test %d preparing...\n", test_nr);
+
+ tracing_on();
+ pkey = alloc_random_pkey();
+ dprintf1("test %d starting with pkey: %d\n", test_nr, pkey);
+ ptr = malloc_pkey(PAGE_SIZE, prot, pkey);
+ dprintf1("test %d starting...\n", test_nr);
+ pkey_tests[test_nr](ptr, pkey);
+ dprintf1("freeing test memory: %p\n", ptr);
+ free_pkey_malloc(ptr);
+ sys_pkey_free(pkey);
+
+ dprintf1("pkru_faults: %d\n", pkru_faults);
+ dprintf1("orig_pkru_faults: %d\n", orig_pkru_faults);
+
+ tracing_off();
+ close_test_fds();
+
+ printf("test %2d PASSED (iteration %d)\n", test_nr, iteration_nr);
+ dprintf1("======================\n\n");
+ }
+ iteration_nr++;
+}
+
+void pkey_setup_shadow(void)
+{
+ shadow_pkru = __rdpkru();
+}
+
+int main(void)
+{
+ int nr_iterations = 22;
+
+ setup_handlers();
+
+ printf("has pku: %d\n", cpu_has_pku());
+
+ if (!cpu_has_pku()) {
+ int size = PAGE_SIZE;
+ int *ptr;
+
+ printf("running PKEY tests for unsupported CPU/OS\n");
+
+ ptr = mmap(NULL, size, PROT_NONE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
+ assert(ptr != (void *)-1);
+ test_mprotect_pkey_on_unsupported_cpu(ptr, 1);
+ exit(0);
+ }
+
+ pkey_setup_shadow();
+ printf("startup pkru: %x\n", rdpkru());
+ setup_hugetlbfs();
+
+ while (nr_iterations-- > 0)
+ run_tests_once();
+
+ printf("done (all tests OK)\n");
+ return 0;
+}
diff --git a/tools/testing/selftests/x86/.gitignore b/tools/testing/selftests/x86/.gitignore
index 7757f73..eb30ffd 100644
--- a/tools/testing/selftests/x86/.gitignore
+++ b/tools/testing/selftests/x86/.gitignore
@@ -11,5 +11,4 @@ ldt_gdt
iopl
mpx-mini-test
ioperm
-protection_keys
test_vdso
diff --git a/tools/testing/selftests/x86/pkey-helpers.h b/tools/testing/selftests/x86/pkey-helpers.h
deleted file mode 100644
index 254e543..0000000
--- a/tools/testing/selftests/x86/pkey-helpers.h
+++ /dev/null
@@ -1,219 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _PKEYS_HELPER_H
-#define _PKEYS_HELPER_H
-#define _GNU_SOURCE
-#include <string.h>
-#include <stdarg.h>
-#include <stdio.h>
-#include <stdint.h>
-#include <stdbool.h>
-#include <signal.h>
-#include <assert.h>
-#include <stdlib.h>
-#include <ucontext.h>
-#include <sys/mman.h>
-
-#define NR_PKEYS 16
-#define PKRU_BITS_PER_PKEY 2
-
-#ifndef DEBUG_LEVEL
-#define DEBUG_LEVEL 0
-#endif
-#define DPRINT_IN_SIGNAL_BUF_SIZE 4096
-extern int dprint_in_signal;
-extern char dprint_in_signal_buffer[DPRINT_IN_SIGNAL_BUF_SIZE];
-static inline void sigsafe_printf(const char *format, ...)
-{
- va_list ap;
-
- if (!dprint_in_signal) {
- va_start(ap, format);
- vprintf(format, ap);
- va_end(ap);
- } else {
- int ret;
- /*
- * No printf() functions are signal-safe.
- * They deadlock easily. Write the format
- * string to get some output, even if
- * incomplete.
- */
- ret = write(1, format, strlen(format));
- if (ret < 0)
- exit(1);
- }
-}
-#define dprintf_level(level, args...) do { \
- if (level <= DEBUG_LEVEL) \
- sigsafe_printf(args); \
-} while (0)
-#define dprintf0(args...) dprintf_level(0, args)
-#define dprintf1(args...) dprintf_level(1, args)
-#define dprintf2(args...) dprintf_level(2, args)
-#define dprintf3(args...) dprintf_level(3, args)
-#define dprintf4(args...) dprintf_level(4, args)
-
-extern unsigned int shadow_pkru;
-static inline unsigned int __rdpkru(void)
-{
- unsigned int eax, edx;
- unsigned int ecx = 0;
- unsigned int pkru;
-
- asm volatile(".byte 0x0f,0x01,0xee\n\t"
- : "=a" (eax), "=d" (edx)
- : "c" (ecx));
- pkru = eax;
- return pkru;
-}
-
-static inline unsigned int _rdpkru(int line)
-{
- unsigned int pkru = __rdpkru();
-
- dprintf4("rdpkru(line=%d) pkru: %x shadow: %x\n",
- line, pkru, shadow_pkru);
- assert(pkru == shadow_pkru);
-
- return pkru;
-}
-
-#define rdpkru() _rdpkru(__LINE__)
-
-static inline void __wrpkru(unsigned int pkru)
-{
- unsigned int eax = pkru;
- unsigned int ecx = 0;
- unsigned int edx = 0;
-
- dprintf4("%s() changing %08x to %08x\n", __func__, __rdpkru(), pkru);
- asm volatile(".byte 0x0f,0x01,0xef\n\t"
- : : "a" (eax), "c" (ecx), "d" (edx));
- assert(pkru == __rdpkru());
-}
-
-static inline void wrpkru(unsigned int pkru)
-{
- dprintf4("%s() changing %08x to %08x\n", __func__, __rdpkru(), pkru);
- /* will do the shadow check for us: */
- rdpkru();
- __wrpkru(pkru);
- shadow_pkru = pkru;
- dprintf4("%s(%08x) pkru: %08x\n", __func__, pkru, __rdpkru());
-}
-
-/*
- * These are technically racy. since something could
- * change PKRU between the read and the write.
- */
-static inline void __pkey_access_allow(int pkey, int do_allow)
-{
- unsigned int pkru = rdpkru();
- int bit = pkey * 2;
-
- if (do_allow)
- pkru &= (1<<bit);
- else
- pkru |= (1<<bit);
-
- dprintf4("pkru now: %08x\n", rdpkru());
- wrpkru(pkru);
-}
-
-static inline void __pkey_write_allow(int pkey, int do_allow_write)
-{
- long pkru = rdpkru();
- int bit = pkey * 2 + 1;
-
- if (do_allow_write)
- pkru &= (1<<bit);
- else
- pkru |= (1<<bit);
-
- wrpkru(pkru);
- dprintf4("pkru now: %08x\n", rdpkru());
-}
-
-#define PROT_PKEY0 0x10 /* protection key value (bit 0) */
-#define PROT_PKEY1 0x20 /* protection key value (bit 1) */
-#define PROT_PKEY2 0x40 /* protection key value (bit 2) */
-#define PROT_PKEY3 0x80 /* protection key value (bit 3) */
-
-#define PAGE_SIZE 4096
-#define MB (1<<20)
-
-static inline void __cpuid(unsigned int *eax, unsigned int *ebx,
- unsigned int *ecx, unsigned int *edx)
-{
- /* ecx is often an input as well as an output. */
- asm volatile(
- "cpuid;"
- : "=a" (*eax),
- "=b" (*ebx),
- "=c" (*ecx),
- "=d" (*edx)
- : "0" (*eax), "2" (*ecx));
-}
-
-/* Intel-defined CPU features, CPUID level 0x00000007:0 (ecx) */
-#define X86_FEATURE_PKU (1<<3) /* Protection Keys for Userspace */
-#define X86_FEATURE_OSPKE (1<<4) /* OS Protection Keys Enable */
-
-static inline int cpu_has_pku(void)
-{
- unsigned int eax;
- unsigned int ebx;
- unsigned int ecx;
- unsigned int edx;
-
- eax = 0x7;
- ecx = 0x0;
- __cpuid(&eax, &ebx, &ecx, &edx);
-
- if (!(ecx & X86_FEATURE_PKU)) {
- dprintf2("cpu does not have PKU\n");
- return 0;
- }
- if (!(ecx & X86_FEATURE_OSPKE)) {
- dprintf2("cpu does not have OSPKE\n");
- return 0;
- }
- return 1;
-}
-
-#define XSTATE_PKRU_BIT (9)
-#define XSTATE_PKRU 0x200
-
-int pkru_xstate_offset(void)
-{
- unsigned int eax;
- unsigned int ebx;
- unsigned int ecx;
- unsigned int edx;
- int xstate_offset;
- int xstate_size;
- unsigned long XSTATE_CPUID = 0xd;
- int leaf;
-
- /* assume that XSTATE_PKRU is set in XCR0 */
- leaf = XSTATE_PKRU_BIT;
- {
- eax = XSTATE_CPUID;
- ecx = leaf;
- __cpuid(&eax, &ebx, &ecx, &edx);
-
- if (leaf == XSTATE_PKRU_BIT) {
- xstate_offset = ebx;
- xstate_size = eax;
- }
- }
-
- if (xstate_size == 0) {
- printf("could not find size/offset of PKRU in xsave state\n");
- return 0;
- }
-
- return xstate_offset;
-}
-
-#endif /* _PKEYS_HELPER_H */
diff --git a/tools/testing/selftests/x86/protection_keys.c b/tools/testing/selftests/x86/protection_keys.c
deleted file mode 100644
index 460b4bd..0000000
--- a/tools/testing/selftests/x86/protection_keys.c
+++ /dev/null
@@ -1,1485 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Tests x86 Memory Protection Keys (see Documentation/x86/protection-keys.txt)
- *
- * There are examples in here of:
- * * how to set protection keys on memory
- * * how to set/clear bits in PKRU (the rights register)
- * * how to handle SEGV_PKRU signals and extract pkey-relevant
- * information from the siginfo
- *
- * Things to add:
- * make sure KSM and KSM COW breaking works
- * prefault pages in at malloc, or not
- * protect MPX bounds tables with protection keys?
- * make sure VMA splitting/merging is working correctly
- * OOMs can destroy mm->mmap (see exit_mmap()), so make sure it is immune to pkeys
- * look for pkey "leaks" where it is still set on a VMA but "freed" back to the kernel
- * do a plain mprotect() to a mprotect_pkey() area and make sure the pkey sticks
- *
- * Compile like this:
- * gcc -o protection_keys -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm
- * gcc -m32 -o protection_keys_32 -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm
- */
-#define _GNU_SOURCE
-#include <errno.h>
-#include <linux/futex.h>
-#include <sys/time.h>
-#include <sys/syscall.h>
-#include <string.h>
-#include <stdio.h>
-#include <stdint.h>
-#include <stdbool.h>
-#include <signal.h>
-#include <assert.h>
-#include <stdlib.h>
-#include <ucontext.h>
-#include <sys/mman.h>
-#include <sys/types.h>
-#include <sys/wait.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <unistd.h>
-#include <sys/ptrace.h>
-#include <setjmp.h>
-
-#include "pkey-helpers.h"
-
-int iteration_nr = 1;
-int test_nr;
-
-unsigned int shadow_pkru;
-
-#define HPAGE_SIZE (1UL<<21)
-#define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x)))
-#define ALIGN_UP(x, align_to) (((x) + ((align_to)-1)) & ~((align_to)-1))
-#define ALIGN_DOWN(x, align_to) ((x) & ~((align_to)-1))
-#define ALIGN_PTR_UP(p, ptr_align_to) ((typeof(p))ALIGN_UP((unsigned long)(p), ptr_align_to))
-#define ALIGN_PTR_DOWN(p, ptr_align_to) ((typeof(p))ALIGN_DOWN((unsigned long)(p), ptr_align_to))
-#define __stringify_1(x...) #x
-#define __stringify(x...) __stringify_1(x)
-
-#define PTR_ERR_ENOTSUP ((void *)-ENOTSUP)
-
-int dprint_in_signal;
-char dprint_in_signal_buffer[DPRINT_IN_SIGNAL_BUF_SIZE];
-
-extern void abort_hooks(void);
-#define pkey_assert(condition) do { \
- if (!(condition)) { \
- dprintf0("assert() at %s::%d test_nr: %d iteration: %d\n", \
- __FILE__, __LINE__, \
- test_nr, iteration_nr); \
- dprintf0("errno at assert: %d", errno); \
- abort_hooks(); \
- exit(__LINE__); \
- } \
-} while (0)
-
-void cat_into_file(char *str, char *file)
-{
- int fd = open(file, O_RDWR);
- int ret;
-
- dprintf2("%s(): writing '%s' to '%s'\n", __func__, str, file);
- /*
- * these need to be raw because they are called under
- * pkey_assert()
- */
- if (fd < 0) {
- fprintf(stderr, "error opening '%s'\n", str);
- perror("error: ");
- exit(__LINE__);
- }
-
- ret = write(fd, str, strlen(str));
- if (ret != strlen(str)) {
- perror("write to file failed");
- fprintf(stderr, "filename: '%s' str: '%s'\n", file, str);
- exit(__LINE__);
- }
- close(fd);
-}
-
-#if CONTROL_TRACING > 0
-static int warned_tracing;
-int tracing_root_ok(void)
-{
- if (geteuid() != 0) {
- if (!warned_tracing)
- fprintf(stderr, "WARNING: not run as root, "
- "can not do tracing control\n");
- warned_tracing = 1;
- return 0;
- }
- return 1;
-}
-#endif
-
-void tracing_on(void)
-{
-#if CONTROL_TRACING > 0
-#define TRACEDIR "/sys/kernel/debug/tracing"
- char pidstr[32];
-
- if (!tracing_root_ok())
- return;
-
- sprintf(pidstr, "%d", getpid());
- cat_into_file("0", TRACEDIR "/tracing_on");
- cat_into_file("\n", TRACEDIR "/trace");
- if (1) {
- cat_into_file("function_graph", TRACEDIR "/current_tracer");
- cat_into_file("1", TRACEDIR "/options/funcgraph-proc");
- } else {
- cat_into_file("nop", TRACEDIR "/current_tracer");
- }
- cat_into_file(pidstr, TRACEDIR "/set_ftrace_pid");
- cat_into_file("1", TRACEDIR "/tracing_on");
- dprintf1("enabled tracing\n");
-#endif
-}
-
-void tracing_off(void)
-{
-#if CONTROL_TRACING > 0
- if (!tracing_root_ok())
- return;
- cat_into_file("0", "/sys/kernel/debug/tracing/tracing_on");
-#endif
-}
-
-void abort_hooks(void)
-{
- fprintf(stderr, "running %s()...\n", __func__);
- tracing_off();
-#ifdef SLEEP_ON_ABORT
- sleep(SLEEP_ON_ABORT);
-#endif
-}
-
-static inline void __page_o_noops(void)
-{
- /* 8-bytes of instruction * 512 bytes = 1 page */
- asm(".rept 512 ; nopl 0x7eeeeeee(%eax) ; .endr");
-}
-
-/*
- * This attempts to have roughly a page of instructions followed by a few
- * instructions that do a write, and another page of instructions. That
- * way, we are pretty sure that the write is in the second page of
- * instructions and has at least a page of padding behind it.
- *
- * *That* lets us be sure to madvise() away the write instruction, which
- * will then fault, which makes sure that the fault code handles
- * execute-only memory properly.
- */
-__attribute__((__aligned__(PAGE_SIZE)))
-void lots_o_noops_around_write(int *write_to_me)
-{
- dprintf3("running %s()\n", __func__);
- __page_o_noops();
- /* Assume this happens in the second page of instructions: */
- *write_to_me = __LINE__;
- /* pad out by another page: */
- __page_o_noops();
- dprintf3("%s() done\n", __func__);
-}
-
-/* Define some kernel-like types */
-#define u8 uint8_t
-#define u16 uint16_t
-#define u32 uint32_t
-#define u64 uint64_t
-
-#ifdef __i386__
-
-#ifndef SYS_mprotect_key
-# define SYS_mprotect_key 380
-#endif
-
-#ifndef SYS_pkey_alloc
-# define SYS_pkey_alloc 381
-# define SYS_pkey_free 382
-#endif
-
-#define REG_IP_IDX REG_EIP
-#define si_pkey_offset 0x14
-
-#else
-
-#ifndef SYS_mprotect_key
-# define SYS_mprotect_key 329
-#endif
-
-#ifndef SYS_pkey_alloc
-# define SYS_pkey_alloc 330
-# define SYS_pkey_free 331
-#endif
-
-#define REG_IP_IDX REG_RIP
-#define si_pkey_offset 0x20
-
-#endif
-
-void dump_mem(void *dumpme, int len_bytes)
-{
- char *c = (void *)dumpme;
- int i;
-
- for (i = 0; i < len_bytes; i += sizeof(u64)) {
- u64 *ptr = (u64 *)(c + i);
- dprintf1("dump[%03d][@%p]: %016jx\n", i, ptr, *ptr);
- }
-}
-
-/* Failed address bound checks: */
-#ifndef SEGV_BNDERR
-# define SEGV_BNDERR 3
-#endif
-
-#ifndef SEGV_PKUERR
-# define SEGV_PKUERR 4
-#endif
-
-static char *si_code_str(int si_code)
-{
- if (si_code == SEGV_MAPERR)
- return "SEGV_MAPERR";
- if (si_code == SEGV_ACCERR)
- return "SEGV_ACCERR";
- if (si_code == SEGV_BNDERR)
- return "SEGV_BNDERR";
- if (si_code == SEGV_PKUERR)
- return "SEGV_PKUERR";
- return "UNKNOWN";
-}
-
-int pkru_faults;
-int last_si_pkey = -1;
-void signal_handler(int signum, siginfo_t *si, void *vucontext)
-{
- ucontext_t *uctxt = vucontext;
- int trapno;
- unsigned long ip;
- char *fpregs;
- u32 *pkru_ptr;
- u64 siginfo_pkey;
- u32 *si_pkey_ptr;
- int pkru_offset;
- fpregset_t fpregset;
-
- dprint_in_signal = 1;
- dprintf1(">>>>===============SIGSEGV============================\n");
- dprintf1("%s()::%d, pkru: 0x%x shadow: %x\n", __func__, __LINE__,
- __rdpkru(), shadow_pkru);
-
- trapno = uctxt->uc_mcontext.gregs[REG_TRAPNO];
- ip = uctxt->uc_mcontext.gregs[REG_IP_IDX];
- fpregset = uctxt->uc_mcontext.fpregs;
- fpregs = (void *)fpregset;
-
- dprintf2("%s() trapno: %d ip: 0x%lx info->si_code: %s/%d\n", __func__,
- trapno, ip, si_code_str(si->si_code), si->si_code);
-#ifdef __i386__
- /*
- * 32-bit has some extra padding so that userspace can tell whether
- * the XSTATE header is present in addition to the "legacy" FPU
- * state. We just assume that it is here.
- */
- fpregs += 0x70;
-#endif
- pkru_offset = pkru_xstate_offset();
- pkru_ptr = (void *)(&fpregs[pkru_offset]);
-
- dprintf1("siginfo: %p\n", si);
- dprintf1(" fpregs: %p\n", fpregs);
- /*
- * If we got a PKRU fault, we *HAVE* to have at least one bit set in
- * here.
- */
- dprintf1("pkru_xstate_offset: %d\n", pkru_xstate_offset());
- if (DEBUG_LEVEL > 4)
- dump_mem(pkru_ptr - 128, 256);
- pkey_assert(*pkru_ptr);
-
- if ((si->si_code == SEGV_MAPERR) ||
- (si->si_code == SEGV_ACCERR) ||
- (si->si_code == SEGV_BNDERR)) {
- printf("non-PK si_code, exiting...\n");
- exit(4);
- }
-
- si_pkey_ptr = (u32 *)(((u8 *)si) + si_pkey_offset);
- dprintf1("si_pkey_ptr: %p\n", si_pkey_ptr);
- dump_mem((u8 *)si_pkey_ptr - 8, 24);
- siginfo_pkey = *si_pkey_ptr;
- pkey_assert(siginfo_pkey < NR_PKEYS);
- last_si_pkey = siginfo_pkey;
-
- dprintf1("signal pkru from xsave: %08x\n", *pkru_ptr);
- /* need __rdpkru() version so we do not do shadow_pkru checking */
- dprintf1("signal pkru from pkru: %08x\n", __rdpkru());
- dprintf1("pkey from siginfo: %jx\n", siginfo_pkey);
- *(u64 *)pkru_ptr = 0x00000000;
- dprintf1("WARNING: set PRKU=0 to allow faulting instruction to continue\n");
- pkru_faults++;
- dprintf1("<<<<==================================================\n");
- dprint_in_signal = 0;
-}
-
-int wait_all_children(void)
-{
- int status;
- return waitpid(-1, &status, 0);
-}
-
-void sig_chld(int x)
-{
- dprint_in_signal = 1;
- dprintf2("[%d] SIGCHLD: %d\n", getpid(), x);
- dprint_in_signal = 0;
-}
-
-void setup_sigsegv_handler(void)
-{
- int r, rs;
- struct sigaction newact;
- struct sigaction oldact;
-
- /* #PF is mapped to sigsegv */
- int signum = SIGSEGV;
-
- newact.sa_handler = 0;
- newact.sa_sigaction = signal_handler;
-
- /*sigset_t - signals to block while in the handler */
- /* get the old signal mask. */
- rs = sigprocmask(SIG_SETMASK, 0, &newact.sa_mask);
- pkey_assert(rs == 0);
-
- /* call sa_sigaction, not sa_handler*/
- newact.sa_flags = SA_SIGINFO;
-
- newact.sa_restorer = 0; /* void(*)(), obsolete */
- r = sigaction(signum, &newact, &oldact);
- r = sigaction(SIGALRM, &newact, &oldact);
- pkey_assert(r == 0);
-}
-
-void setup_handlers(void)
-{
- signal(SIGCHLD, &sig_chld);
- setup_sigsegv_handler();
-}
-
-pid_t fork_lazy_child(void)
-{
- pid_t forkret;
-
- forkret = fork();
- pkey_assert(forkret >= 0);
- dprintf3("[%d] fork() ret: %d\n", getpid(), forkret);
-
- if (!forkret) {
- /* in the child */
- while (1) {
- dprintf1("child sleeping...\n");
- sleep(30);
- }
- }
- return forkret;
-}
-
-#ifndef PKEY_DISABLE_ACCESS
-# define PKEY_DISABLE_ACCESS 0x1
-#endif
-
-#ifndef PKEY_DISABLE_WRITE
-# define PKEY_DISABLE_WRITE 0x2
-#endif
-
-static u32 hw_pkey_get(int pkey, unsigned long flags)
-{
- u32 mask = (PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE);
- u32 pkru = __rdpkru();
- u32 shifted_pkru;
- u32 masked_pkru;
-
- dprintf1("%s(pkey=%d, flags=%lx) = %x / %d\n",
- __func__, pkey, flags, 0, 0);
- dprintf2("%s() raw pkru: %x\n", __func__, pkru);
-
- shifted_pkru = (pkru >> (pkey * PKRU_BITS_PER_PKEY));
- dprintf2("%s() shifted_pkru: %x\n", __func__, shifted_pkru);
- masked_pkru = shifted_pkru & mask;
- dprintf2("%s() masked pkru: %x\n", __func__, masked_pkru);
- /*
- * shift down the relevant bits to the lowest two, then
- * mask off all the other high bits.
- */
- return masked_pkru;
-}
-
-static int hw_pkey_set(int pkey, unsigned long rights, unsigned long flags)
-{
- u32 mask = (PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE);
- u32 old_pkru = __rdpkru();
- u32 new_pkru;
-
- /* make sure that 'rights' only contains the bits we expect: */
- assert(!(rights & ~mask));
-
- /* copy old pkru */
- new_pkru = old_pkru;
- /* mask out bits from pkey in old value: */
- new_pkru &= ~(mask << (pkey * PKRU_BITS_PER_PKEY));
- /* OR in new bits for pkey: */
- new_pkru |= (rights << (pkey * PKRU_BITS_PER_PKEY));
-
- __wrpkru(new_pkru);
-
- dprintf3("%s(pkey=%d, rights=%lx, flags=%lx) = %x pkru now: %x old_pkru: %x\n",
- __func__, pkey, rights, flags, 0, __rdpkru(), old_pkru);
- return 0;
-}
-
-void pkey_disable_set(int pkey, int flags)
-{
- unsigned long syscall_flags = 0;
- int ret;
- int pkey_rights;
- u32 orig_pkru = rdpkru();
-
- dprintf1("START->%s(%d, 0x%x)\n", __func__,
- pkey, flags);
- pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE));
-
- pkey_rights = hw_pkey_get(pkey, syscall_flags);
-
- dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__,
- pkey, pkey, pkey_rights);
- pkey_assert(pkey_rights >= 0);
-
- pkey_rights |= flags;
-
- ret = hw_pkey_set(pkey, pkey_rights, syscall_flags);
- assert(!ret);
- /*pkru and flags have the same format */
- shadow_pkru |= flags << (pkey * 2);
- dprintf1("%s(%d) shadow: 0x%x\n", __func__, pkey, shadow_pkru);
-
- pkey_assert(ret >= 0);
-
- pkey_rights = hw_pkey_get(pkey, syscall_flags);
- dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__,
- pkey, pkey, pkey_rights);
-
- dprintf1("%s(%d) pkru: 0x%x\n", __func__, pkey, rdpkru());
- if (flags)
- pkey_assert(rdpkru() > orig_pkru);
- dprintf1("END<---%s(%d, 0x%x)\n", __func__,
- pkey, flags);
-}
-
-void pkey_disable_clear(int pkey, int flags)
-{
- unsigned long syscall_flags = 0;
- int ret;
- int pkey_rights = hw_pkey_get(pkey, syscall_flags);
- u32 orig_pkru = rdpkru();
-
- pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE));
-
- dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__,
- pkey, pkey, pkey_rights);
- pkey_assert(pkey_rights >= 0);
-
- pkey_rights |= flags;
-
- ret = hw_pkey_set(pkey, pkey_rights, 0);
- /* pkru and flags have the same format */
- shadow_pkru &= ~(flags << (pkey * 2));
- pkey_assert(ret >= 0);
-
- pkey_rights = hw_pkey_get(pkey, syscall_flags);
- dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__,
- pkey, pkey, pkey_rights);
-
- dprintf1("%s(%d) pkru: 0x%x\n", __func__, pkey, rdpkru());
- if (flags)
- assert(rdpkru() > orig_pkru);
-}
-
-void pkey_write_allow(int pkey)
-{
- pkey_disable_clear(pkey, PKEY_DISABLE_WRITE);
-}
-void pkey_write_deny(int pkey)
-{
- pkey_disable_set(pkey, PKEY_DISABLE_WRITE);
-}
-void pkey_access_allow(int pkey)
-{
- pkey_disable_clear(pkey, PKEY_DISABLE_ACCESS);
-}
-void pkey_access_deny(int pkey)
-{
- pkey_disable_set(pkey, PKEY_DISABLE_ACCESS);
-}
-
-int sys_mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot,
- unsigned long pkey)
-{
- int sret;
-
- dprintf2("%s(0x%p, %zx, prot=%lx, pkey=%lx)\n", __func__,
- ptr, size, orig_prot, pkey);
-
- errno = 0;
- sret = syscall(SYS_mprotect_key, ptr, size, orig_prot, pkey);
- if (errno) {
- dprintf2("SYS_mprotect_key sret: %d\n", sret);
- dprintf2("SYS_mprotect_key prot: 0x%lx\n", orig_prot);
- dprintf2("SYS_mprotect_key failed, errno: %d\n", errno);
- if (DEBUG_LEVEL >= 2)
- perror("SYS_mprotect_pkey");
- }
- return sret;
-}
-
-int sys_pkey_alloc(unsigned long flags, unsigned long init_val)
-{
- int ret = syscall(SYS_pkey_alloc, flags, init_val);
- dprintf1("%s(flags=%lx, init_val=%lx) syscall ret: %d errno: %d\n",
- __func__, flags, init_val, ret, errno);
- return ret;
-}
-
-int alloc_pkey(void)
-{
- int ret;
- unsigned long init_val = 0x0;
-
- dprintf1("alloc_pkey()::%d, pkru: 0x%x shadow: %x\n",
- __LINE__, __rdpkru(), shadow_pkru);
- ret = sys_pkey_alloc(0, init_val);
- /*
- * pkey_alloc() sets PKRU, so we need to reflect it in
- * shadow_pkru:
- */
- dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n",
- __LINE__, ret, __rdpkru(), shadow_pkru);
- if (ret) {
- /* clear both the bits: */
- shadow_pkru &= ~(0x3 << (ret * 2));
- dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n",
- __LINE__, ret, __rdpkru(), shadow_pkru);
- /*
- * move the new state in from init_val
- * (remember, we cheated and init_val == pkru format)
- */
- shadow_pkru |= (init_val << (ret * 2));
- }
- dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n",
- __LINE__, ret, __rdpkru(), shadow_pkru);
- dprintf1("alloc_pkey()::%d errno: %d\n", __LINE__, errno);
- /* for shadow checking: */
- rdpkru();
- dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n",
- __LINE__, ret, __rdpkru(), shadow_pkru);
- return ret;
-}
-
-int sys_pkey_free(unsigned long pkey)
-{
- int ret = syscall(SYS_pkey_free, pkey);
- dprintf1("%s(pkey=%ld) syscall ret: %d\n", __func__, pkey, ret);
- return ret;
-}
-
-/*
- * I had a bug where pkey bits could be set by mprotect() but
- * not cleared. This ensures we get lots of random bit sets
- * and clears on the vma and pte pkey bits.
- */
-int alloc_random_pkey(void)
-{
- int max_nr_pkey_allocs;
- int ret;
- int i;
- int alloced_pkeys[NR_PKEYS];
- int nr_alloced = 0;
- int random_index;
- memset(alloced_pkeys, 0, sizeof(alloced_pkeys));
-
- /* allocate every possible key and make a note of which ones we got */
- max_nr_pkey_allocs = NR_PKEYS;
- max_nr_pkey_allocs = 1;
- for (i = 0; i < max_nr_pkey_allocs; i++) {
- int new_pkey = alloc_pkey();
- if (new_pkey < 0)
- break;
- alloced_pkeys[nr_alloced++] = new_pkey;
- }
-
- pkey_assert(nr_alloced > 0);
- /* select a random one out of the allocated ones */
- random_index = rand() % nr_alloced;
- ret = alloced_pkeys[random_index];
- /* now zero it out so we don't free it next */
- alloced_pkeys[random_index] = 0;
-
- /* go through the allocated ones that we did not want and free them */
- for (i = 0; i < nr_alloced; i++) {
- int free_ret;
- if (!alloced_pkeys[i])
- continue;
- free_ret = sys_pkey_free(alloced_pkeys[i]);
- pkey_assert(!free_ret);
- }
- dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__,
- __LINE__, ret, __rdpkru(), shadow_pkru);
- return ret;
-}
-
-int mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot,
- unsigned long pkey)
-{
- int nr_iterations = random() % 100;
- int ret;
-
- while (0) {
- int rpkey = alloc_random_pkey();
- ret = sys_mprotect_pkey(ptr, size, orig_prot, pkey);
- dprintf1("sys_mprotect_pkey(%p, %zx, prot=0x%lx, pkey=%ld) ret: %d\n",
- ptr, size, orig_prot, pkey, ret);
- if (nr_iterations-- < 0)
- break;
-
- dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__,
- __LINE__, ret, __rdpkru(), shadow_pkru);
- sys_pkey_free(rpkey);
- dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__,
- __LINE__, ret, __rdpkru(), shadow_pkru);
- }
- pkey_assert(pkey < NR_PKEYS);
-
- ret = sys_mprotect_pkey(ptr, size, orig_prot, pkey);
- dprintf1("mprotect_pkey(%p, %zx, prot=0x%lx, pkey=%ld) ret: %d\n",
- ptr, size, orig_prot, pkey, ret);
- pkey_assert(!ret);
- dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__,
- __LINE__, ret, __rdpkru(), shadow_pkru);
- return ret;
-}
-
-struct pkey_malloc_record {
- void *ptr;
- long size;
- int prot;
-};
-struct pkey_malloc_record *pkey_malloc_records;
-struct pkey_malloc_record *pkey_last_malloc_record;
-long nr_pkey_malloc_records;
-void record_pkey_malloc(void *ptr, long size, int prot)
-{
- long i;
- struct pkey_malloc_record *rec = NULL;
-
- for (i = 0; i < nr_pkey_malloc_records; i++) {
- rec = &pkey_malloc_records[i];
- /* find a free record */
- if (rec)
- break;
- }
- if (!rec) {
- /* every record is full */
- size_t old_nr_records = nr_pkey_malloc_records;
- size_t new_nr_records = (nr_pkey_malloc_records * 2 + 1);
- size_t new_size = new_nr_records * sizeof(struct pkey_malloc_record);
- dprintf2("new_nr_records: %zd\n", new_nr_records);
- dprintf2("new_size: %zd\n", new_size);
- pkey_malloc_records = realloc(pkey_malloc_records, new_size);
- pkey_assert(pkey_malloc_records != NULL);
- rec = &pkey_malloc_records[nr_pkey_malloc_records];
- /*
- * realloc() does not initialize memory, so zero it from
- * the first new record all the way to the end.
- */
- for (i = 0; i < new_nr_records - old_nr_records; i++)
- memset(rec + i, 0, sizeof(*rec));
- }
- dprintf3("filling malloc record[%d/%p]: {%p, %ld}\n",
- (int)(rec - pkey_malloc_records), rec, ptr, size);
- rec->ptr = ptr;
- rec->size = size;
- rec->prot = prot;
- pkey_last_malloc_record = rec;
- nr_pkey_malloc_records++;
-}
-
-void free_pkey_malloc(void *ptr)
-{
- long i;
- int ret;
- dprintf3("%s(%p)\n", __func__, ptr);
- for (i = 0; i < nr_pkey_malloc_records; i++) {
- struct pkey_malloc_record *rec = &pkey_malloc_records[i];
- dprintf4("looking for ptr %p at record[%ld/%p]: {%p, %ld}\n",
- ptr, i, rec, rec->ptr, rec->size);
- if ((ptr < rec->ptr) ||
- (ptr >= rec->ptr + rec->size))
- continue;
-
- dprintf3("found ptr %p at record[%ld/%p]: {%p, %ld}\n",
- ptr, i, rec, rec->ptr, rec->size);
- nr_pkey_malloc_records--;
- ret = munmap(rec->ptr, rec->size);
- dprintf3("munmap ret: %d\n", ret);
- pkey_assert(!ret);
- dprintf3("clearing rec->ptr, rec: %p\n", rec);
- rec->ptr = NULL;
- dprintf3("done clearing rec->ptr, rec: %p\n", rec);
- return;
- }
- pkey_assert(false);
-}
-
-
-void *malloc_pkey_with_mprotect(long size, int prot, u16 pkey)
-{
- void *ptr;
- int ret;
-
- rdpkru();
- dprintf1("doing %s(size=%ld, prot=0x%x, pkey=%d)\n", __func__,
- size, prot, pkey);
- pkey_assert(pkey < NR_PKEYS);
- ptr = mmap(NULL, size, prot, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
- pkey_assert(ptr != (void *)-1);
- ret = mprotect_pkey((void *)ptr, PAGE_SIZE, prot, pkey);
- pkey_assert(!ret);
- record_pkey_malloc(ptr, size, prot);
- rdpkru();
-
- dprintf1("%s() for pkey %d @ %p\n", __func__, pkey, ptr);
- return ptr;
-}
-
-void *malloc_pkey_anon_huge(long size, int prot, u16 pkey)
-{
- int ret;
- void *ptr;
-
- dprintf1("doing %s(size=%ld, prot=0x%x, pkey=%d)\n", __func__,
- size, prot, pkey);
- /*
- * Guarantee we can fit at least one huge page in the resulting
- * allocation by allocating space for 2:
- */
- size = ALIGN_UP(size, HPAGE_SIZE * 2);
- ptr = mmap(NULL, size, PROT_NONE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
- pkey_assert(ptr != (void *)-1);
- record_pkey_malloc(ptr, size, prot);
- mprotect_pkey(ptr, size, prot, pkey);
-
- dprintf1("unaligned ptr: %p\n", ptr);
- ptr = ALIGN_PTR_UP(ptr, HPAGE_SIZE);
- dprintf1(" aligned ptr: %p\n", ptr);
- ret = madvise(ptr, HPAGE_SIZE, MADV_HUGEPAGE);
- dprintf1("MADV_HUGEPAGE ret: %d\n", ret);
- ret = madvise(ptr, HPAGE_SIZE, MADV_WILLNEED);
- dprintf1("MADV_WILLNEED ret: %d\n", ret);
- memset(ptr, 0, HPAGE_SIZE);
-
- dprintf1("mmap()'d thp for pkey %d @ %p\n", pkey, ptr);
- return ptr;
-}
-
-int hugetlb_setup_ok;
-#define GET_NR_HUGE_PAGES 10
-void setup_hugetlbfs(void)
-{
- int err;
- int fd;
- char buf[] = "123";
-
- if (geteuid() != 0) {
- fprintf(stderr, "WARNING: not run as root, can not do hugetlb test\n");
- return;
- }
-
- cat_into_file(__stringify(GET_NR_HUGE_PAGES), "/proc/sys/vm/nr_hugepages");
-
- /*
- * Now go make sure that we got the pages and that they
- * are 2M pages. Someone might have made 1G the default.
- */
- fd = open("/sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages", O_RDONLY);
- if (fd < 0) {
- perror("opening sysfs 2M hugetlb config");
- return;
- }
-
- /* -1 to guarantee leaving the trailing \0 */
- err = read(fd, buf, sizeof(buf)-1);
- close(fd);
- if (err <= 0) {
- perror("reading sysfs 2M hugetlb config");
- return;
- }
-
- if (atoi(buf) != GET_NR_HUGE_PAGES) {
- fprintf(stderr, "could not confirm 2M pages, got: '%s' expected %d\n",
- buf, GET_NR_HUGE_PAGES);
- return;
- }
-
- hugetlb_setup_ok = 1;
-}
-
-void *malloc_pkey_hugetlb(long size, int prot, u16 pkey)
-{
- void *ptr;
- int flags = MAP_ANONYMOUS|MAP_PRIVATE|MAP_HUGETLB;
-
- if (!hugetlb_setup_ok)
- return PTR_ERR_ENOTSUP;
-
- dprintf1("doing %s(%ld, %x, %x)\n", __func__, size, prot, pkey);
- size = ALIGN_UP(size, HPAGE_SIZE * 2);
- pkey_assert(pkey < NR_PKEYS);
- ptr = mmap(NULL, size, PROT_NONE, flags, -1, 0);
- pkey_assert(ptr != (void *)-1);
- mprotect_pkey(ptr, size, prot, pkey);
-
- record_pkey_malloc(ptr, size, prot);
-
- dprintf1("mmap()'d hugetlbfs for pkey %d @ %p\n", pkey, ptr);
- return ptr;
-}
-
-void *malloc_pkey_mmap_dax(long size, int prot, u16 pkey)
-{
- void *ptr;
- int fd;
-
- dprintf1("doing %s(size=%ld, prot=0x%x, pkey=%d)\n", __func__,
- size, prot, pkey);
- pkey_assert(pkey < NR_PKEYS);
- fd = open("/dax/foo", O_RDWR);
- pkey_assert(fd >= 0);
-
- ptr = mmap(0, size, prot, MAP_SHARED, fd, 0);
- pkey_assert(ptr != (void *)-1);
-
- mprotect_pkey(ptr, size, prot, pkey);
-
- record_pkey_malloc(ptr, size, prot);
-
- dprintf1("mmap()'d for pkey %d @ %p\n", pkey, ptr);
- close(fd);
- return ptr;
-}
-
-void *(*pkey_malloc[])(long size, int prot, u16 pkey) = {
-
- malloc_pkey_with_mprotect,
- malloc_pkey_anon_huge,
- malloc_pkey_hugetlb
-/* can not do direct with the pkey_mprotect() API:
- malloc_pkey_mmap_direct,
- malloc_pkey_mmap_dax,
-*/
-};
-
-void *malloc_pkey(long size, int prot, u16 pkey)
-{
- void *ret;
- static int malloc_type;
- int nr_malloc_types = ARRAY_SIZE(pkey_malloc);
-
- pkey_assert(pkey < NR_PKEYS);
-
- while (1) {
- pkey_assert(malloc_type < nr_malloc_types);
-
- ret = pkey_malloc[malloc_type](size, prot, pkey);
- pkey_assert(ret != (void *)-1);
-
- malloc_type++;
- if (malloc_type >= nr_malloc_types)
- malloc_type = (random()%nr_malloc_types);
-
- /* try again if the malloc_type we tried is unsupported */
- if (ret == PTR_ERR_ENOTSUP)
- continue;
-
- break;
- }
-
- dprintf3("%s(%ld, prot=%x, pkey=%x) returning: %p\n", __func__,
- size, prot, pkey, ret);
- return ret;
-}
-
-int last_pkru_faults;
-#define UNKNOWN_PKEY -2
-void expected_pk_fault(int pkey)
-{
- dprintf2("%s(): last_pkru_faults: %d pkru_faults: %d\n",
- __func__, last_pkru_faults, pkru_faults);
- dprintf2("%s(%d): last_si_pkey: %d\n", __func__, pkey, last_si_pkey);
- pkey_assert(last_pkru_faults + 1 == pkru_faults);
-
- /*
- * For exec-only memory, we do not know the pkey in
- * advance, so skip this check.
- */
- if (pkey != UNKNOWN_PKEY)
- pkey_assert(last_si_pkey == pkey);
-
- /*
- * The signal handler shold have cleared out PKRU to let the
- * test program continue. We now have to restore it.
- */
- if (__rdpkru() != 0)
- pkey_assert(0);
-
- __wrpkru(shadow_pkru);
- dprintf1("%s() set PKRU=%x to restore state after signal nuked it\n",
- __func__, shadow_pkru);
- last_pkru_faults = pkru_faults;
- last_si_pkey = -1;
-}
-
-#define do_not_expect_pk_fault(msg) do { \
- if (last_pkru_faults != pkru_faults) \
- dprintf0("unexpected PK fault: %s\n", msg); \
- pkey_assert(last_pkru_faults == pkru_faults); \
-} while (0)
-
-int test_fds[10] = { -1 };
-int nr_test_fds;
-void __save_test_fd(int fd)
-{
- pkey_assert(fd >= 0);
- pkey_assert(nr_test_fds < ARRAY_SIZE(test_fds));
- test_fds[nr_test_fds] = fd;
- nr_test_fds++;
-}
-
-int get_test_read_fd(void)
-{
- int test_fd = open("/etc/passwd", O_RDONLY);
- __save_test_fd(test_fd);
- return test_fd;
-}
-
-void close_test_fds(void)
-{
- int i;
-
- for (i = 0; i < nr_test_fds; i++) {
- if (test_fds[i] < 0)
- continue;
- close(test_fds[i]);
- test_fds[i] = -1;
- }
- nr_test_fds = 0;
-}
-
-#define barrier() __asm__ __volatile__("": : :"memory")
-__attribute__((noinline)) int read_ptr(int *ptr)
-{
- /*
- * Keep GCC from optimizing this away somehow
- */
- barrier();
- return *ptr;
-}
-
-void test_read_of_write_disabled_region(int *ptr, u16 pkey)
-{
- int ptr_contents;
-
- dprintf1("disabling write access to PKEY[1], doing read\n");
- pkey_write_deny(pkey);
- ptr_contents = read_ptr(ptr);
- dprintf1("*ptr: %d\n", ptr_contents);
- dprintf1("\n");
-}
-void test_read_of_access_disabled_region(int *ptr, u16 pkey)
-{
- int ptr_contents;
-
- dprintf1("disabling access to PKEY[%02d], doing read @ %p\n", pkey, ptr);
- rdpkru();
- pkey_access_deny(pkey);
- ptr_contents = read_ptr(ptr);
- dprintf1("*ptr: %d\n", ptr_contents);
- expected_pk_fault(pkey);
-}
-void test_write_of_write_disabled_region(int *ptr, u16 pkey)
-{
- dprintf1("disabling write access to PKEY[%02d], doing write\n", pkey);
- pkey_write_deny(pkey);
- *ptr = __LINE__;
- expected_pk_fault(pkey);
-}
-void test_write_of_access_disabled_region(int *ptr, u16 pkey)
-{
- dprintf1("disabling access to PKEY[%02d], doing write\n", pkey);
- pkey_access_deny(pkey);
- *ptr = __LINE__;
- expected_pk_fault(pkey);
-}
-void test_kernel_write_of_access_disabled_region(int *ptr, u16 pkey)
-{
- int ret;
- int test_fd = get_test_read_fd();
-
- dprintf1("disabling access to PKEY[%02d], "
- "having kernel read() to buffer\n", pkey);
- pkey_access_deny(pkey);
- ret = read(test_fd, ptr, 1);
- dprintf1("read ret: %d\n", ret);
- pkey_assert(ret);
-}
-void test_kernel_write_of_write_disabled_region(int *ptr, u16 pkey)
-{
- int ret;
- int test_fd = get_test_read_fd();
-
- pkey_write_deny(pkey);
- ret = read(test_fd, ptr, 100);
- dprintf1("read ret: %d\n", ret);
- if (ret < 0 && (DEBUG_LEVEL > 0))
- perror("verbose read result (OK for this to be bad)");
- pkey_assert(ret);
-}
-
-void test_kernel_gup_of_access_disabled_region(int *ptr, u16 pkey)
-{
- int pipe_ret, vmsplice_ret;
- struct iovec iov;
- int pipe_fds[2];
-
- pipe_ret = pipe(pipe_fds);
-
- pkey_assert(pipe_ret == 0);
- dprintf1("disabling access to PKEY[%02d], "
- "having kernel vmsplice from buffer\n", pkey);
- pkey_access_deny(pkey);
- iov.iov_base = ptr;
- iov.iov_len = PAGE_SIZE;
- vmsplice_ret = vmsplice(pipe_fds[1], &iov, 1, SPLICE_F_GIFT);
- dprintf1("vmsplice() ret: %d\n", vmsplice_ret);
- pkey_assert(vmsplice_ret == -1);
-
- close(pipe_fds[0]);
- close(pipe_fds[1]);
-}
-
-void test_kernel_gup_write_to_write_disabled_region(int *ptr, u16 pkey)
-{
- int ignored = 0xdada;
- int futex_ret;
- int some_int = __LINE__;
-
- dprintf1("disabling write to PKEY[%02d], "
- "doing futex gunk in buffer\n", pkey);
- *ptr = some_int;
- pkey_write_deny(pkey);
- futex_ret = syscall(SYS_futex, ptr, FUTEX_WAIT, some_int-1, NULL,
- &ignored, ignored);
- if (DEBUG_LEVEL > 0)
- perror("futex");
- dprintf1("futex() ret: %d\n", futex_ret);
-}
-
-/* Assumes that all pkeys other than 'pkey' are unallocated */
-void test_pkey_syscalls_on_non_allocated_pkey(int *ptr, u16 pkey)
-{
- int err;
- int i;
-
- /* Note: 0 is the default pkey, so don't mess with it */
- for (i = 1; i < NR_PKEYS; i++) {
- if (pkey == i)
- continue;
-
- dprintf1("trying get/set/free to non-allocated pkey: %2d\n", i);
- err = sys_pkey_free(i);
- pkey_assert(err);
-
- err = sys_pkey_free(i);
- pkey_assert(err);
-
- err = sys_mprotect_pkey(ptr, PAGE_SIZE, PROT_READ, i);
- pkey_assert(err);
- }
-}
-
-/* Assumes that all pkeys other than 'pkey' are unallocated */
-void test_pkey_syscalls_bad_args(int *ptr, u16 pkey)
-{
- int err;
- int bad_pkey = NR_PKEYS+99;
-
- /* pass a known-invalid pkey in: */
- err = sys_mprotect_pkey(ptr, PAGE_SIZE, PROT_READ, bad_pkey);
- pkey_assert(err);
-}
-
-/* Assumes that all pkeys other than 'pkey' are unallocated */
-void test_pkey_alloc_exhaust(int *ptr, u16 pkey)
-{
- int err;
- int allocated_pkeys[NR_PKEYS] = {0};
- int nr_allocated_pkeys = 0;
- int i;
-
- for (i = 0; i < NR_PKEYS*2; i++) {
- int new_pkey;
- dprintf1("%s() alloc loop: %d\n", __func__, i);
- new_pkey = alloc_pkey();
- dprintf4("%s()::%d, err: %d pkru: 0x%x shadow: 0x%x\n", __func__,
- __LINE__, err, __rdpkru(), shadow_pkru);
- rdpkru(); /* for shadow checking */
- dprintf2("%s() errno: %d ENOSPC: %d\n", __func__, errno, ENOSPC);
- if ((new_pkey == -1) && (errno == ENOSPC)) {
- dprintf2("%s() failed to allocate pkey after %d tries\n",
- __func__, nr_allocated_pkeys);
- break;
- }
- pkey_assert(nr_allocated_pkeys < NR_PKEYS);
- allocated_pkeys[nr_allocated_pkeys++] = new_pkey;
- }
-
- dprintf3("%s()::%d\n", __func__, __LINE__);
-
- /*
- * ensure it did not reach the end of the loop without
- * failure:
- */
- pkey_assert(i < NR_PKEYS*2);
-
- /*
- * There are 16 pkeys supported in hardware. Three are
- * allocated by the time we get here:
- * 1. The default key (0)
- * 2. One possibly consumed by an execute-only mapping.
- * 3. One allocated by the test code and passed in via
- * 'pkey' to this function.
- * Ensure that we can allocate at least another 13 (16-3).
- */
- pkey_assert(i >= NR_PKEYS-3);
-
- for (i = 0; i < nr_allocated_pkeys; i++) {
- err = sys_pkey_free(allocated_pkeys[i]);
- pkey_assert(!err);
- rdpkru(); /* for shadow checking */
- }
-}
-
-/*
- * pkey 0 is special. It is allocated by default, so you do not
- * have to call pkey_alloc() to use it first. Make sure that it
- * is usable.
- */
-void test_mprotect_with_pkey_0(int *ptr, u16 pkey)
-{
- long size;
- int prot;
-
- assert(pkey_last_malloc_record);
- size = pkey_last_malloc_record->size;
- /*
- * This is a bit of a hack. But mprotect() requires
- * huge-page-aligned sizes when operating on hugetlbfs.
- * So, make sure that we use something that's a multiple
- * of a huge page when we can.
- */
- if (size >= HPAGE_SIZE)
- size = HPAGE_SIZE;
- prot = pkey_last_malloc_record->prot;
-
- /* Use pkey 0 */
- mprotect_pkey(ptr, size, prot, 0);
-
- /* Make sure that we can set it back to the original pkey. */
- mprotect_pkey(ptr, size, prot, pkey);
-}
-
-void test_ptrace_of_child(int *ptr, u16 pkey)
-{
- __attribute__((__unused__)) int peek_result;
- pid_t child_pid;
- void *ignored = 0;
- long ret;
- int status;
- /*
- * This is the "control" for our little expermient. Make sure
- * we can always access it when ptracing.
- */
- int *plain_ptr_unaligned = malloc(HPAGE_SIZE);
- int *plain_ptr = ALIGN_PTR_UP(plain_ptr_unaligned, PAGE_SIZE);
-
- /*
- * Fork a child which is an exact copy of this process, of course.
- * That means we can do all of our tests via ptrace() and then plain
- * memory access and ensure they work differently.
- */
- child_pid = fork_lazy_child();
- dprintf1("[%d] child pid: %d\n", getpid(), child_pid);
-
- ret = ptrace(PTRACE_ATTACH, child_pid, ignored, ignored);
- if (ret)
- perror("attach");
- dprintf1("[%d] attach ret: %ld %d\n", getpid(), ret, __LINE__);
- pkey_assert(ret != -1);
- ret = waitpid(child_pid, &status, WUNTRACED);
- if ((ret != child_pid) || !(WIFSTOPPED(status))) {
- fprintf(stderr, "weird waitpid result %ld stat %x\n",
- ret, status);
- pkey_assert(0);
- }
- dprintf2("waitpid ret: %ld\n", ret);
- dprintf2("waitpid status: %d\n", status);
-
- pkey_access_deny(pkey);
- pkey_write_deny(pkey);
-
- /* Write access, untested for now:
- ret = ptrace(PTRACE_POKEDATA, child_pid, peek_at, data);
- pkey_assert(ret != -1);
- dprintf1("poke at %p: %ld\n", peek_at, ret);
- */
-
- /*
- * Try to access the pkey-protected "ptr" via ptrace:
- */
- ret = ptrace(PTRACE_PEEKDATA, child_pid, ptr, ignored);
- /* expect it to work, without an error: */
- pkey_assert(ret != -1);
- /* Now access from the current task, and expect an exception: */
- peek_result = read_ptr(ptr);
- expected_pk_fault(pkey);
-
- /*
- * Try to access the NON-pkey-protected "plain_ptr" via ptrace:
- */
- ret = ptrace(PTRACE_PEEKDATA, child_pid, plain_ptr, ignored);
- /* expect it to work, without an error: */
- pkey_assert(ret != -1);
- /* Now access from the current task, and expect NO exception: */
- peek_result = read_ptr(plain_ptr);
- do_not_expect_pk_fault("read plain pointer after ptrace");
-
- ret = ptrace(PTRACE_DETACH, child_pid, ignored, 0);
- pkey_assert(ret != -1);
-
- ret = kill(child_pid, SIGKILL);
- pkey_assert(ret != -1);
-
- wait(&status);
-
- free(plain_ptr_unaligned);
-}
-
-void *get_pointer_to_instructions(void)
-{
- void *p1;
-
- p1 = ALIGN_PTR_UP(&lots_o_noops_around_write, PAGE_SIZE);
- dprintf3("&lots_o_noops: %p\n", &lots_o_noops_around_write);
- /* lots_o_noops_around_write should be page-aligned already */
- assert(p1 == &lots_o_noops_around_write);
-
- /* Point 'p1' at the *second* page of the function: */
- p1 += PAGE_SIZE;
-
- /*
- * Try to ensure we fault this in on next touch to ensure
- * we get an instruction fault as opposed to a data one
- */
- madvise(p1, PAGE_SIZE, MADV_DONTNEED);
-
- return p1;
-}
-
-void test_executing_on_unreadable_memory(int *ptr, u16 pkey)
-{
- void *p1;
- int scratch;
- int ptr_contents;
- int ret;
-
- p1 = get_pointer_to_instructions();
- lots_o_noops_around_write(&scratch);
- ptr_contents = read_ptr(p1);
- dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents);
-
- ret = mprotect_pkey(p1, PAGE_SIZE, PROT_EXEC, (u64)pkey);
- pkey_assert(!ret);
- pkey_access_deny(pkey);
-
- dprintf2("pkru: %x\n", rdpkru());
-
- /*
- * Make sure this is an *instruction* fault
- */
- madvise(p1, PAGE_SIZE, MADV_DONTNEED);
- lots_o_noops_around_write(&scratch);
- do_not_expect_pk_fault("executing on PROT_EXEC memory");
- ptr_contents = read_ptr(p1);
- dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents);
- expected_pk_fault(pkey);
-}
-
-void test_implicit_mprotect_exec_only_memory(int *ptr, u16 pkey)
-{
- void *p1;
- int scratch;
- int ptr_contents;
- int ret;
-
- dprintf1("%s() start\n", __func__);
-
- p1 = get_pointer_to_instructions();
- lots_o_noops_around_write(&scratch);
- ptr_contents = read_ptr(p1);
- dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents);
-
- /* Use a *normal* mprotect(), not mprotect_pkey(): */
- ret = mprotect(p1, PAGE_SIZE, PROT_EXEC);
- pkey_assert(!ret);
-
- dprintf2("pkru: %x\n", rdpkru());
-
- /* Make sure this is an *instruction* fault */
- madvise(p1, PAGE_SIZE, MADV_DONTNEED);
- lots_o_noops_around_write(&scratch);
- do_not_expect_pk_fault("executing on PROT_EXEC memory");
- ptr_contents = read_ptr(p1);
- dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents);
- expected_pk_fault(UNKNOWN_PKEY);
-
- /*
- * Put the memory back to non-PROT_EXEC. Should clear the
- * exec-only pkey off the VMA and allow it to be readable
- * again. Go to PROT_NONE first to check for a kernel bug
- * that did not clear the pkey when doing PROT_NONE.
- */
- ret = mprotect(p1, PAGE_SIZE, PROT_NONE);
- pkey_assert(!ret);
-
- ret = mprotect(p1, PAGE_SIZE, PROT_READ|PROT_EXEC);
- pkey_assert(!ret);
- ptr_contents = read_ptr(p1);
- do_not_expect_pk_fault("plain read on recently PROT_EXEC area");
-}
-
-void test_mprotect_pkey_on_unsupported_cpu(int *ptr, u16 pkey)
-{
- int size = PAGE_SIZE;
- int sret;
-
- if (cpu_has_pku()) {
- dprintf1("SKIP: %s: no CPU support\n", __func__);
- return;
- }
-
- sret = syscall(SYS_mprotect_key, ptr, size, PROT_READ, pkey);
- pkey_assert(sret < 0);
-}
-
-void (*pkey_tests[])(int *ptr, u16 pkey) = {
- test_read_of_write_disabled_region,
- test_read_of_access_disabled_region,
- test_write_of_write_disabled_region,
- test_write_of_access_disabled_region,
- test_kernel_write_of_access_disabled_region,
- test_kernel_write_of_write_disabled_region,
- test_kernel_gup_of_access_disabled_region,
- test_kernel_gup_write_to_write_disabled_region,
- test_executing_on_unreadable_memory,
- test_implicit_mprotect_exec_only_memory,
- test_mprotect_with_pkey_0,
- test_ptrace_of_child,
- test_pkey_syscalls_on_non_allocated_pkey,
- test_pkey_syscalls_bad_args,
- test_pkey_alloc_exhaust,
-};
-
-void run_tests_once(void)
-{
- int *ptr;
- int prot = PROT_READ|PROT_WRITE;
-
- for (test_nr = 0; test_nr < ARRAY_SIZE(pkey_tests); test_nr++) {
- int pkey;
- int orig_pkru_faults = pkru_faults;
-
- dprintf1("======================\n");
- dprintf1("test %d preparing...\n", test_nr);
-
- tracing_on();
- pkey = alloc_random_pkey();
- dprintf1("test %d starting with pkey: %d\n", test_nr, pkey);
- ptr = malloc_pkey(PAGE_SIZE, prot, pkey);
- dprintf1("test %d starting...\n", test_nr);
- pkey_tests[test_nr](ptr, pkey);
- dprintf1("freeing test memory: %p\n", ptr);
- free_pkey_malloc(ptr);
- sys_pkey_free(pkey);
-
- dprintf1("pkru_faults: %d\n", pkru_faults);
- dprintf1("orig_pkru_faults: %d\n", orig_pkru_faults);
-
- tracing_off();
- close_test_fds();
-
- printf("test %2d PASSED (iteration %d)\n", test_nr, iteration_nr);
- dprintf1("======================\n\n");
- }
- iteration_nr++;
-}
-
-void pkey_setup_shadow(void)
-{
- shadow_pkru = __rdpkru();
-}
-
-int main(void)
-{
- int nr_iterations = 22;
-
- setup_handlers();
-
- printf("has pku: %d\n", cpu_has_pku());
-
- if (!cpu_has_pku()) {
- int size = PAGE_SIZE;
- int *ptr;
-
- printf("running PKEY tests for unsupported CPU/OS\n");
-
- ptr = mmap(NULL, size, PROT_NONE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
- assert(ptr != (void *)-1);
- test_mprotect_pkey_on_unsupported_cpu(ptr, 1);
- exit(0);
- }
-
- pkey_setup_shadow();
- printf("startup pkru: %x\n", rdpkru());
- setup_hugetlbfs();
-
- while (nr_iterations-- > 0)
- run_tests_once();
-
- printf("done (all tests OK)\n");
- return 0;
-}
--
1.7.1
^ permalink raw reply related
* [PATCH v14 00/22] selftests, powerpc, x86 : Memory Protection Keys
From: Ram Pai @ 2018-07-17 13:49 UTC (permalink / raw)
To: shuahkh, linux-kselftest
Cc: mpe, linuxppc-dev, linux-mm, x86, linux-arch, mingo, dave.hansen,
mhocko, bauerman, linuxram, fweimer, msuchanek, aneesh.kumar
Memory protection keys enables an application to protect its address space from
inadvertent access by its own code.
This feature is now enabled on powerpc architecture and integrated in
4.16-rc1. The patches move the selftests to arch neutral directory
and enhance their test coverage.
Test
----
Verified for correctness on powerpc and on x86.
History:
-------
version 14:
(1) incorporated another round of comments from Dave Hansen.
version 13:
(1) Incorporated comments for Dave Hansen.
(2) Added one more test for correct pkey-0 behavior.
version 12:
(1) fixed the offset of pkey field in the siginfo structure for
x86_64 and powerpc. And tries to use the actual field
if the headers have it defined.
version 11:
(1) fixed a deadlock in the ptrace testcase.
version 10 and prior:
(1) moved the testcase to arch neutral directory
(2) split the changes into incremental patches.
Ram Pai (20):
selftests/x86: Move protecton key selftest to arch neutral directory
selftests/vm: rename all references to pkru to a generic name
selftests/vm: move generic definitions to header file
selftests/vm: typecast the pkey register
selftests/vm: generic function to handle shadow key register
selftests/vm: fix the wrong assert in pkey_disable_set()
selftests/vm: fixed bugs in pkey_disable_clear()
selftests/vm: fix alloc_random_pkey() to make it really random
selftests/vm: introduce two arch independent abstraction
selftests/vm: pkey register should match shadow pkey
selftests/vm: generic cleanup
selftests/vm: Introduce generic abstractions
selftests/vm: powerpc implementation to check support for pkey
selftests/vm: fix an assertion in test_pkey_alloc_exhaust()
selftests/vm: associate key on a mapped page and detect access
violation
selftests/vm: associate key on a mapped page and detect write
violation
selftests/vm: detect write violation on a mapped access-denied-key
page
selftests/vm: testcases must restore pkey-permissions
selftests/vm: sub-page allocator
selftests/vm: test correct behavior of pkey-0
Thiago Jung Bauermann (2):
selftests/vm: move arch-specific definitions to arch-specific header
selftests/vm: Make gcc check arguments of sigsafe_printf()
tools/testing/selftests/vm/.gitignore | 1 +
tools/testing/selftests/vm/Makefile | 1 +
tools/testing/selftests/vm/pkey-helpers.h | 214 ++++
tools/testing/selftests/vm/pkey-powerpc.h | 128 ++
tools/testing/selftests/vm/pkey-x86.h | 184 +++
tools/testing/selftests/vm/protection_keys.c | 1593 +++++++++++++++++++++++++
tools/testing/selftests/x86/.gitignore | 1 -
tools/testing/selftests/x86/pkey-helpers.h | 219 ----
tools/testing/selftests/x86/protection_keys.c | 1485 -----------------------
9 files changed, 2121 insertions(+), 1705 deletions(-)
create mode 100644 tools/testing/selftests/vm/pkey-helpers.h
create mode 100644 tools/testing/selftests/vm/pkey-powerpc.h
create mode 100644 tools/testing/selftests/vm/pkey-x86.h
create mode 100644 tools/testing/selftests/vm/protection_keys.c
delete mode 100644 tools/testing/selftests/x86/pkey-helpers.h
delete mode 100644 tools/testing/selftests/x86/protection_keys.c
^ permalink raw reply
* Re: [PATCH v5 2/2] hwmon: ibmpowernv: Add attributes to enable/disable sensor groups
From: Guenter Roeck @ 2018-07-17 13:47 UTC (permalink / raw)
To: Shilpasri G Bhat, mpe; +Cc: linuxppc-dev, linux-hwmon, linux-kernel, ego
In-Reply-To: <1531637685-23250-3-git-send-email-shilpa.bhat@linux.vnet.ibm.com>
On 07/14/2018 11:54 PM, Shilpasri G Bhat wrote:
> On-Chip-Controller(OCC) is an embedded micro-processor in POWER9 chip
> which measures various system and chip level sensors. These sensors
> comprises of environmental sensors (like power, temperature, current
> and voltage) and performance sensors (like utilization, frequency).
> All these sensors are copied to main memory at a regular interval of
> 100ms. OCC provides a way to select a group of sensors that is copied
> to the main memory to increase the update frequency of selected sensor
> groups. When a sensor-group is disabled, OCC will not copy it to main
> memory and those sensors read 0 values.
>
> This patch provides support for enabling/disabling the sensor groups
> like power, temperature, current and voltage. This patch adds new
> per-senor sysfs attribute to disable and enable them.
>
> Signed-off-by: Shilpasri G Bhat <shilpa.bhat@linux.vnet.ibm.com>
> ---
> Changes from v4:
> - As per Mpe's suggestion store device_node instead of phandles and
> clean it after init
> - s/sg_data/sgrp_data
>
> Documentation/hwmon/ibmpowernv | 43 ++++++-
> drivers/hwmon/ibmpowernv.c | 256 +++++++++++++++++++++++++++++++++++------
> 2 files changed, 265 insertions(+), 34 deletions(-)
>
> diff --git a/Documentation/hwmon/ibmpowernv b/Documentation/hwmon/ibmpowernv
> index 8826ba2..5646825 100644
> --- a/Documentation/hwmon/ibmpowernv
> +++ b/Documentation/hwmon/ibmpowernv
> @@ -33,9 +33,48 @@ fanX_input Measured RPM value.
> fanX_min Threshold RPM for alert generation.
> fanX_fault 0: No fail condition
> 1: Failing fan
> +
> tempX_input Measured ambient temperature.
> tempX_max Threshold ambient temperature for alert generation.
> -inX_input Measured power supply voltage
> +tempX_highest Historical maximum temperature
> +tempX_lowest Historical minimum temperature
> +tempX_enable Enable/disable all temperature sensors belonging to the
> + sub-group. In POWER9, this attribute corresponds to
> + each OCC. Using this attribute each OCC can be asked to
> + disable/enable all of its temperature sensors.
> + 1: Enable
> + 0: Disable
> +
> +inX_input Measured power supply voltage (millivolt)
> inX_fault 0: No fail condition.
> 1: Failing power supply.
> -power1_input System power consumption (microWatt)
> +inX_highest Historical maximum voltage
> +inX_lowest Historical minimum voltage
> +inX_enable Enable/disable all voltage sensors belonging to the
> + sub-group. In POWER9, this attribute corresponds to
> + each OCC. Using this attribute each OCC can be asked to
> + disable/enable all of its voltage sensors.
> + 1: Enable
> + 0: Disable
> +
> +powerX_input Power consumption (microWatt)
> +powerX_input_highest Historical maximum power
> +powerX_input_lowest Historical minimum power
> +powerX_enable Enable/disable all power sensors belonging to the
> + sub-group. In POWER9, this attribute corresponds to
> + each OCC. Using this attribute each OCC can be asked to
> + disable/enable all of its power sensors.
> + 1: Enable
> + 0: Disable
> +
> +currX_input Measured current (milliampere)
> +currX_highest Historical maximum current
> +currX_lowest Historical minimum current
> +currX_enable Enable/disable all current sensors belonging to the
> + sub-group. In POWER9, this attribute corresponds to
> + each OCC. Using this attribute each OCC can be asked to
> + disable/enable all of its current sensors.
> + 1: Enable
> + 0: Disable
> +
> +energyX_input Cumulative energy (microJoule)
> diff --git a/drivers/hwmon/ibmpowernv.c b/drivers/hwmon/ibmpowernv.c
> index f829dad..a509b9b 100644
> --- a/drivers/hwmon/ibmpowernv.c
> +++ b/drivers/hwmon/ibmpowernv.c
> @@ -90,11 +90,23 @@ struct sensor_data {
> char label[MAX_LABEL_LEN];
> char name[MAX_ATTR_LEN];
> struct device_attribute dev_attr;
> + struct sensor_group_data *sgrp_data;
> +};
> +
> +struct sensor_group_data {
> + struct mutex mutex;
> + struct device_node **of_nodes;
> + u32 gid;
> + u32 nr_nodes;
> + enum sensors type;
> + bool enable;
> };
>
> struct platform_data {
> const struct attribute_group *attr_groups[MAX_SENSOR_TYPE + 1];
> + struct sensor_group_data *sgrp_data;
> u32 sensors_count; /* Total count of sensors from each group */
> + u32 nr_sensor_groups; /* Total number of sensor groups */
> };
>
> static ssize_t show_sensor(struct device *dev, struct device_attribute *devattr,
> @@ -105,6 +117,9 @@ static ssize_t show_sensor(struct device *dev, struct device_attribute *devattr,
> ssize_t ret;
> u64 x;
>
> + if (sdata->sgrp_data && !sdata->sgrp_data->enable)
> + return -ENODATA;
> +
> ret = opal_get_sensor_data_u64(sdata->id, &x);
>
> if (ret)
> @@ -120,6 +135,46 @@ static ssize_t show_sensor(struct device *dev, struct device_attribute *devattr,
> return sprintf(buf, "%llu\n", x);
> }
>
> +static ssize_t show_enable(struct device *dev,
> + struct device_attribute *devattr, char *buf)
> +{
> + struct sensor_data *sdata = container_of(devattr, struct sensor_data,
> + dev_attr);
> +
> + return sprintf(buf, "%u\n", sdata->sgrp_data->enable);
> +}
> +
> +static ssize_t store_enable(struct device *dev,
> + struct device_attribute *devattr,
> + const char *buf, size_t count)
> +{
> + struct sensor_data *sdata = container_of(devattr, struct sensor_data,
> + dev_attr);
> + struct sensor_group_data *sgrp_data = sdata->sgrp_data;
> + bool data;
> + int ret;
> +
> + ret = kstrtobool(buf, &data);
> + if (ret)
> + return ret;
> +
> + ret = mutex_lock_interruptible(&sgrp_data->mutex);
> + if (ret)
> + return ret;
> +
> + if (data != sgrp_data->enable) {
> + ret = sensor_group_enable(sgrp_data->gid, data);
> + if (!ret)
> + sgrp_data->enable = data;
> + }
> +
> + if (!ret)
> + ret = count;
> +
> + mutex_unlock(&sgrp_data->mutex);
> + return ret;
> +}
> +
> static ssize_t show_label(struct device *dev, struct device_attribute *devattr,
> char *buf)
> {
> @@ -292,12 +347,129 @@ static u32 get_sensor_hwmon_index(struct sensor_data *sdata,
> return ++sensor_groups[sdata->type].hwmon_index;
> }
>
> +static int init_sensor_group_data(struct platform_device *pdev,
> + struct platform_data *pdata)
> +{
> + struct sensor_group_data *sgrp_data;
> + struct device_node *groups, *sgrp;
> + enum sensors type;
> + int count = 0, ret = 0;
> +
> + groups = of_find_node_by_path("/ibm,opal/sensor-groups");
> + if (!groups)
> + return ret;
> +
> + for_each_child_of_node(groups, sgrp) {
> + type = get_sensor_type(sgrp);
> + if (type != MAX_SENSOR_TYPE)
> + pdata->nr_sensor_groups++;
> + }
> +
> + if (!pdata->nr_sensor_groups)
> + goto out;
> +
> + sgrp_data = devm_kcalloc(&pdev->dev, pdata->nr_sensor_groups,
> + sizeof(*sgrp_data), GFP_KERNEL);
> + if (!sgrp_data) {
> + ret = -ENOMEM;
> + goto out;
> + }
> +
> + for_each_child_of_node(groups, sgrp) {
> + const __be32 *phandles;
> + int len, gid, i, k = 0;
> +
> + type = get_sensor_type(sgrp);
> + if (type == MAX_SENSOR_TYPE)
> + continue;
> +
> + if (of_property_read_u32(sgrp, "sensor-group-id", &gid))
> + continue;
> +
> + phandles = of_get_property(sgrp, "sensors", &len);
> + if (!phandles)
> + continue;
> +
> + len /= sizeof(u32);
> + if (!len)
> + continue;
> +
> + sgrp_data[count].of_nodes = devm_kcalloc(&pdev->dev,
> + sizeof(struct device_node *),
> + len, GFP_KERNEL);
> + if (!sgrp_data[count].of_nodes) {
> + ret = -ENOMEM;
> + of_node_put(sgrp);
> + goto out;
> + }
> +
> + for (i = 0; i < len; i++) {
> + struct device_node *node;
> +
> + node = of_parse_phandle(sgrp, "sensors", i);
> + if (!node)
> + continue;
> + sgrp_data[count].of_nodes[k++] = node;
I don't immediately see where "node" is used later. As mentioned below,
I'll need to have a much closer look into the code to understand what
is going on here.
> + }
> +
> + sensor_groups[type].attr_count++;
> + sgrp_data[count].gid = gid;
> + sgrp_data[count].type = type;
> + sgrp_data[count].nr_nodes = len;
> + mutex_init(&sgrp_data[count].mutex);
> + sgrp_data[count++].enable = false;
> + }
> + pdata->sgrp_data = sgrp_data;
> +out:
> + of_node_put(groups);
> + return ret;
> +}
> +
> +static struct sensor_group_data *get_sensor_group(struct platform_data *pdata,
> + struct device_node *node,
> + enum sensors type)
> +{
> + struct sensor_group_data *sgrp_data = pdata->sgrp_data;
> + int i, j;
> +
> + for (i = 0; i < pdata->nr_sensor_groups; i++) {
> + if (type != sgrp_data[i].type)
> + continue;
> +
> + for (j = 0; j < sgrp_data[i].nr_nodes; j++)
> + if (sgrp_data[i].of_nodes[j] == node)
> + return &sgrp_data[i];
> + }
> +
> + return NULL;
> +}
> +
> +static void clean_sensor_group_of_node(struct platform_device *pdev)
> +{
> + struct platform_data *pdata = platform_get_drvdata(pdev);
> + struct sensor_group_data *sgrp_data = pdata->sgrp_data;
> + int i, j;
> +
> + for (i = 0; i < pdata->nr_sensor_groups; i++) {
> + for (j = 0; j < sgrp_data[i].nr_nodes; j++)
> + of_node_put(sgrp_data[i].of_nodes[j]);
> +
> + devm_kfree(&pdev->dev, sgrp_data[i].of_nodes);
The whole point of calling devm_kzalloc() is that calling devm_kfree()
is not necessary. Any call to devm_kfree() is a strong indication
that something is wrong.
> + sgrp_data[i].of_nodes = NULL;
> + }
> +}
> +
Ok, this will require a detailed review. I don't understand what this
function is about. It seems that sgrp_data[].of_nodes is only allocated
to be freed at the end of create_dev_attrs(), suggesting that the cleared
data isn't needed for runtime and thus should not be stored in an
array in the first place. I'll have to understand this much better
than I do right now before approving it.
Guenter
> static int populate_attr_groups(struct platform_device *pdev)
> {
> struct platform_data *pdata = platform_get_drvdata(pdev);
> const struct attribute_group **pgroups = pdata->attr_groups;
> struct device_node *opal, *np;
> enum sensors type;
> + int ret;
> +
> + ret = init_sensor_group_data(pdev, pdata);
> + if (ret)
> + return ret;
>
> opal = of_find_node_by_path("/ibm,opal/sensors");
> for_each_child_of_node(opal, np) {
> @@ -344,7 +516,10 @@ static int populate_attr_groups(struct platform_device *pdev)
> static void create_hwmon_attr(struct sensor_data *sdata, const char *attr_name,
> ssize_t (*show)(struct device *dev,
> struct device_attribute *attr,
> - char *buf))
> + char *buf),
> + ssize_t (*store)(struct device *dev,
> + struct device_attribute *attr,
> + const char *buf, size_t count))
> {
> snprintf(sdata->name, MAX_ATTR_LEN, "%s%d_%s",
> sensor_groups[sdata->type].name, sdata->hwmon_index,
> @@ -352,23 +527,33 @@ static void create_hwmon_attr(struct sensor_data *sdata, const char *attr_name,
>
> sysfs_attr_init(&sdata->dev_attr.attr);
> sdata->dev_attr.attr.name = sdata->name;
> - sdata->dev_attr.attr.mode = S_IRUGO;
> sdata->dev_attr.show = show;
> + if (store) {
> + sdata->dev_attr.store = store;
> + sdata->dev_attr.attr.mode = 0664;
> + } else {
> + sdata->dev_attr.attr.mode = 0444;
> + }
> }
>
> static void populate_sensor(struct sensor_data *sdata, int od, int hd, int sid,
> const char *attr_name, enum sensors type,
> const struct attribute_group *pgroup,
> + struct sensor_group_data *sgrp_data,
> ssize_t (*show)(struct device *dev,
> struct device_attribute *attr,
> - char *buf))
> + char *buf),
> + ssize_t (*store)(struct device *dev,
> + struct device_attribute *attr,
> + const char *buf, size_t count))
> {
> sdata->id = sid;
> sdata->type = type;
> sdata->opal_index = od;
> sdata->hwmon_index = hd;
> - create_hwmon_attr(sdata, attr_name, show);
> + create_hwmon_attr(sdata, attr_name, show, store);
> pgroup->attrs[sensor_groups[type].attr_count++] = &sdata->dev_attr.attr;
> + sdata->sgrp_data = sgrp_data;
> }
>
> static char *get_max_attr(enum sensors type)
> @@ -403,24 +588,23 @@ static int create_device_attrs(struct platform_device *pdev)
> const struct attribute_group **pgroups = pdata->attr_groups;
> struct device_node *opal, *np;
> struct sensor_data *sdata;
> - u32 sensor_id;
> - enum sensors type;
> u32 count = 0;
> - int err = 0;
> + u32 group_attr_id[MAX_SENSOR_TYPE] = {0};
>
> - opal = of_find_node_by_path("/ibm,opal/sensors");
> sdata = devm_kcalloc(&pdev->dev,
> pdata->sensors_count, sizeof(*sdata),
> GFP_KERNEL);
> - if (!sdata) {
> - err = -ENOMEM;
> - goto exit_put_node;
> - }
> + if (!sdata)
> + return -ENOMEM;
>
> + opal = of_find_node_by_path("/ibm,opal/sensors");
> for_each_child_of_node(opal, np) {
> + struct sensor_group_data *sgrp_data;
> const char *attr_name;
> - u32 opal_index;
> + u32 opal_index, hw_id;
> + u32 sensor_id;
> const char *label;
> + enum sensors type;
>
> if (np->name == NULL)
> continue;
> @@ -456,14 +640,12 @@ static int create_device_attrs(struct platform_device *pdev)
> opal_index = INVALID_INDEX;
> }
>
> - sdata[count].opal_index = opal_index;
> - sdata[count].hwmon_index =
> - get_sensor_hwmon_index(&sdata[count], sdata, count);
> -
> - create_hwmon_attr(&sdata[count], attr_name, show_sensor);
> -
> - pgroups[type]->attrs[sensor_groups[type].attr_count++] =
> - &sdata[count++].dev_attr.attr;
> + hw_id = get_sensor_hwmon_index(&sdata[count], sdata, count);
> + sgrp_data = get_sensor_group(pdata, np, type);
> + populate_sensor(&sdata[count], opal_index, hw_id, sensor_id,
> + attr_name, type, pgroups[type], sgrp_data,
> + show_sensor, NULL);
> + count++;
>
> if (!of_property_read_string(np, "label", &label)) {
> /*
> @@ -474,35 +656,44 @@ static int create_device_attrs(struct platform_device *pdev)
> */
>
> make_sensor_label(np, &sdata[count], label);
> - populate_sensor(&sdata[count], opal_index,
> - sdata[count - 1].hwmon_index,
> + populate_sensor(&sdata[count], opal_index, hw_id,
> sensor_id, "label", type, pgroups[type],
> - show_label);
> + NULL, show_label, NULL);
> count++;
> }
>
> if (!of_property_read_u32(np, "sensor-data-max", &sensor_id)) {
> attr_name = get_max_attr(type);
> - populate_sensor(&sdata[count], opal_index,
> - sdata[count - 1].hwmon_index,
> + populate_sensor(&sdata[count], opal_index, hw_id,
> sensor_id, attr_name, type,
> - pgroups[type], show_sensor);
> + pgroups[type], sgrp_data, show_sensor,
> + NULL);
> count++;
> }
>
> if (!of_property_read_u32(np, "sensor-data-min", &sensor_id)) {
> attr_name = get_min_attr(type);
> - populate_sensor(&sdata[count], opal_index,
> - sdata[count - 1].hwmon_index,
> + populate_sensor(&sdata[count], opal_index, hw_id,
> sensor_id, attr_name, type,
> - pgroups[type], show_sensor);
> + pgroups[type], sgrp_data, show_sensor,
> + NULL);
> + count++;
> + }
> +
> + if (sgrp_data && !sgrp_data->enable) {
> + sgrp_data->enable = true;
> + hw_id = ++group_attr_id[type];
> + populate_sensor(&sdata[count], opal_index, hw_id,
> + sgrp_data->gid, "enable", type,
> + pgroups[type], sgrp_data, show_enable,
> + store_enable);
> count++;
> }
> }
>
> -exit_put_node:
> of_node_put(opal);
> - return err;
> + clean_sensor_group_of_node(pdev);
> + return 0;
> }
>
> static int ibmpowernv_probe(struct platform_device *pdev)
> @@ -517,6 +708,7 @@ static int ibmpowernv_probe(struct platform_device *pdev)
>
> platform_set_drvdata(pdev, pdata);
> pdata->sensors_count = 0;
> + pdata->nr_sensor_groups = 0;
> err = populate_attr_groups(pdev);
> if (err)
> return err;
>
^ permalink raw reply
* Re: [RFC PATCH v6 0/4] powerpc/fadump: Improvements and fixes for firmware-assisted dump.
From: Michal Hocko @ 2018-07-17 11:52 UTC (permalink / raw)
To: Mahesh Jagannath Salgaonkar
Cc: linuxppc-dev, Linux Kernel, Hari Bathini,
Ananth N Mavinakayanahalli, Srikar Dronamraju, Aneesh Kumar K.V,
Anshuman Khandual, Andrew Morton, Joonsoo Kim, Ananth Narayan,
kernelfans
In-Reply-To: <cb5fc554-7fa3-1356-c304-ae8c27a0c70c@linux.vnet.ibm.com>
On Tue 17-07-18 16:58:10, Mahesh Jagannath Salgaonkar wrote:
> On 07/16/2018 01:56 PM, Michal Hocko wrote:
> > On Mon 16-07-18 11:32:56, Mahesh J Salgaonkar wrote:
> >> One of the primary issues with Firmware Assisted Dump (fadump) on Power
> >> is that it needs a large amount of memory to be reserved. This reserved
> >> memory is used for saving the contents of old crashed kernel's memory before
> >> fadump capture kernel uses old kernel's memory area to boot. However, This
> >> reserved memory area stays unused until system crash and isn't available
> >> for production kernel to use.
> >
> > How much memory are we talking about. Regular kernel dump process needs
> > some reserved memory as well. Why that is not a big problem?
>
> We reserve around 5% of total system RAM. On large systems with
> TeraBytes of memory, this reservation can be quite significant.
>
> The regular kernel dump uses the kexec method to boot into capture
> kernel and it can control the parameters that are being passed to
> capture kernel. This allows a capability to strip down the parameters
> that can help lowering down the memory requirement for capture kernel to
> boot. This allows regular kdump to reserve less memory to start with.
>
> Where as fadump depends on power firmware (pHyp) to load the capture
> kernel after full reset and boots like a regular kernel. It needs same
> amount of memory to boot as the production kernel. On large systems
> production kernel needs significant amount of memory to boot. Hence
> fadump needs to reserve enough memory for capture kernel to boot
> successfully and execute dump capturing operations. By default fadump
> reserves 5% of total system RAM and in most cases this has worked
> flawlessly on variety of system configurations. Optionally,
> 'crashkernel=X' can also be used to specify more fine-tuned memory size
> for reservation.
So why do we even care about fadump when regular kexec provides
(presumably) same functionality with a smaller memory footprint? Or is
there any reason why kexec doesn't work well on ppc?
> >> Instead of setting aside a significant chunk of memory that nobody can use,
> >> take advantage ZONE_MOVABLE to mark a significant chunk of reserved memory
> >> as ZONE_MOVABLE, so that the kernel is prevented from using, but
> >> applications are free to use it.
> >
> > Why kernel cannot use that memory while userspace can?
>
> fadump needs to reserve memory to be able to save crashing kernel's
> memory, with help from power firmware, before the capture kernel loads
> into crashing kernel's memory area. Any contents present in this
> reserved memory will be over-written. If kernel is allowed to use this
> memory, then we loose that kernel data and won't be part of captured
> dump, which could be critical to debug root cause of system crash.
But then you simply screw user memory sitting there. This might be not
so critical as the kernel memory but still it sounds like you are
reducing the usefulness of the dump just because of inherent limitations
of fadump.
> Kdump and fadump both uses same infrastructure/tool (makedumpfile) to
> capture the memory dump. While the tool provides flexibility to
> determine what needs to be part of the dump and what memory to filter
> out, all supported distributions defaults to "Capture only kernel data
> and nothing else". Taking advantage of this default we can at least make
> the reserved memory available for userspace to use.
>
> If someone wants to capture userspace data as well then
> 'fadump=nonmovable' option can be used where reserved pages won't be
> marked zone movable.
Ohh, so you have an unclutter thing to support the case above.
> Advantage of movable method is the reserved memory chunk is also
> available for use.
>
> > [...]
> >> Documentation/powerpc/firmware-assisted-dump.txt | 18 +++
> >> arch/powerpc/include/asm/fadump.h | 7 +
> >> arch/powerpc/kernel/fadump.c | 123 +++++++++++++++++--
> >> arch/powerpc/platforms/pseries/hotplug-memory.c | 7 +
> >> include/linux/mmzone.h | 2
> >> mm/page_alloc.c | 146 ++++++++++++++++++++++
> >> 6 files changed, 290 insertions(+), 13 deletions(-)
> >
> > This is quite a large change and you didn't seem to explain why we need
> > it.
> >
>
> In fadump case, the reserved memory stays unused until system is
> crashed. fadump uses very small portion of this reserved memory, few
> KBs, for storing fadump metadata. Otherwise, the significant chunk of
> memory is completely unused. Hence, instead of blocking a memory that is
> un-utilized through out the lifetime of system, it's better to give it
> back to production kernel to use. But at the same time we don't want
> kernel to use that memory. While exploring we found 1) Linux kernel's
> Contiguous Memory Allocator (CMA) feature and 2) ZONE_MOVABLE, that
> suites the requirement. Initial 5 revisions of this patchset () was
> using CMA feature. However, fadump does not do any cma allocations,
> hence it will be more appropriate to use zone movable to achieve the same.
>
> But unlike CMA, there is no interface available to mark a custom
> reserved memory area as ZONE_MOVABLE. Hence patch 1/4 proposes the same.
Well, you are adding a significant amount of code so you should be much
better in explaining why does the generic code care about a ppc specific
kdump method.
--
Michal Hocko
SUSE Labs
^ permalink raw reply
* Re: [PATCH] powerpc/powernv : Save/Restore SPRG3 on entry/exit from stop.
From: Gautham R Shenoy @ 2018-07-17 11:47 UTC (permalink / raw)
To: Gautham R. Shenoy
Cc: Michael Ellerman, Benjamin Herrenschmidt, Michael Neuling,
Vaidyanathan Srinivasan, linuxppc-dev, linux-kernel,
Florian Weimer, Oleg Nesterov
In-Reply-To: <1531826849-31838-1-git-send-email-ego@linux.vnet.ibm.com>
On Tue, Jul 17, 2018 at 04:57:29PM +0530, Gautham R. Shenoy wrote:
> From: "Gautham R. Shenoy" <ego@linux.vnet.ibm.com>
>
> On 64-bit Servers, SPRN_SPRG3 and its userspace read-only mirror
> SPRN_USPRG3 are used as userspace VDSO write and read registers
> respectively.
>
> SPRN_SPRG3 is lost when we enter stop4 and above, and is currently not
> restored. As a result, any read from SPRN_USPRG3 returns zero on an
> exit from stop4 and above.
>
> Thus in this situation, any call from sched_getcpu() always returns
This happens only on POWER9.
> zero, as on powerpc, we call __kernel_getcpu() which relies upon
> SPRN_USPRG3 to report the CPU and NUMA node information.
>
> Fix this by saving the SPRN_SPRG3 before entering a deep stop state,
> and restoring it back on wakeup from the stop state.
>
I forgot to add that this fixes commit e1c1cfed5432 ("powerpc/powernv:
Save/Restore additional SPRs for stop4 cpuidle").
@mpe, do you want me to send fresh patch with these updates ?
> Reported-by: Florian Weimer <fweimer@redhat.com>
> Cc: Oleg Nesterov <oleg@redhat.com>
> Cc: Michael Neuling <mikey@neuling.org>
> Cc: Michael Ellerman <mpe@ellerman.id.au>
> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
> Cc: Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com>
> Signed-off-by: Gautham R. Shenoy <ego@linux.vnet.ibm.com>
> ---
> arch/powerpc/include/asm/cpuidle.h | 1 +
> arch/powerpc/kernel/asm-offsets.c | 1 +
> arch/powerpc/kernel/idle_book3s.S | 5 +++++
> 3 files changed, 7 insertions(+)
>
> diff --git a/arch/powerpc/include/asm/cpuidle.h b/arch/powerpc/include/asm/cpuidle.h
> index e210a83..03fa904 100644
> --- a/arch/powerpc/include/asm/cpuidle.h
> +++ b/arch/powerpc/include/asm/cpuidle.h
> @@ -77,6 +77,7 @@ struct stop_sprs {
> u64 mmcr1;
> u64 mmcr2;
> u64 mmcra;
> + u64 sprg3;
> };
>
> extern u32 pnv_fastsleep_workaround_at_entry[];
> diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
> index 89cf155..a35ebfc 100644
> --- a/arch/powerpc/kernel/asm-offsets.c
> +++ b/arch/powerpc/kernel/asm-offsets.c
> @@ -776,6 +776,7 @@ int main(void)
> STOP_SPR(STOP_MMCR1, mmcr1);
> STOP_SPR(STOP_MMCR2, mmcr2);
> STOP_SPR(STOP_MMCRA, mmcra);
> + STOP_SPR(STOP_SPRG3, sprg3);
> #endif
>
> DEFINE(PPC_DBELL_SERVER, PPC_DBELL_SERVER);
> diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S
> index d85d551..5069d42 100644
> --- a/arch/powerpc/kernel/idle_book3s.S
> +++ b/arch/powerpc/kernel/idle_book3s.S
> @@ -120,6 +120,9 @@ power9_save_additional_sprs:
> mfspr r4, SPRN_MMCR2
> std r3, STOP_MMCR1(r13)
> std r4, STOP_MMCR2(r13)
> +
> + mfspr r3, SPRN_SPRG3
> + std r3, STOP_SPRG3(r13)
> blr
>
> power9_restore_additional_sprs:
> @@ -144,7 +147,9 @@ power9_restore_additional_sprs:
> mtspr SPRN_MMCR1, r4
>
> ld r3, STOP_MMCR2(r13)
> + ld r4, STOP_SPRG3(r13)
> mtspr SPRN_MMCR2, r3
> + mtspr SPRN_SPRG3, r4
> blr
>
> /*
> --
> 1.9.4
>
^ permalink raw reply
* Re: [RFC PATCH v6 0/4] powerpc/fadump: Improvements and fixes for firmware-assisted dump.
From: Mahesh Jagannath Salgaonkar @ 2018-07-17 11:28 UTC (permalink / raw)
To: Michal Hocko
Cc: linuxppc-dev, Linux Kernel, Hari Bathini,
Ananth N Mavinakayanahalli, Srikar Dronamraju, Aneesh Kumar K.V,
Anshuman Khandual, Andrew Morton, Joonsoo Kim, Ananth Narayan,
kernelfans
In-Reply-To: <20180716082646.GF17280@dhcp22.suse.cz>
On 07/16/2018 01:56 PM, Michal Hocko wrote:
> On Mon 16-07-18 11:32:56, Mahesh J Salgaonkar wrote:
>> One of the primary issues with Firmware Assisted Dump (fadump) on Power
>> is that it needs a large amount of memory to be reserved. This reserved
>> memory is used for saving the contents of old crashed kernel's memory before
>> fadump capture kernel uses old kernel's memory area to boot. However, This
>> reserved memory area stays unused until system crash and isn't available
>> for production kernel to use.
>
> How much memory are we talking about. Regular kernel dump process needs
> some reserved memory as well. Why that is not a big problem?
We reserve around 5% of total system RAM. On large systems with
TeraBytes of memory, this reservation can be quite significant.
The regular kernel dump uses the kexec method to boot into capture
kernel and it can control the parameters that are being passed to
capture kernel. This allows a capability to strip down the parameters
that can help lowering down the memory requirement for capture kernel to
boot. This allows regular kdump to reserve less memory to start with.
Where as fadump depends on power firmware (pHyp) to load the capture
kernel after full reset and boots like a regular kernel. It needs same
amount of memory to boot as the production kernel. On large systems
production kernel needs significant amount of memory to boot. Hence
fadump needs to reserve enough memory for capture kernel to boot
successfully and execute dump capturing operations. By default fadump
reserves 5% of total system RAM and in most cases this has worked
flawlessly on variety of system configurations. Optionally,
'crashkernel=X' can also be used to specify more fine-tuned memory size
for reservation.
>
>> Instead of setting aside a significant chunk of memory that nobody can use,
>> take advantage ZONE_MOVABLE to mark a significant chunk of reserved memory
>> as ZONE_MOVABLE, so that the kernel is prevented from using, but
>> applications are free to use it.
>
> Why kernel cannot use that memory while userspace can?
fadump needs to reserve memory to be able to save crashing kernel's
memory, with help from power firmware, before the capture kernel loads
into crashing kernel's memory area. Any contents present in this
reserved memory will be over-written. If kernel is allowed to use this
memory, then we loose that kernel data and won't be part of captured
dump, which could be critical to debug root cause of system crash.
Kdump and fadump both uses same infrastructure/tool (makedumpfile) to
capture the memory dump. While the tool provides flexibility to
determine what needs to be part of the dump and what memory to filter
out, all supported distributions defaults to "Capture only kernel data
and nothing else". Taking advantage of this default we can at least make
the reserved memory available for userspace to use.
If someone wants to capture userspace data as well then
'fadump=nonmovable' option can be used where reserved pages won't be
marked zone movable.
Advantage of movable method is the reserved memory chunk is also
available for use.
> [...]
>> Documentation/powerpc/firmware-assisted-dump.txt | 18 +++
>> arch/powerpc/include/asm/fadump.h | 7 +
>> arch/powerpc/kernel/fadump.c | 123 +++++++++++++++++--
>> arch/powerpc/platforms/pseries/hotplug-memory.c | 7 +
>> include/linux/mmzone.h | 2
>> mm/page_alloc.c | 146 ++++++++++++++++++++++
>> 6 files changed, 290 insertions(+), 13 deletions(-)
>
> This is quite a large change and you didn't seem to explain why we need
> it.
>
In fadump case, the reserved memory stays unused until system is
crashed. fadump uses very small portion of this reserved memory, few
KBs, for storing fadump metadata. Otherwise, the significant chunk of
memory is completely unused. Hence, instead of blocking a memory that is
un-utilized through out the lifetime of system, it's better to give it
back to production kernel to use. But at the same time we don't want
kernel to use that memory. While exploring we found 1) Linux kernel's
Contiguous Memory Allocator (CMA) feature and 2) ZONE_MOVABLE, that
suites the requirement. Initial 5 revisions of this patchset () was
using CMA feature. However, fadump does not do any cma allocations,
hence it will be more appropriate to use zone movable to achieve the same.
But unlike CMA, there is no interface available to mark a custom
reserved memory area as ZONE_MOVABLE. Hence patch 1/4 proposes the same.
Thanks,
-Mahesh.
^ permalink raw reply
* [PATCH] powerpc/powernv : Save/Restore SPRG3 on entry/exit from stop.
From: Gautham R. Shenoy @ 2018-07-17 11:27 UTC (permalink / raw)
To: Michael Ellerman, Benjamin Herrenschmidt, Michael Neuling,
Vaidyanathan Srinivasan
Cc: linuxppc-dev, linux-kernel, Florian Weimer, Gautham R. Shenoy,
Oleg Nesterov
From: "Gautham R. Shenoy" <ego@linux.vnet.ibm.com>
On 64-bit Servers, SPRN_SPRG3 and its userspace read-only mirror
SPRN_USPRG3 are used as userspace VDSO write and read registers
respectively.
SPRN_SPRG3 is lost when we enter stop4 and above, and is currently not
restored. As a result, any read from SPRN_USPRG3 returns zero on an
exit from stop4 and above.
Thus in this situation, any call from sched_getcpu() always returns
zero, as on powerpc, we call __kernel_getcpu() which relies upon
SPRN_USPRG3 to report the CPU and NUMA node information.
Fix this by saving the SPRN_SPRG3 before entering a deep stop state,
and restoring it back on wakeup from the stop state.
Reported-by: Florian Weimer <fweimer@redhat.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Michael Neuling <mikey@neuling.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com>
Signed-off-by: Gautham R. Shenoy <ego@linux.vnet.ibm.com>
---
arch/powerpc/include/asm/cpuidle.h | 1 +
arch/powerpc/kernel/asm-offsets.c | 1 +
arch/powerpc/kernel/idle_book3s.S | 5 +++++
3 files changed, 7 insertions(+)
diff --git a/arch/powerpc/include/asm/cpuidle.h b/arch/powerpc/include/asm/cpuidle.h
index e210a83..03fa904 100644
--- a/arch/powerpc/include/asm/cpuidle.h
+++ b/arch/powerpc/include/asm/cpuidle.h
@@ -77,6 +77,7 @@ struct stop_sprs {
u64 mmcr1;
u64 mmcr2;
u64 mmcra;
+ u64 sprg3;
};
extern u32 pnv_fastsleep_workaround_at_entry[];
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 89cf155..a35ebfc 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -776,6 +776,7 @@ int main(void)
STOP_SPR(STOP_MMCR1, mmcr1);
STOP_SPR(STOP_MMCR2, mmcr2);
STOP_SPR(STOP_MMCRA, mmcra);
+ STOP_SPR(STOP_SPRG3, sprg3);
#endif
DEFINE(PPC_DBELL_SERVER, PPC_DBELL_SERVER);
diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S
index d85d551..5069d42 100644
--- a/arch/powerpc/kernel/idle_book3s.S
+++ b/arch/powerpc/kernel/idle_book3s.S
@@ -120,6 +120,9 @@ power9_save_additional_sprs:
mfspr r4, SPRN_MMCR2
std r3, STOP_MMCR1(r13)
std r4, STOP_MMCR2(r13)
+
+ mfspr r3, SPRN_SPRG3
+ std r3, STOP_SPRG3(r13)
blr
power9_restore_additional_sprs:
@@ -144,7 +147,9 @@ power9_restore_additional_sprs:
mtspr SPRN_MMCR1, r4
ld r3, STOP_MMCR2(r13)
+ ld r4, STOP_SPRG3(r13)
mtspr SPRN_MMCR2, r3
+ mtspr SPRN_SPRG3, r4
blr
/*
--
1.9.4
^ permalink raw reply related
* [PATCH v4 6/6] powerpc/fsl: Sanitize the syscall table for NXP PowerPC 32 bit platforms
From: Diana Craciun @ 2018-07-17 11:09 UTC (permalink / raw)
To: linuxppc-dev; +Cc: mpe, oss, leoyang.li, bharat.bhushan, Diana Craciun
In-Reply-To: <1531825797-14236-1-git-send-email-diana.craciun@nxp.com>
Used barrier_nospec to sanitize the syscall table.
Signed-off-by: Diana Craciun <diana.craciun@nxp.com>
---
History:
v2-->v3
- included in the series
arch/powerpc/kernel/entry_32.S | 10 ++++++++++
1 file changed, 10 insertions(+)
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index 973577f..8f05280 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -33,6 +33,7 @@
#include <asm/unistd.h>
#include <asm/ptrace.h>
#include <asm/export.h>
+#include <asm/barrier.h>
/*
* MSR_KERNEL is > 0x10000 on 4xx/Book-E since it include MSR_CE.
@@ -358,6 +359,15 @@ syscall_dotrace_cont:
ori r10,r10,sys_call_table@l
slwi r0,r0,2
bge- 66f
+
+ barrier_nospec_asm
+ /*
+ * Prevent the load of the handler below (based on the user-passed
+ * system call number) being speculatively executed until the test
+ * against NR_syscalls and branch to .66f above has
+ * committed.
+ */
+
lwzx r10,r10,r0 /* Fetch system call handler [ptr] */
mtlr r10
addi r9,r1,STACK_FRAME_OVERHEAD
--
2.5.5
^ permalink raw reply related
* [PATCH v4 5/6] powerpc/fsl: Add barrier_nospec implementation for NXP PowerPC Book3E
From: Diana Craciun @ 2018-07-17 11:09 UTC (permalink / raw)
To: linuxppc-dev; +Cc: mpe, oss, leoyang.li, bharat.bhushan, Diana Craciun
In-Reply-To: <1531825797-14236-1-git-send-email-diana.craciun@nxp.com>
Implement the barrier_nospec as a isync;sync instruction sequence.
The implementation uses the infrastructure built for BOOK3S 64.
Signed-off-by: Diana Craciun <diana.craciun@nxp.com>
---
History:
v3-->v4
- fixed compilation issues
v2-->v3
- added PPC_NOSPEC Kconfig
- addressed the review comments
It was a discussion at the previous review cycle about the place in the code
where to call setup_barrier_nospec. I have chosen to call the function in the
common code in order to be re-used on multiple platforms. However, I am not sure
that changes concerning powernv/pseries are correct, I need some input here.
arch/powerpc/include/asm/barrier.h | 12 +++++++++---
arch/powerpc/include/asm/setup.h | 6 +++++-
arch/powerpc/kernel/Makefile | 3 ++-
arch/powerpc/kernel/module.c | 4 +++-
arch/powerpc/kernel/setup-common.c | 2 ++
arch/powerpc/kernel/vmlinux.lds.S | 4 +++-
arch/powerpc/lib/feature-fixups.c | 35 +++++++++++++++++++++++++++++++++-
arch/powerpc/platforms/powernv/setup.c | 1 -
arch/powerpc/platforms/pseries/setup.c | 1 -
9 files changed, 58 insertions(+), 10 deletions(-)
diff --git a/arch/powerpc/include/asm/barrier.h b/arch/powerpc/include/asm/barrier.h
index f67b3f6..0bdfa81 100644
--- a/arch/powerpc/include/asm/barrier.h
+++ b/arch/powerpc/include/asm/barrier.h
@@ -77,19 +77,25 @@ do { \
})
#ifdef CONFIG_PPC_BOOK3S_64
+#define NOSPEC_BARRIER_SLOT nop
+#elif defined(CONFIG_PPC_FSL_BOOK3E)
+#define NOSPEC_BARRIER_SLOT nop; nop
+#endif /* CONFIG_PPC_BOOK3S_64 */
+
+#ifdef CONFIG_PPC_NOSPEC
/*
* Prevent execution of subsequent instructions until preceding branches have
* been fully resolved and are no longer executing speculatively.
*/
-#define barrier_nospec_asm NOSPEC_BARRIER_FIXUP_SECTION; nop
+#define barrier_nospec_asm NOSPEC_BARRIER_FIXUP_SECTION; NOSPEC_BARRIER_SLOT
// This also acts as a compiler barrier due to the memory clobber.
#define barrier_nospec() asm (stringify_in_c(barrier_nospec_asm) ::: "memory")
-#else /* !CONFIG_PPC_BOOK3S_64 */
+#else /* !CONFIG_PPC_NOSPEC */
#define barrier_nospec_asm
#define barrier_nospec()
-#endif
+#endif /* CONFIG_PPC_NOSPEC */
#include <asm-generic/barrier.h>
diff --git a/arch/powerpc/include/asm/setup.h b/arch/powerpc/include/asm/setup.h
index 8721fd0..a794a40 100644
--- a/arch/powerpc/include/asm/setup.h
+++ b/arch/powerpc/include/asm/setup.h
@@ -52,11 +52,15 @@ enum l1d_flush_type {
void setup_rfi_flush(enum l1d_flush_type, bool enable);
void do_rfi_flush_fixups(enum l1d_flush_type types);
+#ifdef CONFIG_PPC_NOSPEC
void setup_barrier_nospec(void);
+#else
+static inline void setup_barrier_nospec(void) { };
+#endif
void do_barrier_nospec_fixups(bool enable);
extern bool barrier_nospec_enabled;
-#ifdef CONFIG_PPC_BOOK3S_64
+#ifdef CONFIG_PPC_NOSPEC
void do_barrier_nospec_fixups_range(bool enable, void *start, void *end);
#else
static inline void do_barrier_nospec_fixups_range(bool enable, void *start, void *end) { };
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 2b4c40b2..cd0eb38 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -42,9 +42,10 @@ obj-$(CONFIG_VDSO32) += vdso32/
obj-$(CONFIG_PPC_WATCHDOG) += watchdog.o
obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_ppc970.o cpu_setup_pa6t.o
-obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_power.o security.o
+obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_power.o
obj-$(CONFIG_PPC_BOOK3S_64) += mce.o mce_power.o
obj-$(CONFIG_PPC_BOOK3E_64) += exceptions-64e.o idle_book3e.o
+obj-$(CONFIG_PPC_NOSPEC) += security.o
obj-$(CONFIG_PPC64) += vdso64/
obj-$(CONFIG_ALTIVEC) += vecemu.o
obj-$(CONFIG_PPC_970_NAP) += idle_power4.o
diff --git a/arch/powerpc/kernel/module.c b/arch/powerpc/kernel/module.c
index 1b3c683..a0c4967 100644
--- a/arch/powerpc/kernel/module.c
+++ b/arch/powerpc/kernel/module.c
@@ -72,13 +72,15 @@ int module_finalize(const Elf_Ehdr *hdr,
do_feature_fixups(powerpc_firmware_features,
(void *)sect->sh_addr,
(void *)sect->sh_addr + sect->sh_size);
+#endif /* CONFIG_PPC64 */
+#if defined(CONFIG_PPC64) || defined(CONFIG_PPC_FSL_BOOK3E)
sect = find_section(hdr, sechdrs, "__spec_barrier_fixup");
if (sect != NULL)
do_barrier_nospec_fixups_range(barrier_nospec_enabled,
(void *)sect->sh_addr,
(void *)sect->sh_addr + sect->sh_size);
-#endif
+#endif /* CONFIG_PPC64 || CONFIG_PPC_FSL_BOOK3E */
sect = find_section(hdr, sechdrs, "__lwsync_fixup");
if (sect != NULL)
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index 40b44bb..93fa0c9 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -972,6 +972,8 @@ void __init setup_arch(char **cmdline_p)
if (ppc_md.setup_arch)
ppc_md.setup_arch();
+ setup_barrier_nospec();
+
paging_init();
/* Initialize the MMU context management stuff. */
diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S
index 5baac79..6087b02 100644
--- a/arch/powerpc/kernel/vmlinux.lds.S
+++ b/arch/powerpc/kernel/vmlinux.lds.S
@@ -153,14 +153,16 @@ SECTIONS
*(__rfi_flush_fixup)
__stop___rfi_flush_fixup = .;
}
+#endif /* CONFIG_PPC64 */
+#if defined(CONFIG_PPC64) || defined(CONFIG_PPC_FSL_BOOK3E)
. = ALIGN(8);
__spec_barrier_fixup : AT(ADDR(__spec_barrier_fixup) - LOAD_OFFSET) {
__start___barrier_nospec_fixup = .;
*(__barrier_nospec_fixup)
__stop___barrier_nospec_fixup = .;
}
-#endif
+#endif /* CONFIG_PPC64 || CONFIG_PPC_FSL_BOOK3E */
EXCEPTION_TABLE(0)
diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c
index 8b69f86..41f372e 100644
--- a/arch/powerpc/lib/feature-fixups.c
+++ b/arch/powerpc/lib/feature-fixups.c
@@ -304,6 +304,9 @@ void do_barrier_nospec_fixups_range(bool enable, void *fixup_start, void *fixup_
printk(KERN_DEBUG "barrier-nospec: patched %d locations\n", i);
}
+#endif /* CONFIG_PPC_BOOK3S_64 */
+
+#ifdef CONFIG_PPC_NOSPEC
void do_barrier_nospec_fixups(bool enable)
{
void *start, *end;
@@ -313,8 +316,38 @@ void do_barrier_nospec_fixups(bool enable)
do_barrier_nospec_fixups_range(enable, start, end);
}
+#endif /* CONFIG_PPC_NOSPEC */
-#endif /* CONFIG_PPC_BOOK3S_64 */
+#ifdef CONFIG_PPC_FSL_BOOK3E
+void do_barrier_nospec_fixups_range(bool enable, void *fixup_start, void *fixup_end)
+{
+ unsigned int instr[2], *dest;
+ long *start, *end;
+ int i;
+
+ start = fixup_start;
+ end = fixup_end;
+
+ instr[0] = PPC_INST_NOP;
+ instr[1] = PPC_INST_NOP;
+
+ if (enable) {
+ pr_info("barrier-nospec: using isync; sync as speculation barrier\n");
+ instr[0] = PPC_INST_ISYNC;
+ instr[1] = PPC_INST_SYNC;
+ }
+
+ for (i = 0; start < end; start++, i++) {
+ dest = (void *)start + *start;
+
+ pr_devel("patching dest %lx\n", (unsigned long)dest);
+ patch_instruction(dest, instr[0]);
+ patch_instruction(dest + 1, instr[1]);
+ }
+
+ printk(KERN_DEBUG "barrier-nospec: patched %d locations\n", i);
+}
+#endif /* CONFIG_PPC_FSL_BOOK3E */
void do_lwsync_fixups(unsigned long value, void *fixup_start, void *fixup_end)
{
diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c
index f96df0a..1ab6dc7 100644
--- a/arch/powerpc/platforms/powernv/setup.c
+++ b/arch/powerpc/platforms/powernv/setup.c
@@ -124,7 +124,6 @@ static void pnv_setup_rfi_flush(void)
security_ftr_enabled(SEC_FTR_L1D_FLUSH_HV));
setup_rfi_flush(type, enable);
- setup_barrier_nospec();
}
static void __init pnv_setup_arch(void)
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 139f0af..fdb32e0 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -534,7 +534,6 @@ void pseries_setup_rfi_flush(void)
security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR);
setup_rfi_flush(types, enable);
- setup_barrier_nospec();
}
#ifdef CONFIG_PCI_IOV
--
2.5.5
^ permalink raw reply related
* [PATCH v4 4/6] powerpc/fsl: Enable cpu vulnerabilities reporting for NXP PPC BOOK3E
From: Diana Craciun @ 2018-07-17 11:09 UTC (permalink / raw)
To: linuxppc-dev; +Cc: mpe, oss, leoyang.li, bharat.bhushan, Diana Craciun
In-Reply-To: <1531825797-14236-1-git-send-email-diana.craciun@nxp.com>
The NXP PPC Book3E platforms are not vulnerable to meltdown and
Spectre v4, so make them PPC_BOOK3S_64 specific.
Signed-off-by: Diana Craciun <diana.craciun@nxp.com>
---
History:
v2-->v3
- used the existing functions for spectre v1/v2
arch/powerpc/Kconfig | 7 ++++++-
arch/powerpc/kernel/security.c | 2 ++
2 files changed, 8 insertions(+), 1 deletion(-)
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 9f2b75f..116c953 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -165,7 +165,7 @@ config PPC
select GENERIC_CLOCKEVENTS_BROADCAST if SMP
select GENERIC_CMOS_UPDATE
select GENERIC_CPU_AUTOPROBE
- select GENERIC_CPU_VULNERABILITIES if PPC_BOOK3S_64
+ select GENERIC_CPU_VULNERABILITIES if PPC_NOSPEC
select GENERIC_IRQ_SHOW
select GENERIC_IRQ_SHOW_LEVEL
select GENERIC_SMP_IDLE_THREAD
@@ -240,6 +240,11 @@ config PPC
# Please keep this list sorted alphabetically.
#
+config PPC_NOSPEC
+ bool
+ default y
+ depends on PPC_BOOK3S_64 || PPC_FSL_BOOK3E
+
config GENERIC_CSUM
def_bool n
diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c
index 3a4e5c3..539c744 100644
--- a/arch/powerpc/kernel/security.c
+++ b/arch/powerpc/kernel/security.c
@@ -92,6 +92,7 @@ static __init int barrier_nospec_debugfs_init(void)
device_initcall(barrier_nospec_debugfs_init);
#endif /* CONFIG_DEBUG_FS */
+#ifdef CONFIG_PPC_BOOK3S_64
ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf)
{
bool thread_priv;
@@ -124,6 +125,7 @@ ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, cha
return sprintf(buf, "Vulnerable\n");
}
+#endif
ssize_t cpu_show_spectre_v1(struct device *dev, struct device_attribute *attr, char *buf)
{
--
2.5.5
^ permalink raw reply related
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox