* [PATCH v13 13/24] selftests/vm: pkey register should match shadow pkey
From: Ram Pai @ 2018-06-14 0:45 UTC (permalink / raw)
To: shuahkh, linux-kselftest
Cc: mpe, linuxppc-dev, linux-mm, x86, linux-arch, mingo, dave.hansen,
mhocko, bauerman, linuxram, fweimer, msuchanek, aneesh.kumar
In-Reply-To: <1528937115-10132-1-git-send-email-linuxram@us.ibm.com>
expected_pkey_fault() is comparing the contents of pkey
register with 0. This may not be true all the time. There
could be bits set by default by the architecture
which can never be changed. Hence compare the value against
shadow pkey register, which is supposed to track the bits
accurately all throughout
cc: Dave Hansen <dave.hansen@intel.com>
cc: Florian Weimer <fweimer@redhat.com>
Signed-off-by: Ram Pai <linuxram@us.ibm.com>
---
tools/testing/selftests/vm/protection_keys.c | 4 ++--
1 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c
index 9afe894..adcae4a 100644
--- a/tools/testing/selftests/vm/protection_keys.c
+++ b/tools/testing/selftests/vm/protection_keys.c
@@ -916,10 +916,10 @@ void expected_pkey_fault(int pkey)
pkey_assert(last_si_pkey == pkey);
/*
- * The signal handler shold have cleared out PKEY register to let the
+ * The signal handler should have cleared out pkey-register to let the
* test program continue. We now have to restore it.
*/
- if (__read_pkey_reg() != 0)
+ if (__read_pkey_reg() != shadow_pkey_reg)
pkey_assert(0);
__write_pkey_reg(shadow_pkey_reg);
--
1.7.1
^ permalink raw reply related
* [PATCH v13 12/24] selftests/vm: introduce two arch independent abstraction
From: Ram Pai @ 2018-06-14 0:45 UTC (permalink / raw)
To: shuahkh, linux-kselftest
Cc: mpe, linuxppc-dev, linux-mm, x86, linux-arch, mingo, dave.hansen,
mhocko, bauerman, linuxram, fweimer, msuchanek, aneesh.kumar
In-Reply-To: <1528937115-10132-1-git-send-email-linuxram@us.ibm.com>
open_hugepage_file() <- opens the huge page file
get_start_key() <-- provides the first non-reserved key.
cc: Dave Hansen <dave.hansen@intel.com>
cc: Florian Weimer <fweimer@redhat.com>
Signed-off-by: Ram Pai <linuxram@us.ibm.com>
Signed-off-by: Thiago Jung Bauermann <bauerman@linux.ibm.com>
Reviewed-by: Dave Hansen <dave.hansen@intel.com>
---
tools/testing/selftests/vm/pkey-helpers.h | 10 ++++++++++
tools/testing/selftests/vm/pkey-x86.h | 1 +
tools/testing/selftests/vm/protection_keys.c | 6 +++---
3 files changed, 14 insertions(+), 3 deletions(-)
diff --git a/tools/testing/selftests/vm/pkey-helpers.h b/tools/testing/selftests/vm/pkey-helpers.h
index ada0146..52a1152 100644
--- a/tools/testing/selftests/vm/pkey-helpers.h
+++ b/tools/testing/selftests/vm/pkey-helpers.h
@@ -179,4 +179,14 @@ static inline void __pkey_write_allow(int pkey, int do_allow_write)
#define __stringify_1(x...) #x
#define __stringify(x...) __stringify_1(x)
+static inline int open_hugepage_file(int flag)
+{
+ return open(HUGEPAGE_FILE, flag);
+}
+
+static inline int get_start_key(void)
+{
+ return 1;
+}
+
#endif /* _PKEYS_HELPER_H */
diff --git a/tools/testing/selftests/vm/pkey-x86.h b/tools/testing/selftests/vm/pkey-x86.h
index 2b3780d..d5fa299 100644
--- a/tools/testing/selftests/vm/pkey-x86.h
+++ b/tools/testing/selftests/vm/pkey-x86.h
@@ -48,6 +48,7 @@
#define MB (1<<20)
#define pkey_reg_t u32
#define PKEY_REG_FMT "%016x"
+#define HUGEPAGE_FILE "/sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages"
static inline u32 pkey_bit_position(int pkey)
{
diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c
index c5f9776..9afe894 100644
--- a/tools/testing/selftests/vm/protection_keys.c
+++ b/tools/testing/selftests/vm/protection_keys.c
@@ -791,7 +791,7 @@ void setup_hugetlbfs(void)
* Now go make sure that we got the pages and that they
* are 2M pages. Someone might have made 1G the default.
*/
- fd = open("/sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages", O_RDONLY);
+ fd = open_hugepage_file(O_RDONLY);
if (fd < 0) {
perror("opening sysfs 2M hugetlb config");
return;
@@ -1078,10 +1078,10 @@ void test_kernel_gup_write_to_write_disabled_region(int *ptr, u16 pkey)
void test_pkey_syscalls_on_non_allocated_pkey(int *ptr, u16 pkey)
{
int err;
- int i;
+ int i = get_start_key();
/* Note: 0 is the default pkey, so don't mess with it */
- for (i = 1; i < NR_PKEYS; i++) {
+ for (; i < NR_PKEYS; i++) {
if (pkey == i)
continue;
--
1.7.1
^ permalink raw reply related
* [PATCH v13 11/24] selftests/vm: fix alloc_random_pkey() to make it really random
From: Ram Pai @ 2018-06-14 0:45 UTC (permalink / raw)
To: shuahkh, linux-kselftest
Cc: mpe, linuxppc-dev, linux-mm, x86, linux-arch, mingo, dave.hansen,
mhocko, bauerman, linuxram, fweimer, msuchanek, aneesh.kumar
In-Reply-To: <1528937115-10132-1-git-send-email-linuxram@us.ibm.com>
alloc_random_pkey() was allocating the same pkey every time.
Not all pkeys were geting tested. fixed it.
cc: Dave Hansen <dave.hansen@intel.com>
cc: Florian Weimer <fweimer@redhat.com>
Signed-off-by: Ram Pai <linuxram@us.ibm.com>
---
tools/testing/selftests/vm/protection_keys.c | 6 +++++-
1 files changed, 5 insertions(+), 1 deletions(-)
diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c
index 42a91c7..c5f9776 100644
--- a/tools/testing/selftests/vm/protection_keys.c
+++ b/tools/testing/selftests/vm/protection_keys.c
@@ -24,6 +24,7 @@
#define _GNU_SOURCE
#include <errno.h>
#include <linux/futex.h>
+#include <time.h>
#include <sys/time.h>
#include <sys/syscall.h>
#include <string.h>
@@ -576,13 +577,15 @@ int alloc_random_pkey(void)
int alloced_pkeys[NR_PKEYS];
int nr_alloced = 0;
int random_index;
+
memset(alloced_pkeys, 0, sizeof(alloced_pkeys));
+ srand((unsigned int)time(NULL));
/* allocate every possible key and make a note of which ones we got */
max_nr_pkey_allocs = NR_PKEYS;
- max_nr_pkey_allocs = 1;
for (i = 0; i < max_nr_pkey_allocs; i++) {
int new_pkey = alloc_pkey();
+
if (new_pkey < 0)
break;
alloced_pkeys[nr_alloced++] = new_pkey;
@@ -598,6 +601,7 @@ int alloc_random_pkey(void)
/* go through the allocated ones that we did not want and free them */
for (i = 0; i < nr_alloced; i++) {
int free_ret;
+
if (!alloced_pkeys[i])
continue;
free_ret = sys_pkey_free(alloced_pkeys[i]);
--
1.7.1
^ permalink raw reply related
* [PATCH v13 10/24] selftests/vm: clear the bits in shadow reg when a pkey is freed.
From: Ram Pai @ 2018-06-14 0:45 UTC (permalink / raw)
To: shuahkh, linux-kselftest
Cc: mpe, linuxppc-dev, linux-mm, x86, linux-arch, mingo, dave.hansen,
mhocko, bauerman, linuxram, fweimer, msuchanek, aneesh.kumar
In-Reply-To: <1528937115-10132-1-git-send-email-linuxram@us.ibm.com>
When a key is freed, the key is no more effective.
Clear the bits corresponding to the pkey in the shadow
register. Otherwise it will carry some spurious bits
which can trigger false-positive asserts.
cc: Dave Hansen <dave.hansen@intel.com>
cc: Florian Weimer <fweimer@redhat.com>
Signed-off-by: Ram Pai <linuxram@us.ibm.com>
---
tools/testing/selftests/vm/protection_keys.c | 3 +++
1 files changed, 3 insertions(+), 0 deletions(-)
diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c
index da4f5d5..42a91c7 100644
--- a/tools/testing/selftests/vm/protection_keys.c
+++ b/tools/testing/selftests/vm/protection_keys.c
@@ -556,6 +556,9 @@ int alloc_pkey(void)
int sys_pkey_free(unsigned long pkey)
{
int ret = syscall(SYS_pkey_free, pkey);
+
+ if (!ret)
+ shadow_pkey_reg &= clear_pkey_flags(pkey, PKEY_DISABLE_ACCESS);
dprintf1("%s(pkey=%ld) syscall ret: %d\n", __func__, pkey, ret);
return ret;
}
--
1.7.1
^ permalink raw reply related
* [PATCH v13 09/24] selftests/vm: fixed bugs in pkey_disable_clear()
From: Ram Pai @ 2018-06-14 0:45 UTC (permalink / raw)
To: shuahkh, linux-kselftest
Cc: mpe, linuxppc-dev, linux-mm, x86, linux-arch, mingo, dave.hansen,
mhocko, bauerman, linuxram, fweimer, msuchanek, aneesh.kumar
In-Reply-To: <1528937115-10132-1-git-send-email-linuxram@us.ibm.com>
instead of clearing the bits, pkey_disable_clear() was setting
the bits. Fixed it.
Also fixed a wrong assertion in that function. When bits are
cleared, the resulting bit value will be less than the original.
This hasn't been a problem so far because this code isn't currently
used.
cc: Dave Hansen <dave.hansen@intel.com>
cc: Florian Weimer <fweimer@redhat.com>
Signed-off-by: Ram Pai <linuxram@us.ibm.com>
---
tools/testing/selftests/vm/protection_keys.c | 4 ++--
1 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c
index 5fcccdb..da4f5d5 100644
--- a/tools/testing/selftests/vm/protection_keys.c
+++ b/tools/testing/selftests/vm/protection_keys.c
@@ -433,7 +433,7 @@ void pkey_disable_clear(int pkey, int flags)
pkey, pkey, pkey_rights);
pkey_assert(pkey_rights >= 0);
- pkey_rights |= flags;
+ pkey_rights &= ~flags;
ret = hw_pkey_set(pkey, pkey_rights, 0);
shadow_pkey_reg &= clear_pkey_flags(pkey, flags);
@@ -446,7 +446,7 @@ void pkey_disable_clear(int pkey, int flags)
dprintf1("%s(%d) pkey_reg: 0x"PKEY_REG_FMT"\n", __func__,
pkey, read_pkey_reg());
if (flags)
- assert(read_pkey_reg() > orig_pkey_reg);
+ assert(read_pkey_reg() < orig_pkey_reg);
}
void pkey_write_allow(int pkey)
--
1.7.1
^ permalink raw reply related
* [PATCH v13 08/24] selftests/vm: fix the wrong assert in pkey_disable_set()
From: Ram Pai @ 2018-06-14 0:44 UTC (permalink / raw)
To: shuahkh, linux-kselftest
Cc: mpe, linuxppc-dev, linux-mm, x86, linux-arch, mingo, dave.hansen,
mhocko, bauerman, linuxram, fweimer, msuchanek, aneesh.kumar
In-Reply-To: <1528937115-10132-1-git-send-email-linuxram@us.ibm.com>
If the flag is 0, no bits will be set. Hence we cant expect
the resulting bitmap to have a higher value than what it
was earlier.
cc: Dave Hansen <dave.hansen@intel.com>
cc: Florian Weimer <fweimer@redhat.com>
Signed-off-by: Ram Pai <linuxram@us.ibm.com>
---
tools/testing/selftests/vm/protection_keys.c | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)
diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c
index 57340b3..5fcccdb 100644
--- a/tools/testing/selftests/vm/protection_keys.c
+++ b/tools/testing/selftests/vm/protection_keys.c
@@ -415,7 +415,7 @@ void pkey_disable_set(int pkey, int flags)
dprintf1("%s(%d) pkey_reg: 0x"PKEY_REG_FMT"\n",
__func__, pkey, read_pkey_reg());
if (flags)
- pkey_assert(read_pkey_reg() > orig_pkey_reg);
+ pkey_assert(read_pkey_reg() >= orig_pkey_reg);
dprintf1("END<---%s(%d, 0x%x)\n", __func__,
pkey, flags);
}
--
1.7.1
^ permalink raw reply related
* [PATCH v13 07/24] selftests/vm: generic function to handle shadow key register
From: Ram Pai @ 2018-06-14 0:44 UTC (permalink / raw)
To: shuahkh, linux-kselftest
Cc: mpe, linuxppc-dev, linux-mm, x86, linux-arch, mingo, dave.hansen,
mhocko, bauerman, linuxram, fweimer, msuchanek, aneesh.kumar
In-Reply-To: <1528937115-10132-1-git-send-email-linuxram@us.ibm.com>
helper functions to handler shadow pkey register
cc: Dave Hansen <dave.hansen@intel.com>
cc: Florian Weimer <fweimer@redhat.com>
Signed-off-by: Ram Pai <linuxram@us.ibm.com>
Signed-off-by: Thiago Jung Bauermann <bauerman@linux.ibm.com>
---
tools/testing/selftests/vm/pkey-helpers.h | 29 ++++++++++++++++++++++
tools/testing/selftests/vm/pkey-x86.h | 5 ++++
tools/testing/selftests/vm/protection_keys.c | 34 ++++++++++++++++---------
3 files changed, 56 insertions(+), 12 deletions(-)
diff --git a/tools/testing/selftests/vm/pkey-helpers.h b/tools/testing/selftests/vm/pkey-helpers.h
index 2a1a024..ada0146 100644
--- a/tools/testing/selftests/vm/pkey-helpers.h
+++ b/tools/testing/selftests/vm/pkey-helpers.h
@@ -80,6 +80,35 @@ static inline void sigsafe_printf(const char *format, ...)
#error Architecture not supported
#endif /* arch */
+static inline pkey_reg_t clear_pkey_flags(int pkey, pkey_reg_t flags)
+{
+ u32 shift = pkey_bit_position(pkey);
+
+ return ~(flags << shift);
+}
+
+/*
+ * Takes pkey flags and puts them at the right bit position for the given key so
+ * that the result can be ORed into the register.
+ */
+static inline pkey_reg_t left_shift_bits(int pkey, pkey_reg_t bits)
+{
+ u32 shift = pkey_bit_position(pkey);
+
+ return (bits << shift);
+}
+
+/*
+ * Takes pkey register values and puts the flags for the given pkey at the least
+ * significant bits of the returned value.
+ */
+static inline pkey_reg_t right_shift_bits(int pkey, pkey_reg_t bits)
+{
+ u32 shift = pkey_bit_position(pkey);
+
+ return (bits >> shift);
+}
+
extern pkey_reg_t shadow_pkey_reg;
static inline pkey_reg_t _read_pkey_reg(int line)
diff --git a/tools/testing/selftests/vm/pkey-x86.h b/tools/testing/selftests/vm/pkey-x86.h
index 5f40901..2b3780d 100644
--- a/tools/testing/selftests/vm/pkey-x86.h
+++ b/tools/testing/selftests/vm/pkey-x86.h
@@ -49,6 +49,11 @@
#define pkey_reg_t u32
#define PKEY_REG_FMT "%016x"
+static inline u32 pkey_bit_position(int pkey)
+{
+ return pkey * PKEY_BITS_PER_PKEY;
+}
+
static inline void __page_o_noops(void)
{
/* 8-bytes of instruction * 512 bytes = 1 page */
diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c
index 90b3a41..57340b3 100644
--- a/tools/testing/selftests/vm/protection_keys.c
+++ b/tools/testing/selftests/vm/protection_keys.c
@@ -342,7 +342,7 @@ static u32 hw_pkey_get(int pkey, unsigned long flags)
__func__, pkey, flags, 0, 0);
dprintf2("%s() raw pkey_reg: "PKEY_REG_FMT"\n", __func__, pkey_reg);
- shifted_pkey_reg = (pkey_reg >> (pkey * PKEY_BITS_PER_PKEY));
+ shifted_pkey_reg = right_shift_bits(pkey, pkey_reg);
dprintf2("%s() shifted_pkey_reg: "PKEY_REG_FMT"\n", __func__,
shifted_pkey_reg);
masked_pkey_reg = shifted_pkey_reg & mask;
@@ -366,9 +366,9 @@ static int hw_pkey_set(int pkey, unsigned long rights, unsigned long flags)
/* copy old pkey_reg */
new_pkey_reg = old_pkey_reg;
/* mask out bits from pkey in old value: */
- new_pkey_reg &= ~(mask << (pkey * PKEY_BITS_PER_PKEY));
+ new_pkey_reg &= clear_pkey_flags(pkey, mask);
/* OR in new bits for pkey: */
- new_pkey_reg |= (rights << (pkey * PKEY_BITS_PER_PKEY));
+ new_pkey_reg |= left_shift_bits(pkey, rights);
__write_pkey_reg(new_pkey_reg);
@@ -402,7 +402,7 @@ void pkey_disable_set(int pkey, int flags)
ret = hw_pkey_set(pkey, pkey_rights, syscall_flags);
assert(!ret);
/*pkey_reg and flags have the same format */
- shadow_pkey_reg |= flags << (pkey * 2);
+ shadow_pkey_reg |= left_shift_bits(pkey, flags);
dprintf1("%s(%d) shadow: 0x"PKEY_REG_FMT"\n",
__func__, pkey, shadow_pkey_reg);
@@ -436,7 +436,7 @@ void pkey_disable_clear(int pkey, int flags)
pkey_rights |= flags;
ret = hw_pkey_set(pkey, pkey_rights, 0);
- shadow_pkey_reg &= ~(flags << (pkey * 2));
+ shadow_pkey_reg &= clear_pkey_flags(pkey, flags);
pkey_assert(ret >= 0);
pkey_rights = hw_pkey_get(pkey, syscall_flags);
@@ -494,6 +494,21 @@ int sys_pkey_alloc(unsigned long flags, unsigned long init_val)
return ret;
}
+void pkey_setup_shadow(void)
+{
+ shadow_pkey_reg = __read_pkey_reg();
+}
+
+void pkey_reset_shadow(u32 key)
+{
+ shadow_pkey_reg &= clear_pkey_flags(key, 0x3);
+}
+
+void pkey_set_shadow(u32 key, u64 init_val)
+{
+ shadow_pkey_reg |= left_shift_bits(key, init_val);
+}
+
int alloc_pkey(void)
{
int ret;
@@ -512,7 +527,7 @@ int alloc_pkey(void)
shadow_pkey_reg);
if (ret) {
/* clear both the bits: */
- shadow_pkey_reg &= ~(0x3 << (ret * 2));
+ pkey_reset_shadow(ret);
dprintf4("%s()::%d, ret: %d pkey_reg: 0x"PKEY_REG_FMT
" shadow: 0x"PKEY_REG_FMT"\n",
__func__,
@@ -522,7 +537,7 @@ int alloc_pkey(void)
* move the new state in from init_val
* (remember, we cheated and init_val == pkey_reg format)
*/
- shadow_pkey_reg |= (init_val << (ret * 2));
+ pkey_set_shadow(ret, init_val);
}
dprintf4("%s()::%d, ret: %d pkey_reg: 0x"PKEY_REG_FMT
" shadow: 0x"PKEY_REG_FMT"\n",
@@ -1403,11 +1418,6 @@ void run_tests_once(void)
iteration_nr++;
}
-void pkey_setup_shadow(void)
-{
- shadow_pkey_reg = __read_pkey_reg();
-}
-
int main(void)
{
int nr_iterations = 22;
--
1.7.1
^ permalink raw reply related
* [PATCH v13 06/24] selftests/vm: typecast the pkey register
From: Ram Pai @ 2018-06-14 0:44 UTC (permalink / raw)
To: shuahkh, linux-kselftest
Cc: mpe, linuxppc-dev, linux-mm, x86, linux-arch, mingo, dave.hansen,
mhocko, bauerman, linuxram, fweimer, msuchanek, aneesh.kumar
In-Reply-To: <1528937115-10132-1-git-send-email-linuxram@us.ibm.com>
This is in preparation to accomadate a differing size register
across architectures.
cc: Dave Hansen <dave.hansen@intel.com>
cc: Florian Weimer <fweimer@redhat.com>
Signed-off-by: Ram Pai <linuxram@us.ibm.com>
Signed-off-by: Thiago Jung Bauermann <bauerman@linux.ibm.com>
---
tools/testing/selftests/vm/pkey-helpers.h | 23 ++++---
tools/testing/selftests/vm/pkey-x86.h | 16 +++--
tools/testing/selftests/vm/protection_keys.c | 87 +++++++++++++++----------
3 files changed, 73 insertions(+), 53 deletions(-)
diff --git a/tools/testing/selftests/vm/pkey-helpers.h b/tools/testing/selftests/vm/pkey-helpers.h
index 7f18a82..2a1a024 100644
--- a/tools/testing/selftests/vm/pkey-helpers.h
+++ b/tools/testing/selftests/vm/pkey-helpers.h
@@ -80,13 +80,14 @@ static inline void sigsafe_printf(const char *format, ...)
#error Architecture not supported
#endif /* arch */
-extern unsigned int shadow_pkey_reg;
+extern pkey_reg_t shadow_pkey_reg;
-static inline unsigned int _read_pkey_reg(int line)
+static inline pkey_reg_t _read_pkey_reg(int line)
{
- unsigned int pkey_reg = __read_pkey_reg();
+ pkey_reg_t pkey_reg = __read_pkey_reg();
- dprintf4("read_pkey_reg(line=%d) pkey_reg: %x shadow: %x\n",
+ dprintf4("read_pkey_reg(line=%d) pkey_reg: "PKEY_REG_FMT
+ " shadow: "PKEY_REG_FMT"\n",
line, pkey_reg, shadow_pkey_reg);
assert(pkey_reg == shadow_pkey_reg);
@@ -95,15 +96,15 @@ static inline unsigned int _read_pkey_reg(int line)
#define read_pkey_reg() _read_pkey_reg(__LINE__)
-static inline void write_pkey_reg(unsigned int pkey_reg)
+static inline void write_pkey_reg(pkey_reg_t pkey_reg)
{
- dprintf4("%s() changing %08x to %08x\n", __func__,
+ dprintf4("%s() changing "PKEY_REG_FMT" to "PKEY_REG_FMT"\n", __func__,
__read_pkey_reg(), pkey_reg);
/* will do the shadow check for us: */
read_pkey_reg();
__write_pkey_reg(pkey_reg);
shadow_pkey_reg = pkey_reg;
- dprintf4("%s(%08x) pkey_reg: %08x\n", __func__,
+ dprintf4("%s("PKEY_REG_FMT") pkey_reg: "PKEY_REG_FMT"\n", __func__,
pkey_reg, __read_pkey_reg());
}
@@ -113,7 +114,7 @@ static inline void write_pkey_reg(unsigned int pkey_reg)
*/
static inline void __pkey_access_allow(int pkey, int do_allow)
{
- unsigned int pkey_reg = read_pkey_reg();
+ pkey_reg_t pkey_reg = read_pkey_reg();
int bit = pkey * 2;
if (do_allow)
@@ -121,13 +122,13 @@ static inline void __pkey_access_allow(int pkey, int do_allow)
else
pkey_reg |= (1<<bit);
- dprintf4("pkey_reg now: %08x\n", read_pkey_reg());
+ dprintf4("pkey_reg now: "PKEY_REG_FMT"\n", read_pkey_reg());
write_pkey_reg(pkey_reg);
}
static inline void __pkey_write_allow(int pkey, int do_allow_write)
{
- long pkey_reg = read_pkey_reg();
+ pkey_reg_t pkey_reg = read_pkey_reg();
int bit = pkey * 2 + 1;
if (do_allow_write)
@@ -136,7 +137,7 @@ static inline void __pkey_write_allow(int pkey, int do_allow_write)
pkey_reg |= (1<<bit);
write_pkey_reg(pkey_reg);
- dprintf4("pkey_reg now: %08x\n", read_pkey_reg());
+ dprintf4("pkey_reg now: "PKEY_REG_FMT"\n", read_pkey_reg());
}
#define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x)))
diff --git a/tools/testing/selftests/vm/pkey-x86.h b/tools/testing/selftests/vm/pkey-x86.h
index 2f04ade..5f40901 100644
--- a/tools/testing/selftests/vm/pkey-x86.h
+++ b/tools/testing/selftests/vm/pkey-x86.h
@@ -46,6 +46,8 @@
#define HPAGE_SIZE (1UL<<21)
#define PAGE_SIZE 4096
#define MB (1<<20)
+#define pkey_reg_t u32
+#define PKEY_REG_FMT "%016x"
static inline void __page_o_noops(void)
{
@@ -53,11 +55,11 @@ static inline void __page_o_noops(void)
asm(".rept 512 ; nopl 0x7eeeeeee(%eax) ; .endr");
}
-static inline unsigned int __read_pkey_reg(void)
+static inline pkey_reg_t __read_pkey_reg(void)
{
unsigned int eax, edx;
unsigned int ecx = 0;
- unsigned int pkey_reg;
+ pkey_reg_t pkey_reg;
asm volatile(".byte 0x0f,0x01,0xee\n\t"
: "=a" (eax), "=d" (edx)
@@ -66,13 +68,13 @@ static inline unsigned int __read_pkey_reg(void)
return pkey_reg;
}
-static inline void __write_pkey_reg(unsigned int pkey_reg)
+static inline void __write_pkey_reg(pkey_reg_t pkey_reg)
{
- unsigned int eax = pkey_reg;
- unsigned int ecx = 0;
- unsigned int edx = 0;
+ pkey_reg_t eax = pkey_reg;
+ pkey_reg_t ecx = 0;
+ pkey_reg_t edx = 0;
- dprintf4("%s() changing %08x to %08x\n", __func__,
+ dprintf4("%s() changing "PKEY_REG_FMT" to "PKEY_REG_FMT"\n", __func__,
__read_pkey_reg(), pkey_reg);
asm volatile(".byte 0x0f,0x01,0xef\n\t"
: : "a" (eax), "c" (ecx), "d" (edx));
diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c
index 99e4e1e..90b3a41 100644
--- a/tools/testing/selftests/vm/protection_keys.c
+++ b/tools/testing/selftests/vm/protection_keys.c
@@ -48,7 +48,7 @@
int iteration_nr = 1;
int test_nr;
-unsigned int shadow_pkey_reg;
+pkey_reg_t shadow_pkey_reg;
int dprint_in_signal;
char dprint_in_signal_buffer[DPRINT_IN_SIGNAL_BUF_SIZE];
@@ -163,7 +163,7 @@ void dump_mem(void *dumpme, int len_bytes)
for (i = 0; i < len_bytes; i += sizeof(u64)) {
u64 *ptr = (u64 *)(c + i);
- dprintf1("dump[%03d][@%p]: %016jx\n", i, ptr, *ptr);
+ dprintf1("dump[%03d][@%p]: %016lx\n", i, ptr, *ptr);
}
}
@@ -197,7 +197,7 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext)
int trapno;
unsigned long ip;
char *fpregs;
- u32 *pkey_reg_ptr;
+ pkey_reg_t *pkey_reg_ptr;
u64 siginfo_pkey;
u32 *si_pkey_ptr;
int pkey_reg_offset;
@@ -205,7 +205,8 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext)
dprint_in_signal = 1;
dprintf1(">>>>===============SIGSEGV============================\n");
- dprintf1("%s()::%d, pkey_reg: 0x%x shadow: %x\n", __func__, __LINE__,
+ dprintf1("%s()::%d, pkey_reg: "PKEY_REG_FMT" shadow: "PKEY_REG_FMT"\n",
+ __func__, __LINE__,
__read_pkey_reg(), shadow_pkey_reg);
trapno = uctxt->uc_mcontext.gregs[REG_TRAPNO];
@@ -213,8 +214,9 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext)
fpregset = uctxt->uc_mcontext.fpregs;
fpregs = (void *)fpregset;
- dprintf2("%s() trapno: %d ip: 0x%lx info->si_code: %s/%d\n", __func__,
- trapno, ip, si_code_str(si->si_code), si->si_code);
+ dprintf2("%s() trapno: %d ip: 0x%016lx info->si_code: %s/%d\n",
+ __func__, trapno, ip, si_code_str(si->si_code),
+ si->si_code);
#ifdef __i386__
/*
* 32-bit has some extra padding so that userspace can tell whether
@@ -251,12 +253,13 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext)
pkey_assert(siginfo_pkey < NR_PKEYS);
last_si_pkey = siginfo_pkey;
- dprintf1("signal pkey_reg from xsave: %08x\n", *pkey_reg_ptr);
+ dprintf1("signal pkey_reg from xsave: "PKEY_REG_FMT"\n", *pkey_reg_ptr);
/*
* need __read_pkey_reg() version so we do not do shadow_pkey_reg
* checking
*/
- dprintf1("signal pkey_reg from pkey_reg: %08x\n", __read_pkey_reg());
+ dprintf1("signal pkey_reg from pkey_reg: "PKEY_REG_FMT"\n",
+ __read_pkey_reg());
dprintf1("pkey from siginfo: %jx\n", siginfo_pkey);
*(u64 *)pkey_reg_ptr = 0x00000000;
dprintf1("WARNING: set PRKU=0 to allow faulting instruction to continue\n");
@@ -331,16 +334,17 @@ pid_t fork_lazy_child(void)
static u32 hw_pkey_get(int pkey, unsigned long flags)
{
u32 mask = (PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE);
- u32 pkey_reg = __read_pkey_reg();
- u32 shifted_pkey_reg;
+ pkey_reg_t pkey_reg = __read_pkey_reg();
+ pkey_reg_t shifted_pkey_reg;
u32 masked_pkey_reg;
dprintf1("%s(pkey=%d, flags=%lx) = %x / %d\n",
__func__, pkey, flags, 0, 0);
- dprintf2("%s() raw pkey_reg: %x\n", __func__, pkey_reg);
+ dprintf2("%s() raw pkey_reg: "PKEY_REG_FMT"\n", __func__, pkey_reg);
shifted_pkey_reg = (pkey_reg >> (pkey * PKEY_BITS_PER_PKEY));
- dprintf2("%s() shifted_pkey_reg: %x\n", __func__, shifted_pkey_reg);
+ dprintf2("%s() shifted_pkey_reg: "PKEY_REG_FMT"\n", __func__,
+ shifted_pkey_reg);
masked_pkey_reg = shifted_pkey_reg & mask;
dprintf2("%s() masked pkey_reg: %x\n", __func__, masked_pkey_reg);
/*
@@ -353,8 +357,8 @@ static u32 hw_pkey_get(int pkey, unsigned long flags)
static int hw_pkey_set(int pkey, unsigned long rights, unsigned long flags)
{
u32 mask = (PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE);
- u32 old_pkey_reg = __read_pkey_reg();
- u32 new_pkey_reg;
+ pkey_reg_t old_pkey_reg = __read_pkey_reg();
+ pkey_reg_t new_pkey_reg;
/* make sure that 'rights' only contains the bits we expect: */
assert(!(rights & ~mask));
@@ -369,7 +373,7 @@ static int hw_pkey_set(int pkey, unsigned long rights, unsigned long flags)
__write_pkey_reg(new_pkey_reg);
dprintf3("%s(pkey=%d, rights=%lx, flags=%lx) = %x"
- " pkey_reg now: %x old_pkey_reg: %x\n",
+ " pkey_reg now: "PKEY_REG_FMT" old_pkey_reg: "PKEY_REG_FMT"\n",
__func__, pkey, rights, flags, 0, __read_pkey_reg(),
old_pkey_reg);
return 0;
@@ -380,7 +384,7 @@ void pkey_disable_set(int pkey, int flags)
unsigned long syscall_flags = 0;
int ret;
int pkey_rights;
- u32 orig_pkey_reg = read_pkey_reg();
+ pkey_reg_t orig_pkey_reg = read_pkey_reg();
dprintf1("START->%s(%d, 0x%x)\n", __func__,
pkey, flags);
@@ -390,6 +394,7 @@ void pkey_disable_set(int pkey, int flags)
dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__,
pkey, pkey, pkey_rights);
+
pkey_assert(pkey_rights >= 0);
pkey_rights |= flags;
@@ -398,7 +403,8 @@ void pkey_disable_set(int pkey, int flags)
assert(!ret);
/*pkey_reg and flags have the same format */
shadow_pkey_reg |= flags << (pkey * 2);
- dprintf1("%s(%d) shadow: 0x%x\n", __func__, pkey, shadow_pkey_reg);
+ dprintf1("%s(%d) shadow: 0x"PKEY_REG_FMT"\n",
+ __func__, pkey, shadow_pkey_reg);
pkey_assert(ret >= 0);
@@ -406,7 +412,8 @@ void pkey_disable_set(int pkey, int flags)
dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__,
pkey, pkey, pkey_rights);
- dprintf1("%s(%d) pkey_reg: 0x%x\n", __func__, pkey, read_pkey_reg());
+ dprintf1("%s(%d) pkey_reg: 0x"PKEY_REG_FMT"\n",
+ __func__, pkey, read_pkey_reg());
if (flags)
pkey_assert(read_pkey_reg() > orig_pkey_reg);
dprintf1("END<---%s(%d, 0x%x)\n", __func__,
@@ -418,7 +425,7 @@ void pkey_disable_clear(int pkey, int flags)
unsigned long syscall_flags = 0;
int ret;
int pkey_rights = hw_pkey_get(pkey, syscall_flags);
- u32 orig_pkey_reg = read_pkey_reg();
+ pkey_reg_t orig_pkey_reg = read_pkey_reg();
pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE));
@@ -436,7 +443,8 @@ void pkey_disable_clear(int pkey, int flags)
dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__,
pkey, pkey, pkey_rights);
- dprintf1("%s(%d) pkey_reg: 0x%x\n", __func__, pkey, read_pkey_reg());
+ dprintf1("%s(%d) pkey_reg: 0x"PKEY_REG_FMT"\n", __func__,
+ pkey, read_pkey_reg());
if (flags)
assert(read_pkey_reg() > orig_pkey_reg);
}
@@ -491,20 +499,22 @@ int alloc_pkey(void)
int ret;
unsigned long init_val = 0x0;
- dprintf1("%s()::%d, pkey_reg: 0x%x shadow: %x\n", __func__,
- __LINE__, __read_pkey_reg(), shadow_pkey_reg);
+ dprintf1("%s()::%d, pkey_reg: "PKEY_REG_FMT" shadow: "PKEY_REG_FMT"\n",
+ __func__, __LINE__, __read_pkey_reg(), shadow_pkey_reg);
ret = sys_pkey_alloc(0, init_val);
/*
* pkey_alloc() sets PKEY register, so we need to reflect it in
* shadow_pkey_reg:
*/
- dprintf4("%s()::%d, ret: %d pkey_reg: 0x%x shadow: 0x%x\n",
+ dprintf4("%s()::%d, ret: %d pkey_reg: 0x"PKEY_REG_FMT
+ " shadow: 0x"PKEY_REG_FMT"\n",
__func__, __LINE__, ret, __read_pkey_reg(),
shadow_pkey_reg);
if (ret) {
/* clear both the bits: */
shadow_pkey_reg &= ~(0x3 << (ret * 2));
- dprintf4("%s()::%d, ret: %d pkey_reg: 0x%x shadow: 0x%x\n",
+ dprintf4("%s()::%d, ret: %d pkey_reg: 0x"PKEY_REG_FMT
+ " shadow: 0x"PKEY_REG_FMT"\n",
__func__,
__LINE__, ret, __read_pkey_reg(),
shadow_pkey_reg);
@@ -514,13 +524,15 @@ int alloc_pkey(void)
*/
shadow_pkey_reg |= (init_val << (ret * 2));
}
- dprintf4("%s()::%d, ret: %d pkey_reg: 0x%x shadow: 0x%x\n",
+ dprintf4("%s()::%d, ret: %d pkey_reg: 0x"PKEY_REG_FMT
+ " shadow: 0x"PKEY_REG_FMT"\n",
__func__, __LINE__, ret, __read_pkey_reg(),
shadow_pkey_reg);
dprintf1("%s()::%d errno: %d\n", __func__, __LINE__, errno);
/* for shadow checking: */
read_pkey_reg();
- dprintf4("%s()::%d, ret: %d pkey_reg: 0x%x shadow: 0x%x\n",
+ dprintf4("%s()::%d, ret: %d pkey_reg: 0x"PKEY_REG_FMT
+ " shadow: 0x"PKEY_REG_FMT"\n",
__func__, __LINE__, ret, __read_pkey_reg(),
shadow_pkey_reg);
return ret;
@@ -573,7 +585,8 @@ int alloc_random_pkey(void)
free_ret = sys_pkey_free(alloced_pkeys[i]);
pkey_assert(!free_ret);
}
- dprintf1("%s()::%d, ret: %d pkey_reg: 0x%x shadow: 0x%x\n", __func__,
+ dprintf1("%s()::%d, ret: %d pkey_reg: 0x"PKEY_REG_FMT
+ " shadow: 0x"PKEY_REG_FMT"\n", __func__,
__LINE__, ret, __read_pkey_reg(), shadow_pkey_reg);
return ret;
}
@@ -592,11 +605,13 @@ int mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot,
if (nr_iterations-- < 0)
break;
- dprintf1("%s()::%d, ret: %d pkey_reg: 0x%x shadow: 0x%x\n",
+ dprintf1("%s()::%d, ret: %d pkey_reg: 0x"PKEY_REG_FMT
+ " shadow: 0x"PKEY_REG_FMT"\n",
__func__, __LINE__, ret, __read_pkey_reg(),
shadow_pkey_reg);
sys_pkey_free(rpkey);
- dprintf1("%s()::%d, ret: %d pkey_reg: 0x%x shadow: 0x%x\n",
+ dprintf1("%s()::%d, ret: %d pkey_reg: 0x"PKEY_REG_FMT
+ " shadow: 0x"PKEY_REG_FMT"\n",
__func__, __LINE__, ret, __read_pkey_reg(),
shadow_pkey_reg);
}
@@ -606,7 +621,8 @@ int mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot,
dprintf1("mprotect_pkey(%p, %zx, prot=0x%lx, pkey=%ld) ret: %d\n",
ptr, size, orig_prot, pkey, ret);
pkey_assert(!ret);
- dprintf1("%s()::%d, ret: %d pkey_reg: 0x%x shadow: 0x%x\n", __func__,
+ dprintf1("%s()::%d, ret: %d pkey_reg: 0x"PKEY_REG_FMT
+ " shadow: 0x"PKEY_REG_FMT"\n", __func__,
__LINE__, ret, __read_pkey_reg(), shadow_pkey_reg);
return ret;
}
@@ -885,7 +901,7 @@ void expected_pkey_fault(int pkey)
pkey_assert(0);
__write_pkey_reg(shadow_pkey_reg);
- dprintf1("%s() set pkey_reg=%x to restore state after signal "
+ dprintf1("%s() set pkey_reg="PKEY_REG_FMT" to restore state after signal "
"nuked it\n", __func__, shadow_pkey_reg);
last_pkey_faults = pkey_faults;
last_si_pkey = -1;
@@ -1082,7 +1098,8 @@ void test_pkey_alloc_exhaust(int *ptr, u16 pkey)
int new_pkey;
dprintf1("%s() alloc loop: %d\n", __func__, i);
new_pkey = alloc_pkey();
- dprintf4("%s()::%d, err: %d pkey_reg: 0x%x shadow: 0x%x\n",
+ dprintf4("%s()::%d, err: %d pkey_reg: 0x"PKEY_REG_FMT
+ " shadow: 0x"PKEY_REG_FMT"\n",
__func__, __LINE__, err, __read_pkey_reg(),
shadow_pkey_reg);
read_pkey_reg(); /* for shadow checking */
@@ -1264,7 +1281,7 @@ void test_executing_on_unreadable_memory(int *ptr, u16 pkey)
pkey_assert(!ret);
pkey_access_deny(pkey);
- dprintf2("pkey_reg: %x\n", read_pkey_reg());
+ dprintf2("pkey_reg: "PKEY_REG_FMT"\n", read_pkey_reg());
/*
* Make sure this is an *instruction* fault
@@ -1295,7 +1312,7 @@ void test_implicit_mprotect_exec_only_memory(int *ptr, u16 pkey)
ret = mprotect(p1, PAGE_SIZE, PROT_EXEC);
pkey_assert(!ret);
- dprintf2("pkru: %x\n", read_pkey_reg());
+ dprintf2("pkru: "PKEY_REG_FMT"\n", read_pkey_reg());
/* Make sure this is an *instruction* fault */
madvise(p1, PAGE_SIZE, MADV_DONTNEED);
@@ -1412,7 +1429,7 @@ int main(void)
}
pkey_setup_shadow();
- printf("startup pkey_reg: %x\n", read_pkey_reg());
+ printf("startup pkey_reg: 0x"PKEY_REG_FMT"\n", read_pkey_reg());
setup_hugetlbfs();
while (nr_iterations-- > 0)
--
1.7.1
^ permalink raw reply related
* [PATCH v13 05/24] selftests/vm: Make gcc check arguments of sigsafe_printf()
From: Ram Pai @ 2018-06-14 0:44 UTC (permalink / raw)
To: shuahkh, linux-kselftest
Cc: mpe, linuxppc-dev, linux-mm, x86, linux-arch, mingo, dave.hansen,
mhocko, bauerman, linuxram, fweimer, msuchanek, aneesh.kumar
In-Reply-To: <1528937115-10132-1-git-send-email-linuxram@us.ibm.com>
From: Thiago Jung Bauermann <bauerman@linux.ibm.com>
This will help us ensure we print pkey_reg_t values correctly in different
architectures.
Signed-off-by: Thiago Jung Bauermann <bauerman@linux.ibm.com>
---
tools/testing/selftests/vm/pkey-helpers.h | 4 ++++
1 files changed, 4 insertions(+), 0 deletions(-)
diff --git a/tools/testing/selftests/vm/pkey-helpers.h b/tools/testing/selftests/vm/pkey-helpers.h
index 3ed2f02..7f18a82 100644
--- a/tools/testing/selftests/vm/pkey-helpers.h
+++ b/tools/testing/selftests/vm/pkey-helpers.h
@@ -27,6 +27,10 @@
#define DPRINT_IN_SIGNAL_BUF_SIZE 4096
extern int dprint_in_signal;
extern char dprint_in_signal_buffer[DPRINT_IN_SIGNAL_BUF_SIZE];
+
+#ifdef __GNUC__
+__attribute__((format(printf, 1, 2)))
+#endif
static inline void sigsafe_printf(const char *format, ...)
{
va_list ap;
--
1.7.1
^ permalink raw reply related
* [PATCH v13 04/24] selftests/vm: move arch-specific definitions to arch-specific header
From: Ram Pai @ 2018-06-14 0:44 UTC (permalink / raw)
To: shuahkh, linux-kselftest
Cc: mpe, linuxppc-dev, linux-mm, x86, linux-arch, mingo, dave.hansen,
mhocko, bauerman, linuxram, fweimer, msuchanek, aneesh.kumar
In-Reply-To: <1528937115-10132-1-git-send-email-linuxram@us.ibm.com>
From: Thiago Jung Bauermann <bauerman@linux.ibm.com>
In preparation for multi-arch support, move definitions which have
arch-specific values to x86-specific header.
cc: Dave Hansen <dave.hansen@intel.com>
cc: Florian Weimer <fweimer@redhat.com>
Signed-off-by: Ram Pai <linuxram@us.ibm.com>
Signed-off-by: Thiago Jung Bauermann <bauerman@linux.ibm.com>
---
tools/testing/selftests/vm/pkey-helpers.h | 111 +-----------------
tools/testing/selftests/vm/pkey-x86.h | 156 ++++++++++++++++++++++++++
tools/testing/selftests/vm/protection_keys.c | 47 --------
3 files changed, 162 insertions(+), 152 deletions(-)
create mode 100644 tools/testing/selftests/vm/pkey-x86.h
diff --git a/tools/testing/selftests/vm/pkey-helpers.h b/tools/testing/selftests/vm/pkey-helpers.h
index 6ad1bd5..3ed2f02 100644
--- a/tools/testing/selftests/vm/pkey-helpers.h
+++ b/tools/testing/selftests/vm/pkey-helpers.h
@@ -21,9 +21,6 @@
#define PTR_ERR_ENOTSUP ((void *)-ENOTSUP)
-#define NR_PKEYS 16
-#define PKEY_BITS_PER_PKEY 2
-
#ifndef DEBUG_LEVEL
#define DEBUG_LEVEL 0
#endif
@@ -73,19 +70,13 @@ static inline void sigsafe_printf(const char *format, ...)
} \
} while (0)
+#if defined(__i386__) || defined(__x86_64__) /* arch */
+#include "pkey-x86.h"
+#else /* arch */
+#error Architecture not supported
+#endif /* arch */
+
extern unsigned int shadow_pkey_reg;
-static inline unsigned int __read_pkey_reg(void)
-{
- unsigned int eax, edx;
- unsigned int ecx = 0;
- unsigned int pkey_reg;
-
- asm volatile(".byte 0x0f,0x01,0xee\n\t"
- : "=a" (eax), "=d" (edx)
- : "c" (ecx));
- pkey_reg = eax;
- return pkey_reg;
-}
static inline unsigned int _read_pkey_reg(int line)
{
@@ -100,19 +91,6 @@ static inline unsigned int _read_pkey_reg(int line)
#define read_pkey_reg() _read_pkey_reg(__LINE__)
-static inline void __write_pkey_reg(unsigned int pkey_reg)
-{
- unsigned int eax = pkey_reg;
- unsigned int ecx = 0;
- unsigned int edx = 0;
-
- dprintf4("%s() changing %08x to %08x\n", __func__,
- __read_pkey_reg(), pkey_reg);
- asm volatile(".byte 0x0f,0x01,0xef\n\t"
- : : "a" (eax), "c" (ecx), "d" (edx));
- assert(pkey_reg == __read_pkey_reg());
-}
-
static inline void write_pkey_reg(unsigned int pkey_reg)
{
dprintf4("%s() changing %08x to %08x\n", __func__,
@@ -157,83 +135,6 @@ static inline void __pkey_write_allow(int pkey, int do_allow_write)
dprintf4("pkey_reg now: %08x\n", read_pkey_reg());
}
-#define PAGE_SIZE 4096
-#define MB (1<<20)
-
-static inline void __cpuid(unsigned int *eax, unsigned int *ebx,
- unsigned int *ecx, unsigned int *edx)
-{
- /* ecx is often an input as well as an output. */
- asm volatile(
- "cpuid;"
- : "=a" (*eax),
- "=b" (*ebx),
- "=c" (*ecx),
- "=d" (*edx)
- : "0" (*eax), "2" (*ecx));
-}
-
-/* Intel-defined CPU features, CPUID level 0x00000007:0 (ecx) */
-#define X86_FEATURE_PKU (1<<3) /* Protection Keys for Userspace */
-#define X86_FEATURE_OSPKE (1<<4) /* OS Protection Keys Enable */
-
-static inline int cpu_has_pku(void)
-{
- unsigned int eax;
- unsigned int ebx;
- unsigned int ecx;
- unsigned int edx;
-
- eax = 0x7;
- ecx = 0x0;
- __cpuid(&eax, &ebx, &ecx, &edx);
-
- if (!(ecx & X86_FEATURE_PKU)) {
- dprintf2("cpu does not have PKU\n");
- return 0;
- }
- if (!(ecx & X86_FEATURE_OSPKE)) {
- dprintf2("cpu does not have OSPKE\n");
- return 0;
- }
- return 1;
-}
-
-#define XSTATE_PKEY_BIT (9)
-#define XSTATE_PKEY 0x200
-
-int pkey_reg_xstate_offset(void)
-{
- unsigned int eax;
- unsigned int ebx;
- unsigned int ecx;
- unsigned int edx;
- int xstate_offset;
- int xstate_size;
- unsigned long XSTATE_CPUID = 0xd;
- int leaf;
-
- /* assume that XSTATE_PKEY is set in XCR0 */
- leaf = XSTATE_PKEY_BIT;
- {
- eax = XSTATE_CPUID;
- ecx = leaf;
- __cpuid(&eax, &ebx, &ecx, &edx);
-
- if (leaf == XSTATE_PKEY_BIT) {
- xstate_offset = ebx;
- xstate_size = eax;
- }
- }
-
- if (xstate_size == 0) {
- printf("could not find size/offset of PKEY in xsave state\n");
- return 0;
- }
-
- return xstate_offset;
-}
-
#define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x)))
#define ALIGN_UP(x, align_to) (((x) + ((align_to)-1)) & ~((align_to)-1))
#define ALIGN_DOWN(x, align_to) ((x) & ~((align_to)-1))
diff --git a/tools/testing/selftests/vm/pkey-x86.h b/tools/testing/selftests/vm/pkey-x86.h
new file mode 100644
index 0000000..2f04ade
--- /dev/null
+++ b/tools/testing/selftests/vm/pkey-x86.h
@@ -0,0 +1,156 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _PKEYS_X86_H
+#define _PKEYS_X86_H
+
+#ifdef __i386__
+
+#ifndef SYS_mprotect_key
+# define SYS_mprotect_key 380
+#endif
+
+#ifndef SYS_pkey_alloc
+# define SYS_pkey_alloc 381
+# define SYS_pkey_free 382
+#endif
+
+#define REG_IP_IDX REG_EIP
+#define si_pkey_offset 0x14
+
+#else
+
+#ifndef SYS_mprotect_key
+# define SYS_mprotect_key 329
+#endif
+
+#ifndef SYS_pkey_alloc
+# define SYS_pkey_alloc 330
+# define SYS_pkey_free 331
+#endif
+
+#define REG_IP_IDX REG_RIP
+#define si_pkey_offset 0x20
+
+#endif
+
+#ifndef PKEY_DISABLE_ACCESS
+# define PKEY_DISABLE_ACCESS 0x1
+#endif
+
+#ifndef PKEY_DISABLE_WRITE
+# define PKEY_DISABLE_WRITE 0x2
+#endif
+
+#define NR_PKEYS 16
+#define PKEY_BITS_PER_PKEY 2
+#define HPAGE_SIZE (1UL<<21)
+#define PAGE_SIZE 4096
+#define MB (1<<20)
+
+static inline void __page_o_noops(void)
+{
+ /* 8-bytes of instruction * 512 bytes = 1 page */
+ asm(".rept 512 ; nopl 0x7eeeeeee(%eax) ; .endr");
+}
+
+static inline unsigned int __read_pkey_reg(void)
+{
+ unsigned int eax, edx;
+ unsigned int ecx = 0;
+ unsigned int pkey_reg;
+
+ asm volatile(".byte 0x0f,0x01,0xee\n\t"
+ : "=a" (eax), "=d" (edx)
+ : "c" (ecx));
+ pkey_reg = eax;
+ return pkey_reg;
+}
+
+static inline void __write_pkey_reg(unsigned int pkey_reg)
+{
+ unsigned int eax = pkey_reg;
+ unsigned int ecx = 0;
+ unsigned int edx = 0;
+
+ dprintf4("%s() changing %08x to %08x\n", __func__,
+ __read_pkey_reg(), pkey_reg);
+ asm volatile(".byte 0x0f,0x01,0xef\n\t"
+ : : "a" (eax), "c" (ecx), "d" (edx));
+ assert(pkey_reg == __read_pkey_reg());
+}
+
+static inline void __cpuid(unsigned int *eax, unsigned int *ebx,
+ unsigned int *ecx, unsigned int *edx)
+{
+ /* ecx is often an input as well as an output. */
+ asm volatile(
+ "cpuid;"
+ : "=a" (*eax),
+ "=b" (*ebx),
+ "=c" (*ecx),
+ "=d" (*edx)
+ : "0" (*eax), "2" (*ecx));
+}
+
+/* Intel-defined CPU features, CPUID level 0x00000007:0 (ecx) */
+#define X86_FEATURE_PKU (1<<3) /* Protection Keys for Userspace */
+#define X86_FEATURE_OSPKE (1<<4) /* OS Protection Keys Enable */
+
+static inline int cpu_has_pku(void)
+{
+ unsigned int eax;
+ unsigned int ebx;
+ unsigned int ecx;
+ unsigned int edx;
+
+ eax = 0x7;
+ ecx = 0x0;
+ __cpuid(&eax, &ebx, &ecx, &edx);
+
+ if (!(ecx & X86_FEATURE_PKU)) {
+ dprintf2("cpu does not have PKU\n");
+ return 0;
+ }
+ if (!(ecx & X86_FEATURE_OSPKE)) {
+ dprintf2("cpu does not have OSPKE\n");
+ return 0;
+ }
+ return 1;
+}
+
+#define XSTATE_PKEY_BIT (9)
+#define XSTATE_PKEY 0x200
+
+int pkey_reg_xstate_offset(void)
+{
+ unsigned int eax;
+ unsigned int ebx;
+ unsigned int ecx;
+ unsigned int edx;
+ int xstate_offset;
+ int xstate_size;
+ unsigned long XSTATE_CPUID = 0xd;
+ int leaf;
+
+ /* assume that XSTATE_PKEY is set in XCR0 */
+ leaf = XSTATE_PKEY_BIT;
+ {
+ eax = XSTATE_CPUID;
+ ecx = leaf;
+ __cpuid(&eax, &ebx, &ecx, &edx);
+
+ if (leaf == XSTATE_PKEY_BIT) {
+ xstate_offset = ebx;
+ xstate_size = eax;
+ }
+ }
+
+ if (xstate_size == 0) {
+ printf("could not find size/offset of PKEY in xsave state\n");
+ return 0;
+ }
+
+ return xstate_offset;
+}
+
+#endif /* _PKEYS_X86_H */
diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c
index cad52dc..99e4e1e 100644
--- a/tools/testing/selftests/vm/protection_keys.c
+++ b/tools/testing/selftests/vm/protection_keys.c
@@ -49,9 +49,6 @@
int test_nr;
unsigned int shadow_pkey_reg;
-
-#define HPAGE_SIZE (1UL<<21)
-
int dprint_in_signal;
char dprint_in_signal_buffer[DPRINT_IN_SIGNAL_BUF_SIZE];
@@ -137,12 +134,6 @@ void abort_hooks(void)
#endif
}
-static inline void __page_o_noops(void)
-{
- /* 8-bytes of instruction * 512 bytes = 1 page */
- asm(".rept 512 ; nopl 0x7eeeeeee(%eax) ; .endr");
-}
-
/*
* This attempts to have roughly a page of instructions followed by a few
* instructions that do a write, and another page of instructions. That
@@ -165,36 +156,6 @@ void lots_o_noops_around_write(int *write_to_me)
dprintf3("%s() done\n", __func__);
}
-#ifdef __i386__
-
-#ifndef SYS_mprotect_key
-# define SYS_mprotect_key 380
-#endif
-
-#ifndef SYS_pkey_alloc
-# define SYS_pkey_alloc 381
-# define SYS_pkey_free 382
-#endif
-
-#define REG_IP_IDX REG_EIP
-#define si_pkey_offset 0x14
-
-#else
-
-#ifndef SYS_mprotect_key
-# define SYS_mprotect_key 329
-#endif
-
-#ifndef SYS_pkey_alloc
-# define SYS_pkey_alloc 330
-# define SYS_pkey_free 331
-#endif
-
-#define REG_IP_IDX REG_RIP
-#define si_pkey_offset 0x20
-
-#endif
-
void dump_mem(void *dumpme, int len_bytes)
{
char *c = (void *)dumpme;
@@ -367,14 +328,6 @@ pid_t fork_lazy_child(void)
return forkret;
}
-#ifndef PKEY_DISABLE_ACCESS
-# define PKEY_DISABLE_ACCESS 0x1
-#endif
-
-#ifndef PKEY_DISABLE_WRITE
-# define PKEY_DISABLE_WRITE 0x2
-#endif
-
static u32 hw_pkey_get(int pkey, unsigned long flags)
{
u32 mask = (PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE);
--
1.7.1
^ permalink raw reply related
* [PATCH v13 03/24] selftests/vm: move generic definitions to header file
From: Ram Pai @ 2018-06-14 0:44 UTC (permalink / raw)
To: shuahkh, linux-kselftest
Cc: mpe, linuxppc-dev, linux-mm, x86, linux-arch, mingo, dave.hansen,
mhocko, bauerman, linuxram, fweimer, msuchanek, aneesh.kumar
In-Reply-To: <1528937115-10132-1-git-send-email-linuxram@us.ibm.com>
Moved all the generic definition and helper functions to the header file.
cc: Dave Hansen <dave.hansen@intel.com>
cc: Florian Weimer <fweimer@redhat.com>
Signed-off-by: Ram Pai <linuxram@us.ibm.com>
Signed-off-by: Thiago Jung Bauermann <bauerman@linux.ibm.com>
---
tools/testing/selftests/vm/pkey-helpers.h | 35 ++++++++++++++++++++++---
tools/testing/selftests/vm/protection_keys.c | 27 --------------------
2 files changed, 30 insertions(+), 32 deletions(-)
diff --git a/tools/testing/selftests/vm/pkey-helpers.h b/tools/testing/selftests/vm/pkey-helpers.h
index d5779be..6ad1bd5 100644
--- a/tools/testing/selftests/vm/pkey-helpers.h
+++ b/tools/testing/selftests/vm/pkey-helpers.h
@@ -13,6 +13,14 @@
#include <ucontext.h>
#include <sys/mman.h>
+/* Define some kernel-like types */
+#define u8 uint8_t
+#define u16 uint16_t
+#define u32 uint32_t
+#define u64 uint64_t
+
+#define PTR_ERR_ENOTSUP ((void *)-ENOTSUP)
+
#define NR_PKEYS 16
#define PKEY_BITS_PER_PKEY 2
@@ -53,6 +61,18 @@ static inline void sigsafe_printf(const char *format, ...)
#define dprintf3(args...) dprintf_level(3, args)
#define dprintf4(args...) dprintf_level(4, args)
+extern void abort_hooks(void);
+#define pkey_assert(condition) do { \
+ if (!(condition)) { \
+ dprintf0("assert() at %s::%d test_nr: %d iteration: %d\n", \
+ __FILE__, __LINE__, \
+ test_nr, iteration_nr); \
+ dprintf0("errno at assert: %d", errno); \
+ abort_hooks(); \
+ exit(__LINE__); \
+ } \
+} while (0)
+
extern unsigned int shadow_pkey_reg;
static inline unsigned int __read_pkey_reg(void)
{
@@ -137,11 +157,6 @@ static inline void __pkey_write_allow(int pkey, int do_allow_write)
dprintf4("pkey_reg now: %08x\n", read_pkey_reg());
}
-#define PROT_PKEY0 0x10 /* protection key value (bit 0) */
-#define PROT_PKEY1 0x20 /* protection key value (bit 1) */
-#define PROT_PKEY2 0x40 /* protection key value (bit 2) */
-#define PROT_PKEY3 0x80 /* protection key value (bit 3) */
-
#define PAGE_SIZE 4096
#define MB (1<<20)
@@ -219,4 +234,14 @@ int pkey_reg_xstate_offset(void)
return xstate_offset;
}
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x)))
+#define ALIGN_UP(x, align_to) (((x) + ((align_to)-1)) & ~((align_to)-1))
+#define ALIGN_DOWN(x, align_to) ((x) & ~((align_to)-1))
+#define ALIGN_PTR_UP(p, ptr_align_to) \
+ ((typeof(p))ALIGN_UP((unsigned long)(p), ptr_align_to))
+#define ALIGN_PTR_DOWN(p, ptr_align_to) \
+ ((typeof(p))ALIGN_DOWN((unsigned long)(p), ptr_align_to))
+#define __stringify_1(x...) #x
+#define __stringify(x...) __stringify_1(x)
+
#endif /* _PKEYS_HELPER_H */
diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c
index 9f373cc..cad52dc 100644
--- a/tools/testing/selftests/vm/protection_keys.c
+++ b/tools/testing/selftests/vm/protection_keys.c
@@ -51,31 +51,10 @@
unsigned int shadow_pkey_reg;
#define HPAGE_SIZE (1UL<<21)
-#define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x)))
-#define ALIGN_UP(x, align_to) (((x) + ((align_to)-1)) & ~((align_to)-1))
-#define ALIGN_DOWN(x, align_to) ((x) & ~((align_to)-1))
-#define ALIGN_PTR_UP(p, ptr_align_to) ((typeof(p))ALIGN_UP((unsigned long)(p), ptr_align_to))
-#define ALIGN_PTR_DOWN(p, ptr_align_to) ((typeof(p))ALIGN_DOWN((unsigned long)(p), ptr_align_to))
-#define __stringify_1(x...) #x
-#define __stringify(x...) __stringify_1(x)
-
-#define PTR_ERR_ENOTSUP ((void *)-ENOTSUP)
int dprint_in_signal;
char dprint_in_signal_buffer[DPRINT_IN_SIGNAL_BUF_SIZE];
-extern void abort_hooks(void);
-#define pkey_assert(condition) do { \
- if (!(condition)) { \
- dprintf0("assert() at %s::%d test_nr: %d iteration: %d\n", \
- __FILE__, __LINE__, \
- test_nr, iteration_nr); \
- dprintf0("errno at assert: %d", errno); \
- abort_hooks(); \
- exit(__LINE__); \
- } \
-} while (0)
-
void cat_into_file(char *str, char *file)
{
int fd = open(file, O_RDWR);
@@ -186,12 +165,6 @@ void lots_o_noops_around_write(int *write_to_me)
dprintf3("%s() done\n", __func__);
}
-/* Define some kernel-like types */
-#define u8 uint8_t
-#define u16 uint16_t
-#define u32 uint32_t
-#define u64 uint64_t
-
#ifdef __i386__
#ifndef SYS_mprotect_key
--
1.7.1
^ permalink raw reply related
* [PATCH v13 02/24] selftests/vm: rename all references to pkru to a generic name
From: Ram Pai @ 2018-06-14 0:44 UTC (permalink / raw)
To: shuahkh, linux-kselftest
Cc: mpe, linuxppc-dev, linux-mm, x86, linux-arch, mingo, dave.hansen,
mhocko, bauerman, linuxram, fweimer, msuchanek, aneesh.kumar
In-Reply-To: <1528937115-10132-1-git-send-email-linuxram@us.ibm.com>
some pkru references are named to pkey_reg
and some prku references are renamed to pkey
cc: Dave Hansen <dave.hansen@intel.com>
cc: Florian Weimer <fweimer@redhat.com>
Signed-off-by: Ram Pai <linuxram@us.ibm.com>
Signed-off-by: Thiago Jung Bauermann <bauerman@linux.ibm.com>
Reviewed-by: Dave Hansen <dave.hansen@intel.com>
---
tools/testing/selftests/vm/pkey-helpers.h | 85 +++++-----
tools/testing/selftests/vm/protection_keys.c | 238 ++++++++++++++------------
2 files changed, 169 insertions(+), 154 deletions(-)
diff --git a/tools/testing/selftests/vm/pkey-helpers.h b/tools/testing/selftests/vm/pkey-helpers.h
index 254e543..d5779be 100644
--- a/tools/testing/selftests/vm/pkey-helpers.h
+++ b/tools/testing/selftests/vm/pkey-helpers.h
@@ -14,7 +14,7 @@
#include <sys/mman.h>
#define NR_PKEYS 16
-#define PKRU_BITS_PER_PKEY 2
+#define PKEY_BITS_PER_PKEY 2
#ifndef DEBUG_LEVEL
#define DEBUG_LEVEL 0
@@ -53,85 +53,88 @@ static inline void sigsafe_printf(const char *format, ...)
#define dprintf3(args...) dprintf_level(3, args)
#define dprintf4(args...) dprintf_level(4, args)
-extern unsigned int shadow_pkru;
-static inline unsigned int __rdpkru(void)
+extern unsigned int shadow_pkey_reg;
+static inline unsigned int __read_pkey_reg(void)
{
unsigned int eax, edx;
unsigned int ecx = 0;
- unsigned int pkru;
+ unsigned int pkey_reg;
asm volatile(".byte 0x0f,0x01,0xee\n\t"
: "=a" (eax), "=d" (edx)
: "c" (ecx));
- pkru = eax;
- return pkru;
+ pkey_reg = eax;
+ return pkey_reg;
}
-static inline unsigned int _rdpkru(int line)
+static inline unsigned int _read_pkey_reg(int line)
{
- unsigned int pkru = __rdpkru();
+ unsigned int pkey_reg = __read_pkey_reg();
- dprintf4("rdpkru(line=%d) pkru: %x shadow: %x\n",
- line, pkru, shadow_pkru);
- assert(pkru == shadow_pkru);
+ dprintf4("read_pkey_reg(line=%d) pkey_reg: %x shadow: %x\n",
+ line, pkey_reg, shadow_pkey_reg);
+ assert(pkey_reg == shadow_pkey_reg);
- return pkru;
+ return pkey_reg;
}
-#define rdpkru() _rdpkru(__LINE__)
+#define read_pkey_reg() _read_pkey_reg(__LINE__)
-static inline void __wrpkru(unsigned int pkru)
+static inline void __write_pkey_reg(unsigned int pkey_reg)
{
- unsigned int eax = pkru;
+ unsigned int eax = pkey_reg;
unsigned int ecx = 0;
unsigned int edx = 0;
- dprintf4("%s() changing %08x to %08x\n", __func__, __rdpkru(), pkru);
+ dprintf4("%s() changing %08x to %08x\n", __func__,
+ __read_pkey_reg(), pkey_reg);
asm volatile(".byte 0x0f,0x01,0xef\n\t"
: : "a" (eax), "c" (ecx), "d" (edx));
- assert(pkru == __rdpkru());
+ assert(pkey_reg == __read_pkey_reg());
}
-static inline void wrpkru(unsigned int pkru)
+static inline void write_pkey_reg(unsigned int pkey_reg)
{
- dprintf4("%s() changing %08x to %08x\n", __func__, __rdpkru(), pkru);
+ dprintf4("%s() changing %08x to %08x\n", __func__,
+ __read_pkey_reg(), pkey_reg);
/* will do the shadow check for us: */
- rdpkru();
- __wrpkru(pkru);
- shadow_pkru = pkru;
- dprintf4("%s(%08x) pkru: %08x\n", __func__, pkru, __rdpkru());
+ read_pkey_reg();
+ __write_pkey_reg(pkey_reg);
+ shadow_pkey_reg = pkey_reg;
+ dprintf4("%s(%08x) pkey_reg: %08x\n", __func__,
+ pkey_reg, __read_pkey_reg());
}
/*
* These are technically racy. since something could
- * change PKRU between the read and the write.
+ * change PKEY register between the read and the write.
*/
static inline void __pkey_access_allow(int pkey, int do_allow)
{
- unsigned int pkru = rdpkru();
+ unsigned int pkey_reg = read_pkey_reg();
int bit = pkey * 2;
if (do_allow)
- pkru &= (1<<bit);
+ pkey_reg &= (1<<bit);
else
- pkru |= (1<<bit);
+ pkey_reg |= (1<<bit);
- dprintf4("pkru now: %08x\n", rdpkru());
- wrpkru(pkru);
+ dprintf4("pkey_reg now: %08x\n", read_pkey_reg());
+ write_pkey_reg(pkey_reg);
}
static inline void __pkey_write_allow(int pkey, int do_allow_write)
{
- long pkru = rdpkru();
+ long pkey_reg = read_pkey_reg();
int bit = pkey * 2 + 1;
if (do_allow_write)
- pkru &= (1<<bit);
+ pkey_reg &= (1<<bit);
else
- pkru |= (1<<bit);
+ pkey_reg |= (1<<bit);
- wrpkru(pkru);
- dprintf4("pkru now: %08x\n", rdpkru());
+ write_pkey_reg(pkey_reg);
+ dprintf4("pkey_reg now: %08x\n", read_pkey_reg());
}
#define PROT_PKEY0 0x10 /* protection key value (bit 0) */
@@ -181,10 +184,10 @@ static inline int cpu_has_pku(void)
return 1;
}
-#define XSTATE_PKRU_BIT (9)
-#define XSTATE_PKRU 0x200
+#define XSTATE_PKEY_BIT (9)
+#define XSTATE_PKEY 0x200
-int pkru_xstate_offset(void)
+int pkey_reg_xstate_offset(void)
{
unsigned int eax;
unsigned int ebx;
@@ -195,21 +198,21 @@ int pkru_xstate_offset(void)
unsigned long XSTATE_CPUID = 0xd;
int leaf;
- /* assume that XSTATE_PKRU is set in XCR0 */
- leaf = XSTATE_PKRU_BIT;
+ /* assume that XSTATE_PKEY is set in XCR0 */
+ leaf = XSTATE_PKEY_BIT;
{
eax = XSTATE_CPUID;
ecx = leaf;
__cpuid(&eax, &ebx, &ecx, &edx);
- if (leaf == XSTATE_PKRU_BIT) {
+ if (leaf == XSTATE_PKEY_BIT) {
xstate_offset = ebx;
xstate_size = eax;
}
}
if (xstate_size == 0) {
- printf("could not find size/offset of PKRU in xsave state\n");
+ printf("could not find size/offset of PKEY in xsave state\n");
return 0;
}
diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c
index 460b4bd..9f373cc 100644
--- a/tools/testing/selftests/vm/protection_keys.c
+++ b/tools/testing/selftests/vm/protection_keys.c
@@ -1,11 +1,11 @@
// SPDX-License-Identifier: GPL-2.0
/*
- * Tests x86 Memory Protection Keys (see Documentation/x86/protection-keys.txt)
+ * Tests Memory Protection Keys (see Documentation/vm/protection-keys.txt)
*
* There are examples in here of:
* * how to set protection keys on memory
- * * how to set/clear bits in PKRU (the rights register)
- * * how to handle SEGV_PKRU signals and extract pkey-relevant
+ * * how to set/clear bits in pkey registers (the rights register)
+ * * how to handle SEGV_PKUERR signals and extract pkey-relevant
* information from the siginfo
*
* Things to add:
@@ -48,7 +48,7 @@
int iteration_nr = 1;
int test_nr;
-unsigned int shadow_pkru;
+unsigned int shadow_pkey_reg;
#define HPAGE_SIZE (1UL<<21)
#define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x)))
@@ -255,7 +255,7 @@ void dump_mem(void *dumpme, int len_bytes)
return "UNKNOWN";
}
-int pkru_faults;
+int pkey_faults;
int last_si_pkey = -1;
void signal_handler(int signum, siginfo_t *si, void *vucontext)
{
@@ -263,16 +263,16 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext)
int trapno;
unsigned long ip;
char *fpregs;
- u32 *pkru_ptr;
+ u32 *pkey_reg_ptr;
u64 siginfo_pkey;
u32 *si_pkey_ptr;
- int pkru_offset;
+ int pkey_reg_offset;
fpregset_t fpregset;
dprint_in_signal = 1;
dprintf1(">>>>===============SIGSEGV============================\n");
- dprintf1("%s()::%d, pkru: 0x%x shadow: %x\n", __func__, __LINE__,
- __rdpkru(), shadow_pkru);
+ dprintf1("%s()::%d, pkey_reg: 0x%x shadow: %x\n", __func__, __LINE__,
+ __read_pkey_reg(), shadow_pkey_reg);
trapno = uctxt->uc_mcontext.gregs[REG_TRAPNO];
ip = uctxt->uc_mcontext.gregs[REG_IP_IDX];
@@ -289,19 +289,19 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext)
*/
fpregs += 0x70;
#endif
- pkru_offset = pkru_xstate_offset();
- pkru_ptr = (void *)(&fpregs[pkru_offset]);
+ pkey_reg_offset = pkey_reg_xstate_offset();
+ pkey_reg_ptr = (void *)(&fpregs[pkey_reg_offset]);
dprintf1("siginfo: %p\n", si);
dprintf1(" fpregs: %p\n", fpregs);
/*
- * If we got a PKRU fault, we *HAVE* to have at least one bit set in
+ * If we got a PKEY fault, we *HAVE* to have at least one bit set in
* here.
*/
- dprintf1("pkru_xstate_offset: %d\n", pkru_xstate_offset());
+ dprintf1("pkey_reg_xstate_offset: %d\n", pkey_reg_xstate_offset());
if (DEBUG_LEVEL > 4)
- dump_mem(pkru_ptr - 128, 256);
- pkey_assert(*pkru_ptr);
+ dump_mem(pkey_reg_ptr - 128, 256);
+ pkey_assert(*pkey_reg_ptr);
if ((si->si_code == SEGV_MAPERR) ||
(si->si_code == SEGV_ACCERR) ||
@@ -317,13 +317,16 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext)
pkey_assert(siginfo_pkey < NR_PKEYS);
last_si_pkey = siginfo_pkey;
- dprintf1("signal pkru from xsave: %08x\n", *pkru_ptr);
- /* need __rdpkru() version so we do not do shadow_pkru checking */
- dprintf1("signal pkru from pkru: %08x\n", __rdpkru());
+ dprintf1("signal pkey_reg from xsave: %08x\n", *pkey_reg_ptr);
+ /*
+ * need __read_pkey_reg() version so we do not do shadow_pkey_reg
+ * checking
+ */
+ dprintf1("signal pkey_reg from pkey_reg: %08x\n", __read_pkey_reg());
dprintf1("pkey from siginfo: %jx\n", siginfo_pkey);
- *(u64 *)pkru_ptr = 0x00000000;
+ *(u64 *)pkey_reg_ptr = 0x00000000;
dprintf1("WARNING: set PRKU=0 to allow faulting instruction to continue\n");
- pkru_faults++;
+ pkey_faults++;
dprintf1("<<<<==================================================\n");
dprint_in_signal = 0;
}
@@ -402,45 +405,47 @@ pid_t fork_lazy_child(void)
static u32 hw_pkey_get(int pkey, unsigned long flags)
{
u32 mask = (PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE);
- u32 pkru = __rdpkru();
- u32 shifted_pkru;
- u32 masked_pkru;
+ u32 pkey_reg = __read_pkey_reg();
+ u32 shifted_pkey_reg;
+ u32 masked_pkey_reg;
dprintf1("%s(pkey=%d, flags=%lx) = %x / %d\n",
__func__, pkey, flags, 0, 0);
- dprintf2("%s() raw pkru: %x\n", __func__, pkru);
+ dprintf2("%s() raw pkey_reg: %x\n", __func__, pkey_reg);
- shifted_pkru = (pkru >> (pkey * PKRU_BITS_PER_PKEY));
- dprintf2("%s() shifted_pkru: %x\n", __func__, shifted_pkru);
- masked_pkru = shifted_pkru & mask;
- dprintf2("%s() masked pkru: %x\n", __func__, masked_pkru);
+ shifted_pkey_reg = (pkey_reg >> (pkey * PKEY_BITS_PER_PKEY));
+ dprintf2("%s() shifted_pkey_reg: %x\n", __func__, shifted_pkey_reg);
+ masked_pkey_reg = shifted_pkey_reg & mask;
+ dprintf2("%s() masked pkey_reg: %x\n", __func__, masked_pkey_reg);
/*
* shift down the relevant bits to the lowest two, then
* mask off all the other high bits.
*/
- return masked_pkru;
+ return masked_pkey_reg;
}
static int hw_pkey_set(int pkey, unsigned long rights, unsigned long flags)
{
u32 mask = (PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE);
- u32 old_pkru = __rdpkru();
- u32 new_pkru;
+ u32 old_pkey_reg = __read_pkey_reg();
+ u32 new_pkey_reg;
/* make sure that 'rights' only contains the bits we expect: */
assert(!(rights & ~mask));
- /* copy old pkru */
- new_pkru = old_pkru;
+ /* copy old pkey_reg */
+ new_pkey_reg = old_pkey_reg;
/* mask out bits from pkey in old value: */
- new_pkru &= ~(mask << (pkey * PKRU_BITS_PER_PKEY));
+ new_pkey_reg &= ~(mask << (pkey * PKEY_BITS_PER_PKEY));
/* OR in new bits for pkey: */
- new_pkru |= (rights << (pkey * PKRU_BITS_PER_PKEY));
+ new_pkey_reg |= (rights << (pkey * PKEY_BITS_PER_PKEY));
- __wrpkru(new_pkru);
+ __write_pkey_reg(new_pkey_reg);
- dprintf3("%s(pkey=%d, rights=%lx, flags=%lx) = %x pkru now: %x old_pkru: %x\n",
- __func__, pkey, rights, flags, 0, __rdpkru(), old_pkru);
+ dprintf3("%s(pkey=%d, rights=%lx, flags=%lx) = %x"
+ " pkey_reg now: %x old_pkey_reg: %x\n",
+ __func__, pkey, rights, flags, 0, __read_pkey_reg(),
+ old_pkey_reg);
return 0;
}
@@ -449,7 +454,7 @@ void pkey_disable_set(int pkey, int flags)
unsigned long syscall_flags = 0;
int ret;
int pkey_rights;
- u32 orig_pkru = rdpkru();
+ u32 orig_pkey_reg = read_pkey_reg();
dprintf1("START->%s(%d, 0x%x)\n", __func__,
pkey, flags);
@@ -465,9 +470,9 @@ void pkey_disable_set(int pkey, int flags)
ret = hw_pkey_set(pkey, pkey_rights, syscall_flags);
assert(!ret);
- /*pkru and flags have the same format */
- shadow_pkru |= flags << (pkey * 2);
- dprintf1("%s(%d) shadow: 0x%x\n", __func__, pkey, shadow_pkru);
+ /*pkey_reg and flags have the same format */
+ shadow_pkey_reg |= flags << (pkey * 2);
+ dprintf1("%s(%d) shadow: 0x%x\n", __func__, pkey, shadow_pkey_reg);
pkey_assert(ret >= 0);
@@ -475,9 +480,9 @@ void pkey_disable_set(int pkey, int flags)
dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__,
pkey, pkey, pkey_rights);
- dprintf1("%s(%d) pkru: 0x%x\n", __func__, pkey, rdpkru());
+ dprintf1("%s(%d) pkey_reg: 0x%x\n", __func__, pkey, read_pkey_reg());
if (flags)
- pkey_assert(rdpkru() > orig_pkru);
+ pkey_assert(read_pkey_reg() > orig_pkey_reg);
dprintf1("END<---%s(%d, 0x%x)\n", __func__,
pkey, flags);
}
@@ -487,7 +492,7 @@ void pkey_disable_clear(int pkey, int flags)
unsigned long syscall_flags = 0;
int ret;
int pkey_rights = hw_pkey_get(pkey, syscall_flags);
- u32 orig_pkru = rdpkru();
+ u32 orig_pkey_reg = read_pkey_reg();
pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE));
@@ -498,17 +503,16 @@ void pkey_disable_clear(int pkey, int flags)
pkey_rights |= flags;
ret = hw_pkey_set(pkey, pkey_rights, 0);
- /* pkru and flags have the same format */
- shadow_pkru &= ~(flags << (pkey * 2));
+ shadow_pkey_reg &= ~(flags << (pkey * 2));
pkey_assert(ret >= 0);
pkey_rights = hw_pkey_get(pkey, syscall_flags);
dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__,
pkey, pkey, pkey_rights);
- dprintf1("%s(%d) pkru: 0x%x\n", __func__, pkey, rdpkru());
+ dprintf1("%s(%d) pkey_reg: 0x%x\n", __func__, pkey, read_pkey_reg());
if (flags)
- assert(rdpkru() > orig_pkru);
+ assert(read_pkey_reg() > orig_pkey_reg);
}
void pkey_write_allow(int pkey)
@@ -561,33 +565,38 @@ int alloc_pkey(void)
int ret;
unsigned long init_val = 0x0;
- dprintf1("alloc_pkey()::%d, pkru: 0x%x shadow: %x\n",
- __LINE__, __rdpkru(), shadow_pkru);
+ dprintf1("%s()::%d, pkey_reg: 0x%x shadow: %x\n", __func__,
+ __LINE__, __read_pkey_reg(), shadow_pkey_reg);
ret = sys_pkey_alloc(0, init_val);
/*
- * pkey_alloc() sets PKRU, so we need to reflect it in
- * shadow_pkru:
+ * pkey_alloc() sets PKEY register, so we need to reflect it in
+ * shadow_pkey_reg:
*/
- dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n",
- __LINE__, ret, __rdpkru(), shadow_pkru);
+ dprintf4("%s()::%d, ret: %d pkey_reg: 0x%x shadow: 0x%x\n",
+ __func__, __LINE__, ret, __read_pkey_reg(),
+ shadow_pkey_reg);
if (ret) {
/* clear both the bits: */
- shadow_pkru &= ~(0x3 << (ret * 2));
- dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n",
- __LINE__, ret, __rdpkru(), shadow_pkru);
+ shadow_pkey_reg &= ~(0x3 << (ret * 2));
+ dprintf4("%s()::%d, ret: %d pkey_reg: 0x%x shadow: 0x%x\n",
+ __func__,
+ __LINE__, ret, __read_pkey_reg(),
+ shadow_pkey_reg);
/*
* move the new state in from init_val
- * (remember, we cheated and init_val == pkru format)
+ * (remember, we cheated and init_val == pkey_reg format)
*/
- shadow_pkru |= (init_val << (ret * 2));
+ shadow_pkey_reg |= (init_val << (ret * 2));
}
- dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n",
- __LINE__, ret, __rdpkru(), shadow_pkru);
- dprintf1("alloc_pkey()::%d errno: %d\n", __LINE__, errno);
+ dprintf4("%s()::%d, ret: %d pkey_reg: 0x%x shadow: 0x%x\n",
+ __func__, __LINE__, ret, __read_pkey_reg(),
+ shadow_pkey_reg);
+ dprintf1("%s()::%d errno: %d\n", __func__, __LINE__, errno);
/* for shadow checking: */
- rdpkru();
- dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n",
- __LINE__, ret, __rdpkru(), shadow_pkru);
+ read_pkey_reg();
+ dprintf4("%s()::%d, ret: %d pkey_reg: 0x%x shadow: 0x%x\n",
+ __func__, __LINE__, ret, __read_pkey_reg(),
+ shadow_pkey_reg);
return ret;
}
@@ -638,8 +647,8 @@ int alloc_random_pkey(void)
free_ret = sys_pkey_free(alloced_pkeys[i]);
pkey_assert(!free_ret);
}
- dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__,
- __LINE__, ret, __rdpkru(), shadow_pkru);
+ dprintf1("%s()::%d, ret: %d pkey_reg: 0x%x shadow: 0x%x\n", __func__,
+ __LINE__, ret, __read_pkey_reg(), shadow_pkey_reg);
return ret;
}
@@ -657,11 +666,13 @@ int mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot,
if (nr_iterations-- < 0)
break;
- dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__,
- __LINE__, ret, __rdpkru(), shadow_pkru);
+ dprintf1("%s()::%d, ret: %d pkey_reg: 0x%x shadow: 0x%x\n",
+ __func__, __LINE__, ret, __read_pkey_reg(),
+ shadow_pkey_reg);
sys_pkey_free(rpkey);
- dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__,
- __LINE__, ret, __rdpkru(), shadow_pkru);
+ dprintf1("%s()::%d, ret: %d pkey_reg: 0x%x shadow: 0x%x\n",
+ __func__, __LINE__, ret, __read_pkey_reg(),
+ shadow_pkey_reg);
}
pkey_assert(pkey < NR_PKEYS);
@@ -669,8 +680,8 @@ int mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot,
dprintf1("mprotect_pkey(%p, %zx, prot=0x%lx, pkey=%ld) ret: %d\n",
ptr, size, orig_prot, pkey, ret);
pkey_assert(!ret);
- dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__,
- __LINE__, ret, __rdpkru(), shadow_pkru);
+ dprintf1("%s()::%d, ret: %d pkey_reg: 0x%x shadow: 0x%x\n", __func__,
+ __LINE__, ret, __read_pkey_reg(), shadow_pkey_reg);
return ret;
}
@@ -752,7 +763,7 @@ void free_pkey_malloc(void *ptr)
void *ptr;
int ret;
- rdpkru();
+ read_pkey_reg();
dprintf1("doing %s(size=%ld, prot=0x%x, pkey=%d)\n", __func__,
size, prot, pkey);
pkey_assert(pkey < NR_PKEYS);
@@ -761,7 +772,7 @@ void free_pkey_malloc(void *ptr)
ret = mprotect_pkey((void *)ptr, PAGE_SIZE, prot, pkey);
pkey_assert(!ret);
record_pkey_malloc(ptr, size, prot);
- rdpkru();
+ read_pkey_reg();
dprintf1("%s() for pkey %d @ %p\n", __func__, pkey, ptr);
return ptr;
@@ -924,15 +935,15 @@ void setup_hugetlbfs(void)
return ret;
}
-int last_pkru_faults;
+int last_pkey_faults;
#define UNKNOWN_PKEY -2
-void expected_pk_fault(int pkey)
+void expected_pkey_fault(int pkey)
{
- dprintf2("%s(): last_pkru_faults: %d pkru_faults: %d\n",
- __func__, last_pkru_faults, pkru_faults);
+ dprintf2("%s(): last_pkey_faults: %d pkey_faults: %d\n",
+ __func__, last_pkey_faults, pkey_faults);
dprintf2("%s(%d): last_si_pkey: %d\n", __func__, pkey, last_si_pkey);
- pkey_assert(last_pkru_faults + 1 == pkru_faults);
+ pkey_assert(last_pkey_faults + 1 == pkey_faults);
/*
* For exec-only memory, we do not know the pkey in
* advance, so skip this check.
@@ -941,23 +952,23 @@ void expected_pk_fault(int pkey)
pkey_assert(last_si_pkey == pkey);
/*
- * The signal handler shold have cleared out PKRU to let the
+ * The signal handler shold have cleared out PKEY register to let the
* test program continue. We now have to restore it.
*/
- if (__rdpkru() != 0)
+ if (__read_pkey_reg() != 0)
pkey_assert(0);
- __wrpkru(shadow_pkru);
- dprintf1("%s() set PKRU=%x to restore state after signal nuked it\n",
- __func__, shadow_pkru);
- last_pkru_faults = pkru_faults;
+ __write_pkey_reg(shadow_pkey_reg);
+ dprintf1("%s() set pkey_reg=%x to restore state after signal "
+ "nuked it\n", __func__, shadow_pkey_reg);
+ last_pkey_faults = pkey_faults;
last_si_pkey = -1;
}
-#define do_not_expect_pk_fault(msg) do { \
- if (last_pkru_faults != pkru_faults) \
- dprintf0("unexpected PK fault: %s\n", msg); \
- pkey_assert(last_pkru_faults == pkru_faults); \
+#define do_not_expect_pkey_fault(msg) do { \
+ if (last_pkey_faults != pkey_faults) \
+ dprintf0("unexpected PKey fault: %s\n", msg); \
+ pkey_assert(last_pkey_faults == pkey_faults); \
} while (0)
int test_fds[10] = { -1 };
@@ -1015,25 +1026,25 @@ void test_read_of_access_disabled_region(int *ptr, u16 pkey)
int ptr_contents;
dprintf1("disabling access to PKEY[%02d], doing read @ %p\n", pkey, ptr);
- rdpkru();
+ read_pkey_reg();
pkey_access_deny(pkey);
ptr_contents = read_ptr(ptr);
dprintf1("*ptr: %d\n", ptr_contents);
- expected_pk_fault(pkey);
+ expected_pkey_fault(pkey);
}
void test_write_of_write_disabled_region(int *ptr, u16 pkey)
{
dprintf1("disabling write access to PKEY[%02d], doing write\n", pkey);
pkey_write_deny(pkey);
*ptr = __LINE__;
- expected_pk_fault(pkey);
+ expected_pkey_fault(pkey);
}
void test_write_of_access_disabled_region(int *ptr, u16 pkey)
{
dprintf1("disabling access to PKEY[%02d], doing write\n", pkey);
pkey_access_deny(pkey);
*ptr = __LINE__;
- expected_pk_fault(pkey);
+ expected_pkey_fault(pkey);
}
void test_kernel_write_of_access_disabled_region(int *ptr, u16 pkey)
{
@@ -1145,9 +1156,10 @@ void test_pkey_alloc_exhaust(int *ptr, u16 pkey)
int new_pkey;
dprintf1("%s() alloc loop: %d\n", __func__, i);
new_pkey = alloc_pkey();
- dprintf4("%s()::%d, err: %d pkru: 0x%x shadow: 0x%x\n", __func__,
- __LINE__, err, __rdpkru(), shadow_pkru);
- rdpkru(); /* for shadow checking */
+ dprintf4("%s()::%d, err: %d pkey_reg: 0x%x shadow: 0x%x\n",
+ __func__, __LINE__, err, __read_pkey_reg(),
+ shadow_pkey_reg);
+ read_pkey_reg(); /* for shadow checking */
dprintf2("%s() errno: %d ENOSPC: %d\n", __func__, errno, ENOSPC);
if ((new_pkey == -1) && (errno == ENOSPC)) {
dprintf2("%s() failed to allocate pkey after %d tries\n",
@@ -1180,7 +1192,7 @@ void test_pkey_alloc_exhaust(int *ptr, u16 pkey)
for (i = 0; i < nr_allocated_pkeys; i++) {
err = sys_pkey_free(allocated_pkeys[i]);
pkey_assert(!err);
- rdpkru(); /* for shadow checking */
+ read_pkey_reg(); /* for shadow checking */
}
}
@@ -1266,7 +1278,7 @@ void test_ptrace_of_child(int *ptr, u16 pkey)
pkey_assert(ret != -1);
/* Now access from the current task, and expect an exception: */
peek_result = read_ptr(ptr);
- expected_pk_fault(pkey);
+ expected_pkey_fault(pkey);
/*
* Try to access the NON-pkey-protected "plain_ptr" via ptrace:
@@ -1276,7 +1288,7 @@ void test_ptrace_of_child(int *ptr, u16 pkey)
pkey_assert(ret != -1);
/* Now access from the current task, and expect NO exception: */
peek_result = read_ptr(plain_ptr);
- do_not_expect_pk_fault("read plain pointer after ptrace");
+ do_not_expect_pkey_fault("read plain pointer after ptrace");
ret = ptrace(PTRACE_DETACH, child_pid, ignored, 0);
pkey_assert(ret != -1);
@@ -1326,17 +1338,17 @@ void test_executing_on_unreadable_memory(int *ptr, u16 pkey)
pkey_assert(!ret);
pkey_access_deny(pkey);
- dprintf2("pkru: %x\n", rdpkru());
+ dprintf2("pkey_reg: %x\n", read_pkey_reg());
/*
* Make sure this is an *instruction* fault
*/
madvise(p1, PAGE_SIZE, MADV_DONTNEED);
lots_o_noops_around_write(&scratch);
- do_not_expect_pk_fault("executing on PROT_EXEC memory");
+ do_not_expect_pkey_fault("executing on PROT_EXEC memory");
ptr_contents = read_ptr(p1);
dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents);
- expected_pk_fault(pkey);
+ expected_pkey_fault(pkey);
}
void test_implicit_mprotect_exec_only_memory(int *ptr, u16 pkey)
@@ -1357,15 +1369,15 @@ void test_implicit_mprotect_exec_only_memory(int *ptr, u16 pkey)
ret = mprotect(p1, PAGE_SIZE, PROT_EXEC);
pkey_assert(!ret);
- dprintf2("pkru: %x\n", rdpkru());
+ dprintf2("pkru: %x\n", read_pkey_reg());
/* Make sure this is an *instruction* fault */
madvise(p1, PAGE_SIZE, MADV_DONTNEED);
lots_o_noops_around_write(&scratch);
- do_not_expect_pk_fault("executing on PROT_EXEC memory");
+ do_not_expect_pkey_fault("executing on PROT_EXEC memory");
ptr_contents = read_ptr(p1);
dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents);
- expected_pk_fault(UNKNOWN_PKEY);
+ expected_pkey_fault(UNKNOWN_PKEY);
/*
* Put the memory back to non-PROT_EXEC. Should clear the
@@ -1379,7 +1391,7 @@ void test_implicit_mprotect_exec_only_memory(int *ptr, u16 pkey)
ret = mprotect(p1, PAGE_SIZE, PROT_READ|PROT_EXEC);
pkey_assert(!ret);
ptr_contents = read_ptr(p1);
- do_not_expect_pk_fault("plain read on recently PROT_EXEC area");
+ do_not_expect_pkey_fault("plain read on recently PROT_EXEC area");
}
void test_mprotect_pkey_on_unsupported_cpu(int *ptr, u16 pkey)
@@ -1421,7 +1433,7 @@ void run_tests_once(void)
for (test_nr = 0; test_nr < ARRAY_SIZE(pkey_tests); test_nr++) {
int pkey;
- int orig_pkru_faults = pkru_faults;
+ int orig_pkey_faults = pkey_faults;
dprintf1("======================\n");
dprintf1("test %d preparing...\n", test_nr);
@@ -1436,8 +1448,8 @@ void run_tests_once(void)
free_pkey_malloc(ptr);
sys_pkey_free(pkey);
- dprintf1("pkru_faults: %d\n", pkru_faults);
- dprintf1("orig_pkru_faults: %d\n", orig_pkru_faults);
+ dprintf1("pkey_faults: %d\n", pkey_faults);
+ dprintf1("orig_pkey_faults: %d\n", orig_pkey_faults);
tracing_off();
close_test_fds();
@@ -1450,7 +1462,7 @@ void run_tests_once(void)
void pkey_setup_shadow(void)
{
- shadow_pkru = __rdpkru();
+ shadow_pkey_reg = __read_pkey_reg();
}
int main(void)
@@ -1474,7 +1486,7 @@ int main(void)
}
pkey_setup_shadow();
- printf("startup pkru: %x\n", rdpkru());
+ printf("startup pkey_reg: %x\n", read_pkey_reg());
setup_hugetlbfs();
while (nr_iterations-- > 0)
--
1.7.1
^ permalink raw reply related
* [PATCH v13 01/24] selftests/x86: Move protecton key selftest to arch neutral directory
From: Ram Pai @ 2018-06-14 0:44 UTC (permalink / raw)
To: shuahkh, linux-kselftest
Cc: mpe, linuxppc-dev, linux-mm, x86, linux-arch, mingo, dave.hansen,
mhocko, bauerman, linuxram, fweimer, msuchanek, aneesh.kumar
In-Reply-To: <1528937115-10132-1-git-send-email-linuxram@us.ibm.com>
cc: Dave Hansen <dave.hansen@intel.com>
cc: Florian Weimer <fweimer@redhat.com>
Signed-off-by: Ram Pai <linuxram@us.ibm.com>
Signed-off-by: Thiago Jung Bauermann <bauerman@linux.ibm.com>
Acked-by: Ingo Molnar <mingo@kernel.org>
---
tools/testing/selftests/vm/.gitignore | 1 +
tools/testing/selftests/vm/Makefile | 1 +
tools/testing/selftests/vm/pkey-helpers.h | 219 ++++
tools/testing/selftests/vm/protection_keys.c | 1485 +++++++++++++++++++++++++
tools/testing/selftests/x86/.gitignore | 1 -
tools/testing/selftests/x86/pkey-helpers.h | 219 ----
tools/testing/selftests/x86/protection_keys.c | 1485 -------------------------
7 files changed, 1706 insertions(+), 1705 deletions(-)
create mode 100644 tools/testing/selftests/vm/pkey-helpers.h
create mode 100644 tools/testing/selftests/vm/protection_keys.c
delete mode 100644 tools/testing/selftests/x86/pkey-helpers.h
delete mode 100644 tools/testing/selftests/x86/protection_keys.c
diff --git a/tools/testing/selftests/vm/.gitignore b/tools/testing/selftests/vm/.gitignore
index 342c7bc..0214fbf 100644
--- a/tools/testing/selftests/vm/.gitignore
+++ b/tools/testing/selftests/vm/.gitignore
@@ -12,3 +12,4 @@ mlock-random-test
virtual_address_range
gup_benchmark
va_128TBswitch
+protection_keys
diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile
index fdefa22..9788a58 100644
--- a/tools/testing/selftests/vm/Makefile
+++ b/tools/testing/selftests/vm/Makefile
@@ -20,6 +20,7 @@ TEST_GEN_FILES += transhuge-stress
TEST_GEN_FILES += userfaultfd
TEST_GEN_FILES += va_128TBswitch
TEST_GEN_FILES += virtual_address_range
+TEST_GEN_FILES += protection_keys
TEST_PROGS := run_vmtests
diff --git a/tools/testing/selftests/vm/pkey-helpers.h b/tools/testing/selftests/vm/pkey-helpers.h
new file mode 100644
index 0000000..254e543
--- /dev/null
+++ b/tools/testing/selftests/vm/pkey-helpers.h
@@ -0,0 +1,219 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PKEYS_HELPER_H
+#define _PKEYS_HELPER_H
+#define _GNU_SOURCE
+#include <string.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <signal.h>
+#include <assert.h>
+#include <stdlib.h>
+#include <ucontext.h>
+#include <sys/mman.h>
+
+#define NR_PKEYS 16
+#define PKRU_BITS_PER_PKEY 2
+
+#ifndef DEBUG_LEVEL
+#define DEBUG_LEVEL 0
+#endif
+#define DPRINT_IN_SIGNAL_BUF_SIZE 4096
+extern int dprint_in_signal;
+extern char dprint_in_signal_buffer[DPRINT_IN_SIGNAL_BUF_SIZE];
+static inline void sigsafe_printf(const char *format, ...)
+{
+ va_list ap;
+
+ if (!dprint_in_signal) {
+ va_start(ap, format);
+ vprintf(format, ap);
+ va_end(ap);
+ } else {
+ int ret;
+ /*
+ * No printf() functions are signal-safe.
+ * They deadlock easily. Write the format
+ * string to get some output, even if
+ * incomplete.
+ */
+ ret = write(1, format, strlen(format));
+ if (ret < 0)
+ exit(1);
+ }
+}
+#define dprintf_level(level, args...) do { \
+ if (level <= DEBUG_LEVEL) \
+ sigsafe_printf(args); \
+} while (0)
+#define dprintf0(args...) dprintf_level(0, args)
+#define dprintf1(args...) dprintf_level(1, args)
+#define dprintf2(args...) dprintf_level(2, args)
+#define dprintf3(args...) dprintf_level(3, args)
+#define dprintf4(args...) dprintf_level(4, args)
+
+extern unsigned int shadow_pkru;
+static inline unsigned int __rdpkru(void)
+{
+ unsigned int eax, edx;
+ unsigned int ecx = 0;
+ unsigned int pkru;
+
+ asm volatile(".byte 0x0f,0x01,0xee\n\t"
+ : "=a" (eax), "=d" (edx)
+ : "c" (ecx));
+ pkru = eax;
+ return pkru;
+}
+
+static inline unsigned int _rdpkru(int line)
+{
+ unsigned int pkru = __rdpkru();
+
+ dprintf4("rdpkru(line=%d) pkru: %x shadow: %x\n",
+ line, pkru, shadow_pkru);
+ assert(pkru == shadow_pkru);
+
+ return pkru;
+}
+
+#define rdpkru() _rdpkru(__LINE__)
+
+static inline void __wrpkru(unsigned int pkru)
+{
+ unsigned int eax = pkru;
+ unsigned int ecx = 0;
+ unsigned int edx = 0;
+
+ dprintf4("%s() changing %08x to %08x\n", __func__, __rdpkru(), pkru);
+ asm volatile(".byte 0x0f,0x01,0xef\n\t"
+ : : "a" (eax), "c" (ecx), "d" (edx));
+ assert(pkru == __rdpkru());
+}
+
+static inline void wrpkru(unsigned int pkru)
+{
+ dprintf4("%s() changing %08x to %08x\n", __func__, __rdpkru(), pkru);
+ /* will do the shadow check for us: */
+ rdpkru();
+ __wrpkru(pkru);
+ shadow_pkru = pkru;
+ dprintf4("%s(%08x) pkru: %08x\n", __func__, pkru, __rdpkru());
+}
+
+/*
+ * These are technically racy. since something could
+ * change PKRU between the read and the write.
+ */
+static inline void __pkey_access_allow(int pkey, int do_allow)
+{
+ unsigned int pkru = rdpkru();
+ int bit = pkey * 2;
+
+ if (do_allow)
+ pkru &= (1<<bit);
+ else
+ pkru |= (1<<bit);
+
+ dprintf4("pkru now: %08x\n", rdpkru());
+ wrpkru(pkru);
+}
+
+static inline void __pkey_write_allow(int pkey, int do_allow_write)
+{
+ long pkru = rdpkru();
+ int bit = pkey * 2 + 1;
+
+ if (do_allow_write)
+ pkru &= (1<<bit);
+ else
+ pkru |= (1<<bit);
+
+ wrpkru(pkru);
+ dprintf4("pkru now: %08x\n", rdpkru());
+}
+
+#define PROT_PKEY0 0x10 /* protection key value (bit 0) */
+#define PROT_PKEY1 0x20 /* protection key value (bit 1) */
+#define PROT_PKEY2 0x40 /* protection key value (bit 2) */
+#define PROT_PKEY3 0x80 /* protection key value (bit 3) */
+
+#define PAGE_SIZE 4096
+#define MB (1<<20)
+
+static inline void __cpuid(unsigned int *eax, unsigned int *ebx,
+ unsigned int *ecx, unsigned int *edx)
+{
+ /* ecx is often an input as well as an output. */
+ asm volatile(
+ "cpuid;"
+ : "=a" (*eax),
+ "=b" (*ebx),
+ "=c" (*ecx),
+ "=d" (*edx)
+ : "0" (*eax), "2" (*ecx));
+}
+
+/* Intel-defined CPU features, CPUID level 0x00000007:0 (ecx) */
+#define X86_FEATURE_PKU (1<<3) /* Protection Keys for Userspace */
+#define X86_FEATURE_OSPKE (1<<4) /* OS Protection Keys Enable */
+
+static inline int cpu_has_pku(void)
+{
+ unsigned int eax;
+ unsigned int ebx;
+ unsigned int ecx;
+ unsigned int edx;
+
+ eax = 0x7;
+ ecx = 0x0;
+ __cpuid(&eax, &ebx, &ecx, &edx);
+
+ if (!(ecx & X86_FEATURE_PKU)) {
+ dprintf2("cpu does not have PKU\n");
+ return 0;
+ }
+ if (!(ecx & X86_FEATURE_OSPKE)) {
+ dprintf2("cpu does not have OSPKE\n");
+ return 0;
+ }
+ return 1;
+}
+
+#define XSTATE_PKRU_BIT (9)
+#define XSTATE_PKRU 0x200
+
+int pkru_xstate_offset(void)
+{
+ unsigned int eax;
+ unsigned int ebx;
+ unsigned int ecx;
+ unsigned int edx;
+ int xstate_offset;
+ int xstate_size;
+ unsigned long XSTATE_CPUID = 0xd;
+ int leaf;
+
+ /* assume that XSTATE_PKRU is set in XCR0 */
+ leaf = XSTATE_PKRU_BIT;
+ {
+ eax = XSTATE_CPUID;
+ ecx = leaf;
+ __cpuid(&eax, &ebx, &ecx, &edx);
+
+ if (leaf == XSTATE_PKRU_BIT) {
+ xstate_offset = ebx;
+ xstate_size = eax;
+ }
+ }
+
+ if (xstate_size == 0) {
+ printf("could not find size/offset of PKRU in xsave state\n");
+ return 0;
+ }
+
+ return xstate_offset;
+}
+
+#endif /* _PKEYS_HELPER_H */
diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c
new file mode 100644
index 0000000..460b4bd
--- /dev/null
+++ b/tools/testing/selftests/vm/protection_keys.c
@@ -0,0 +1,1485 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Tests x86 Memory Protection Keys (see Documentation/x86/protection-keys.txt)
+ *
+ * There are examples in here of:
+ * * how to set protection keys on memory
+ * * how to set/clear bits in PKRU (the rights register)
+ * * how to handle SEGV_PKRU signals and extract pkey-relevant
+ * information from the siginfo
+ *
+ * Things to add:
+ * make sure KSM and KSM COW breaking works
+ * prefault pages in at malloc, or not
+ * protect MPX bounds tables with protection keys?
+ * make sure VMA splitting/merging is working correctly
+ * OOMs can destroy mm->mmap (see exit_mmap()), so make sure it is immune to pkeys
+ * look for pkey "leaks" where it is still set on a VMA but "freed" back to the kernel
+ * do a plain mprotect() to a mprotect_pkey() area and make sure the pkey sticks
+ *
+ * Compile like this:
+ * gcc -o protection_keys -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm
+ * gcc -m32 -o protection_keys_32 -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm
+ */
+#define _GNU_SOURCE
+#include <errno.h>
+#include <linux/futex.h>
+#include <sys/time.h>
+#include <sys/syscall.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <signal.h>
+#include <assert.h>
+#include <stdlib.h>
+#include <ucontext.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/ptrace.h>
+#include <setjmp.h>
+
+#include "pkey-helpers.h"
+
+int iteration_nr = 1;
+int test_nr;
+
+unsigned int shadow_pkru;
+
+#define HPAGE_SIZE (1UL<<21)
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x)))
+#define ALIGN_UP(x, align_to) (((x) + ((align_to)-1)) & ~((align_to)-1))
+#define ALIGN_DOWN(x, align_to) ((x) & ~((align_to)-1))
+#define ALIGN_PTR_UP(p, ptr_align_to) ((typeof(p))ALIGN_UP((unsigned long)(p), ptr_align_to))
+#define ALIGN_PTR_DOWN(p, ptr_align_to) ((typeof(p))ALIGN_DOWN((unsigned long)(p), ptr_align_to))
+#define __stringify_1(x...) #x
+#define __stringify(x...) __stringify_1(x)
+
+#define PTR_ERR_ENOTSUP ((void *)-ENOTSUP)
+
+int dprint_in_signal;
+char dprint_in_signal_buffer[DPRINT_IN_SIGNAL_BUF_SIZE];
+
+extern void abort_hooks(void);
+#define pkey_assert(condition) do { \
+ if (!(condition)) { \
+ dprintf0("assert() at %s::%d test_nr: %d iteration: %d\n", \
+ __FILE__, __LINE__, \
+ test_nr, iteration_nr); \
+ dprintf0("errno at assert: %d", errno); \
+ abort_hooks(); \
+ exit(__LINE__); \
+ } \
+} while (0)
+
+void cat_into_file(char *str, char *file)
+{
+ int fd = open(file, O_RDWR);
+ int ret;
+
+ dprintf2("%s(): writing '%s' to '%s'\n", __func__, str, file);
+ /*
+ * these need to be raw because they are called under
+ * pkey_assert()
+ */
+ if (fd < 0) {
+ fprintf(stderr, "error opening '%s'\n", str);
+ perror("error: ");
+ exit(__LINE__);
+ }
+
+ ret = write(fd, str, strlen(str));
+ if (ret != strlen(str)) {
+ perror("write to file failed");
+ fprintf(stderr, "filename: '%s' str: '%s'\n", file, str);
+ exit(__LINE__);
+ }
+ close(fd);
+}
+
+#if CONTROL_TRACING > 0
+static int warned_tracing;
+int tracing_root_ok(void)
+{
+ if (geteuid() != 0) {
+ if (!warned_tracing)
+ fprintf(stderr, "WARNING: not run as root, "
+ "can not do tracing control\n");
+ warned_tracing = 1;
+ return 0;
+ }
+ return 1;
+}
+#endif
+
+void tracing_on(void)
+{
+#if CONTROL_TRACING > 0
+#define TRACEDIR "/sys/kernel/debug/tracing"
+ char pidstr[32];
+
+ if (!tracing_root_ok())
+ return;
+
+ sprintf(pidstr, "%d", getpid());
+ cat_into_file("0", TRACEDIR "/tracing_on");
+ cat_into_file("\n", TRACEDIR "/trace");
+ if (1) {
+ cat_into_file("function_graph", TRACEDIR "/current_tracer");
+ cat_into_file("1", TRACEDIR "/options/funcgraph-proc");
+ } else {
+ cat_into_file("nop", TRACEDIR "/current_tracer");
+ }
+ cat_into_file(pidstr, TRACEDIR "/set_ftrace_pid");
+ cat_into_file("1", TRACEDIR "/tracing_on");
+ dprintf1("enabled tracing\n");
+#endif
+}
+
+void tracing_off(void)
+{
+#if CONTROL_TRACING > 0
+ if (!tracing_root_ok())
+ return;
+ cat_into_file("0", "/sys/kernel/debug/tracing/tracing_on");
+#endif
+}
+
+void abort_hooks(void)
+{
+ fprintf(stderr, "running %s()...\n", __func__);
+ tracing_off();
+#ifdef SLEEP_ON_ABORT
+ sleep(SLEEP_ON_ABORT);
+#endif
+}
+
+static inline void __page_o_noops(void)
+{
+ /* 8-bytes of instruction * 512 bytes = 1 page */
+ asm(".rept 512 ; nopl 0x7eeeeeee(%eax) ; .endr");
+}
+
+/*
+ * This attempts to have roughly a page of instructions followed by a few
+ * instructions that do a write, and another page of instructions. That
+ * way, we are pretty sure that the write is in the second page of
+ * instructions and has at least a page of padding behind it.
+ *
+ * *That* lets us be sure to madvise() away the write instruction, which
+ * will then fault, which makes sure that the fault code handles
+ * execute-only memory properly.
+ */
+__attribute__((__aligned__(PAGE_SIZE)))
+void lots_o_noops_around_write(int *write_to_me)
+{
+ dprintf3("running %s()\n", __func__);
+ __page_o_noops();
+ /* Assume this happens in the second page of instructions: */
+ *write_to_me = __LINE__;
+ /* pad out by another page: */
+ __page_o_noops();
+ dprintf3("%s() done\n", __func__);
+}
+
+/* Define some kernel-like types */
+#define u8 uint8_t
+#define u16 uint16_t
+#define u32 uint32_t
+#define u64 uint64_t
+
+#ifdef __i386__
+
+#ifndef SYS_mprotect_key
+# define SYS_mprotect_key 380
+#endif
+
+#ifndef SYS_pkey_alloc
+# define SYS_pkey_alloc 381
+# define SYS_pkey_free 382
+#endif
+
+#define REG_IP_IDX REG_EIP
+#define si_pkey_offset 0x14
+
+#else
+
+#ifndef SYS_mprotect_key
+# define SYS_mprotect_key 329
+#endif
+
+#ifndef SYS_pkey_alloc
+# define SYS_pkey_alloc 330
+# define SYS_pkey_free 331
+#endif
+
+#define REG_IP_IDX REG_RIP
+#define si_pkey_offset 0x20
+
+#endif
+
+void dump_mem(void *dumpme, int len_bytes)
+{
+ char *c = (void *)dumpme;
+ int i;
+
+ for (i = 0; i < len_bytes; i += sizeof(u64)) {
+ u64 *ptr = (u64 *)(c + i);
+ dprintf1("dump[%03d][@%p]: %016jx\n", i, ptr, *ptr);
+ }
+}
+
+/* Failed address bound checks: */
+#ifndef SEGV_BNDERR
+# define SEGV_BNDERR 3
+#endif
+
+#ifndef SEGV_PKUERR
+# define SEGV_PKUERR 4
+#endif
+
+static char *si_code_str(int si_code)
+{
+ if (si_code == SEGV_MAPERR)
+ return "SEGV_MAPERR";
+ if (si_code == SEGV_ACCERR)
+ return "SEGV_ACCERR";
+ if (si_code == SEGV_BNDERR)
+ return "SEGV_BNDERR";
+ if (si_code == SEGV_PKUERR)
+ return "SEGV_PKUERR";
+ return "UNKNOWN";
+}
+
+int pkru_faults;
+int last_si_pkey = -1;
+void signal_handler(int signum, siginfo_t *si, void *vucontext)
+{
+ ucontext_t *uctxt = vucontext;
+ int trapno;
+ unsigned long ip;
+ char *fpregs;
+ u32 *pkru_ptr;
+ u64 siginfo_pkey;
+ u32 *si_pkey_ptr;
+ int pkru_offset;
+ fpregset_t fpregset;
+
+ dprint_in_signal = 1;
+ dprintf1(">>>>===============SIGSEGV============================\n");
+ dprintf1("%s()::%d, pkru: 0x%x shadow: %x\n", __func__, __LINE__,
+ __rdpkru(), shadow_pkru);
+
+ trapno = uctxt->uc_mcontext.gregs[REG_TRAPNO];
+ ip = uctxt->uc_mcontext.gregs[REG_IP_IDX];
+ fpregset = uctxt->uc_mcontext.fpregs;
+ fpregs = (void *)fpregset;
+
+ dprintf2("%s() trapno: %d ip: 0x%lx info->si_code: %s/%d\n", __func__,
+ trapno, ip, si_code_str(si->si_code), si->si_code);
+#ifdef __i386__
+ /*
+ * 32-bit has some extra padding so that userspace can tell whether
+ * the XSTATE header is present in addition to the "legacy" FPU
+ * state. We just assume that it is here.
+ */
+ fpregs += 0x70;
+#endif
+ pkru_offset = pkru_xstate_offset();
+ pkru_ptr = (void *)(&fpregs[pkru_offset]);
+
+ dprintf1("siginfo: %p\n", si);
+ dprintf1(" fpregs: %p\n", fpregs);
+ /*
+ * If we got a PKRU fault, we *HAVE* to have at least one bit set in
+ * here.
+ */
+ dprintf1("pkru_xstate_offset: %d\n", pkru_xstate_offset());
+ if (DEBUG_LEVEL > 4)
+ dump_mem(pkru_ptr - 128, 256);
+ pkey_assert(*pkru_ptr);
+
+ if ((si->si_code == SEGV_MAPERR) ||
+ (si->si_code == SEGV_ACCERR) ||
+ (si->si_code == SEGV_BNDERR)) {
+ printf("non-PK si_code, exiting...\n");
+ exit(4);
+ }
+
+ si_pkey_ptr = (u32 *)(((u8 *)si) + si_pkey_offset);
+ dprintf1("si_pkey_ptr: %p\n", si_pkey_ptr);
+ dump_mem((u8 *)si_pkey_ptr - 8, 24);
+ siginfo_pkey = *si_pkey_ptr;
+ pkey_assert(siginfo_pkey < NR_PKEYS);
+ last_si_pkey = siginfo_pkey;
+
+ dprintf1("signal pkru from xsave: %08x\n", *pkru_ptr);
+ /* need __rdpkru() version so we do not do shadow_pkru checking */
+ dprintf1("signal pkru from pkru: %08x\n", __rdpkru());
+ dprintf1("pkey from siginfo: %jx\n", siginfo_pkey);
+ *(u64 *)pkru_ptr = 0x00000000;
+ dprintf1("WARNING: set PRKU=0 to allow faulting instruction to continue\n");
+ pkru_faults++;
+ dprintf1("<<<<==================================================\n");
+ dprint_in_signal = 0;
+}
+
+int wait_all_children(void)
+{
+ int status;
+ return waitpid(-1, &status, 0);
+}
+
+void sig_chld(int x)
+{
+ dprint_in_signal = 1;
+ dprintf2("[%d] SIGCHLD: %d\n", getpid(), x);
+ dprint_in_signal = 0;
+}
+
+void setup_sigsegv_handler(void)
+{
+ int r, rs;
+ struct sigaction newact;
+ struct sigaction oldact;
+
+ /* #PF is mapped to sigsegv */
+ int signum = SIGSEGV;
+
+ newact.sa_handler = 0;
+ newact.sa_sigaction = signal_handler;
+
+ /*sigset_t - signals to block while in the handler */
+ /* get the old signal mask. */
+ rs = sigprocmask(SIG_SETMASK, 0, &newact.sa_mask);
+ pkey_assert(rs == 0);
+
+ /* call sa_sigaction, not sa_handler*/
+ newact.sa_flags = SA_SIGINFO;
+
+ newact.sa_restorer = 0; /* void(*)(), obsolete */
+ r = sigaction(signum, &newact, &oldact);
+ r = sigaction(SIGALRM, &newact, &oldact);
+ pkey_assert(r == 0);
+}
+
+void setup_handlers(void)
+{
+ signal(SIGCHLD, &sig_chld);
+ setup_sigsegv_handler();
+}
+
+pid_t fork_lazy_child(void)
+{
+ pid_t forkret;
+
+ forkret = fork();
+ pkey_assert(forkret >= 0);
+ dprintf3("[%d] fork() ret: %d\n", getpid(), forkret);
+
+ if (!forkret) {
+ /* in the child */
+ while (1) {
+ dprintf1("child sleeping...\n");
+ sleep(30);
+ }
+ }
+ return forkret;
+}
+
+#ifndef PKEY_DISABLE_ACCESS
+# define PKEY_DISABLE_ACCESS 0x1
+#endif
+
+#ifndef PKEY_DISABLE_WRITE
+# define PKEY_DISABLE_WRITE 0x2
+#endif
+
+static u32 hw_pkey_get(int pkey, unsigned long flags)
+{
+ u32 mask = (PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE);
+ u32 pkru = __rdpkru();
+ u32 shifted_pkru;
+ u32 masked_pkru;
+
+ dprintf1("%s(pkey=%d, flags=%lx) = %x / %d\n",
+ __func__, pkey, flags, 0, 0);
+ dprintf2("%s() raw pkru: %x\n", __func__, pkru);
+
+ shifted_pkru = (pkru >> (pkey * PKRU_BITS_PER_PKEY));
+ dprintf2("%s() shifted_pkru: %x\n", __func__, shifted_pkru);
+ masked_pkru = shifted_pkru & mask;
+ dprintf2("%s() masked pkru: %x\n", __func__, masked_pkru);
+ /*
+ * shift down the relevant bits to the lowest two, then
+ * mask off all the other high bits.
+ */
+ return masked_pkru;
+}
+
+static int hw_pkey_set(int pkey, unsigned long rights, unsigned long flags)
+{
+ u32 mask = (PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE);
+ u32 old_pkru = __rdpkru();
+ u32 new_pkru;
+
+ /* make sure that 'rights' only contains the bits we expect: */
+ assert(!(rights & ~mask));
+
+ /* copy old pkru */
+ new_pkru = old_pkru;
+ /* mask out bits from pkey in old value: */
+ new_pkru &= ~(mask << (pkey * PKRU_BITS_PER_PKEY));
+ /* OR in new bits for pkey: */
+ new_pkru |= (rights << (pkey * PKRU_BITS_PER_PKEY));
+
+ __wrpkru(new_pkru);
+
+ dprintf3("%s(pkey=%d, rights=%lx, flags=%lx) = %x pkru now: %x old_pkru: %x\n",
+ __func__, pkey, rights, flags, 0, __rdpkru(), old_pkru);
+ return 0;
+}
+
+void pkey_disable_set(int pkey, int flags)
+{
+ unsigned long syscall_flags = 0;
+ int ret;
+ int pkey_rights;
+ u32 orig_pkru = rdpkru();
+
+ dprintf1("START->%s(%d, 0x%x)\n", __func__,
+ pkey, flags);
+ pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE));
+
+ pkey_rights = hw_pkey_get(pkey, syscall_flags);
+
+ dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__,
+ pkey, pkey, pkey_rights);
+ pkey_assert(pkey_rights >= 0);
+
+ pkey_rights |= flags;
+
+ ret = hw_pkey_set(pkey, pkey_rights, syscall_flags);
+ assert(!ret);
+ /*pkru and flags have the same format */
+ shadow_pkru |= flags << (pkey * 2);
+ dprintf1("%s(%d) shadow: 0x%x\n", __func__, pkey, shadow_pkru);
+
+ pkey_assert(ret >= 0);
+
+ pkey_rights = hw_pkey_get(pkey, syscall_flags);
+ dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__,
+ pkey, pkey, pkey_rights);
+
+ dprintf1("%s(%d) pkru: 0x%x\n", __func__, pkey, rdpkru());
+ if (flags)
+ pkey_assert(rdpkru() > orig_pkru);
+ dprintf1("END<---%s(%d, 0x%x)\n", __func__,
+ pkey, flags);
+}
+
+void pkey_disable_clear(int pkey, int flags)
+{
+ unsigned long syscall_flags = 0;
+ int ret;
+ int pkey_rights = hw_pkey_get(pkey, syscall_flags);
+ u32 orig_pkru = rdpkru();
+
+ pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE));
+
+ dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__,
+ pkey, pkey, pkey_rights);
+ pkey_assert(pkey_rights >= 0);
+
+ pkey_rights |= flags;
+
+ ret = hw_pkey_set(pkey, pkey_rights, 0);
+ /* pkru and flags have the same format */
+ shadow_pkru &= ~(flags << (pkey * 2));
+ pkey_assert(ret >= 0);
+
+ pkey_rights = hw_pkey_get(pkey, syscall_flags);
+ dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__,
+ pkey, pkey, pkey_rights);
+
+ dprintf1("%s(%d) pkru: 0x%x\n", __func__, pkey, rdpkru());
+ if (flags)
+ assert(rdpkru() > orig_pkru);
+}
+
+void pkey_write_allow(int pkey)
+{
+ pkey_disable_clear(pkey, PKEY_DISABLE_WRITE);
+}
+void pkey_write_deny(int pkey)
+{
+ pkey_disable_set(pkey, PKEY_DISABLE_WRITE);
+}
+void pkey_access_allow(int pkey)
+{
+ pkey_disable_clear(pkey, PKEY_DISABLE_ACCESS);
+}
+void pkey_access_deny(int pkey)
+{
+ pkey_disable_set(pkey, PKEY_DISABLE_ACCESS);
+}
+
+int sys_mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot,
+ unsigned long pkey)
+{
+ int sret;
+
+ dprintf2("%s(0x%p, %zx, prot=%lx, pkey=%lx)\n", __func__,
+ ptr, size, orig_prot, pkey);
+
+ errno = 0;
+ sret = syscall(SYS_mprotect_key, ptr, size, orig_prot, pkey);
+ if (errno) {
+ dprintf2("SYS_mprotect_key sret: %d\n", sret);
+ dprintf2("SYS_mprotect_key prot: 0x%lx\n", orig_prot);
+ dprintf2("SYS_mprotect_key failed, errno: %d\n", errno);
+ if (DEBUG_LEVEL >= 2)
+ perror("SYS_mprotect_pkey");
+ }
+ return sret;
+}
+
+int sys_pkey_alloc(unsigned long flags, unsigned long init_val)
+{
+ int ret = syscall(SYS_pkey_alloc, flags, init_val);
+ dprintf1("%s(flags=%lx, init_val=%lx) syscall ret: %d errno: %d\n",
+ __func__, flags, init_val, ret, errno);
+ return ret;
+}
+
+int alloc_pkey(void)
+{
+ int ret;
+ unsigned long init_val = 0x0;
+
+ dprintf1("alloc_pkey()::%d, pkru: 0x%x shadow: %x\n",
+ __LINE__, __rdpkru(), shadow_pkru);
+ ret = sys_pkey_alloc(0, init_val);
+ /*
+ * pkey_alloc() sets PKRU, so we need to reflect it in
+ * shadow_pkru:
+ */
+ dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n",
+ __LINE__, ret, __rdpkru(), shadow_pkru);
+ if (ret) {
+ /* clear both the bits: */
+ shadow_pkru &= ~(0x3 << (ret * 2));
+ dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n",
+ __LINE__, ret, __rdpkru(), shadow_pkru);
+ /*
+ * move the new state in from init_val
+ * (remember, we cheated and init_val == pkru format)
+ */
+ shadow_pkru |= (init_val << (ret * 2));
+ }
+ dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n",
+ __LINE__, ret, __rdpkru(), shadow_pkru);
+ dprintf1("alloc_pkey()::%d errno: %d\n", __LINE__, errno);
+ /* for shadow checking: */
+ rdpkru();
+ dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n",
+ __LINE__, ret, __rdpkru(), shadow_pkru);
+ return ret;
+}
+
+int sys_pkey_free(unsigned long pkey)
+{
+ int ret = syscall(SYS_pkey_free, pkey);
+ dprintf1("%s(pkey=%ld) syscall ret: %d\n", __func__, pkey, ret);
+ return ret;
+}
+
+/*
+ * I had a bug where pkey bits could be set by mprotect() but
+ * not cleared. This ensures we get lots of random bit sets
+ * and clears on the vma and pte pkey bits.
+ */
+int alloc_random_pkey(void)
+{
+ int max_nr_pkey_allocs;
+ int ret;
+ int i;
+ int alloced_pkeys[NR_PKEYS];
+ int nr_alloced = 0;
+ int random_index;
+ memset(alloced_pkeys, 0, sizeof(alloced_pkeys));
+
+ /* allocate every possible key and make a note of which ones we got */
+ max_nr_pkey_allocs = NR_PKEYS;
+ max_nr_pkey_allocs = 1;
+ for (i = 0; i < max_nr_pkey_allocs; i++) {
+ int new_pkey = alloc_pkey();
+ if (new_pkey < 0)
+ break;
+ alloced_pkeys[nr_alloced++] = new_pkey;
+ }
+
+ pkey_assert(nr_alloced > 0);
+ /* select a random one out of the allocated ones */
+ random_index = rand() % nr_alloced;
+ ret = alloced_pkeys[random_index];
+ /* now zero it out so we don't free it next */
+ alloced_pkeys[random_index] = 0;
+
+ /* go through the allocated ones that we did not want and free them */
+ for (i = 0; i < nr_alloced; i++) {
+ int free_ret;
+ if (!alloced_pkeys[i])
+ continue;
+ free_ret = sys_pkey_free(alloced_pkeys[i]);
+ pkey_assert(!free_ret);
+ }
+ dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__,
+ __LINE__, ret, __rdpkru(), shadow_pkru);
+ return ret;
+}
+
+int mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot,
+ unsigned long pkey)
+{
+ int nr_iterations = random() % 100;
+ int ret;
+
+ while (0) {
+ int rpkey = alloc_random_pkey();
+ ret = sys_mprotect_pkey(ptr, size, orig_prot, pkey);
+ dprintf1("sys_mprotect_pkey(%p, %zx, prot=0x%lx, pkey=%ld) ret: %d\n",
+ ptr, size, orig_prot, pkey, ret);
+ if (nr_iterations-- < 0)
+ break;
+
+ dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__,
+ __LINE__, ret, __rdpkru(), shadow_pkru);
+ sys_pkey_free(rpkey);
+ dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__,
+ __LINE__, ret, __rdpkru(), shadow_pkru);
+ }
+ pkey_assert(pkey < NR_PKEYS);
+
+ ret = sys_mprotect_pkey(ptr, size, orig_prot, pkey);
+ dprintf1("mprotect_pkey(%p, %zx, prot=0x%lx, pkey=%ld) ret: %d\n",
+ ptr, size, orig_prot, pkey, ret);
+ pkey_assert(!ret);
+ dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__,
+ __LINE__, ret, __rdpkru(), shadow_pkru);
+ return ret;
+}
+
+struct pkey_malloc_record {
+ void *ptr;
+ long size;
+ int prot;
+};
+struct pkey_malloc_record *pkey_malloc_records;
+struct pkey_malloc_record *pkey_last_malloc_record;
+long nr_pkey_malloc_records;
+void record_pkey_malloc(void *ptr, long size, int prot)
+{
+ long i;
+ struct pkey_malloc_record *rec = NULL;
+
+ for (i = 0; i < nr_pkey_malloc_records; i++) {
+ rec = &pkey_malloc_records[i];
+ /* find a free record */
+ if (rec)
+ break;
+ }
+ if (!rec) {
+ /* every record is full */
+ size_t old_nr_records = nr_pkey_malloc_records;
+ size_t new_nr_records = (nr_pkey_malloc_records * 2 + 1);
+ size_t new_size = new_nr_records * sizeof(struct pkey_malloc_record);
+ dprintf2("new_nr_records: %zd\n", new_nr_records);
+ dprintf2("new_size: %zd\n", new_size);
+ pkey_malloc_records = realloc(pkey_malloc_records, new_size);
+ pkey_assert(pkey_malloc_records != NULL);
+ rec = &pkey_malloc_records[nr_pkey_malloc_records];
+ /*
+ * realloc() does not initialize memory, so zero it from
+ * the first new record all the way to the end.
+ */
+ for (i = 0; i < new_nr_records - old_nr_records; i++)
+ memset(rec + i, 0, sizeof(*rec));
+ }
+ dprintf3("filling malloc record[%d/%p]: {%p, %ld}\n",
+ (int)(rec - pkey_malloc_records), rec, ptr, size);
+ rec->ptr = ptr;
+ rec->size = size;
+ rec->prot = prot;
+ pkey_last_malloc_record = rec;
+ nr_pkey_malloc_records++;
+}
+
+void free_pkey_malloc(void *ptr)
+{
+ long i;
+ int ret;
+ dprintf3("%s(%p)\n", __func__, ptr);
+ for (i = 0; i < nr_pkey_malloc_records; i++) {
+ struct pkey_malloc_record *rec = &pkey_malloc_records[i];
+ dprintf4("looking for ptr %p at record[%ld/%p]: {%p, %ld}\n",
+ ptr, i, rec, rec->ptr, rec->size);
+ if ((ptr < rec->ptr) ||
+ (ptr >= rec->ptr + rec->size))
+ continue;
+
+ dprintf3("found ptr %p at record[%ld/%p]: {%p, %ld}\n",
+ ptr, i, rec, rec->ptr, rec->size);
+ nr_pkey_malloc_records--;
+ ret = munmap(rec->ptr, rec->size);
+ dprintf3("munmap ret: %d\n", ret);
+ pkey_assert(!ret);
+ dprintf3("clearing rec->ptr, rec: %p\n", rec);
+ rec->ptr = NULL;
+ dprintf3("done clearing rec->ptr, rec: %p\n", rec);
+ return;
+ }
+ pkey_assert(false);
+}
+
+
+void *malloc_pkey_with_mprotect(long size, int prot, u16 pkey)
+{
+ void *ptr;
+ int ret;
+
+ rdpkru();
+ dprintf1("doing %s(size=%ld, prot=0x%x, pkey=%d)\n", __func__,
+ size, prot, pkey);
+ pkey_assert(pkey < NR_PKEYS);
+ ptr = mmap(NULL, size, prot, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
+ pkey_assert(ptr != (void *)-1);
+ ret = mprotect_pkey((void *)ptr, PAGE_SIZE, prot, pkey);
+ pkey_assert(!ret);
+ record_pkey_malloc(ptr, size, prot);
+ rdpkru();
+
+ dprintf1("%s() for pkey %d @ %p\n", __func__, pkey, ptr);
+ return ptr;
+}
+
+void *malloc_pkey_anon_huge(long size, int prot, u16 pkey)
+{
+ int ret;
+ void *ptr;
+
+ dprintf1("doing %s(size=%ld, prot=0x%x, pkey=%d)\n", __func__,
+ size, prot, pkey);
+ /*
+ * Guarantee we can fit at least one huge page in the resulting
+ * allocation by allocating space for 2:
+ */
+ size = ALIGN_UP(size, HPAGE_SIZE * 2);
+ ptr = mmap(NULL, size, PROT_NONE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
+ pkey_assert(ptr != (void *)-1);
+ record_pkey_malloc(ptr, size, prot);
+ mprotect_pkey(ptr, size, prot, pkey);
+
+ dprintf1("unaligned ptr: %p\n", ptr);
+ ptr = ALIGN_PTR_UP(ptr, HPAGE_SIZE);
+ dprintf1(" aligned ptr: %p\n", ptr);
+ ret = madvise(ptr, HPAGE_SIZE, MADV_HUGEPAGE);
+ dprintf1("MADV_HUGEPAGE ret: %d\n", ret);
+ ret = madvise(ptr, HPAGE_SIZE, MADV_WILLNEED);
+ dprintf1("MADV_WILLNEED ret: %d\n", ret);
+ memset(ptr, 0, HPAGE_SIZE);
+
+ dprintf1("mmap()'d thp for pkey %d @ %p\n", pkey, ptr);
+ return ptr;
+}
+
+int hugetlb_setup_ok;
+#define GET_NR_HUGE_PAGES 10
+void setup_hugetlbfs(void)
+{
+ int err;
+ int fd;
+ char buf[] = "123";
+
+ if (geteuid() != 0) {
+ fprintf(stderr, "WARNING: not run as root, can not do hugetlb test\n");
+ return;
+ }
+
+ cat_into_file(__stringify(GET_NR_HUGE_PAGES), "/proc/sys/vm/nr_hugepages");
+
+ /*
+ * Now go make sure that we got the pages and that they
+ * are 2M pages. Someone might have made 1G the default.
+ */
+ fd = open("/sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages", O_RDONLY);
+ if (fd < 0) {
+ perror("opening sysfs 2M hugetlb config");
+ return;
+ }
+
+ /* -1 to guarantee leaving the trailing \0 */
+ err = read(fd, buf, sizeof(buf)-1);
+ close(fd);
+ if (err <= 0) {
+ perror("reading sysfs 2M hugetlb config");
+ return;
+ }
+
+ if (atoi(buf) != GET_NR_HUGE_PAGES) {
+ fprintf(stderr, "could not confirm 2M pages, got: '%s' expected %d\n",
+ buf, GET_NR_HUGE_PAGES);
+ return;
+ }
+
+ hugetlb_setup_ok = 1;
+}
+
+void *malloc_pkey_hugetlb(long size, int prot, u16 pkey)
+{
+ void *ptr;
+ int flags = MAP_ANONYMOUS|MAP_PRIVATE|MAP_HUGETLB;
+
+ if (!hugetlb_setup_ok)
+ return PTR_ERR_ENOTSUP;
+
+ dprintf1("doing %s(%ld, %x, %x)\n", __func__, size, prot, pkey);
+ size = ALIGN_UP(size, HPAGE_SIZE * 2);
+ pkey_assert(pkey < NR_PKEYS);
+ ptr = mmap(NULL, size, PROT_NONE, flags, -1, 0);
+ pkey_assert(ptr != (void *)-1);
+ mprotect_pkey(ptr, size, prot, pkey);
+
+ record_pkey_malloc(ptr, size, prot);
+
+ dprintf1("mmap()'d hugetlbfs for pkey %d @ %p\n", pkey, ptr);
+ return ptr;
+}
+
+void *malloc_pkey_mmap_dax(long size, int prot, u16 pkey)
+{
+ void *ptr;
+ int fd;
+
+ dprintf1("doing %s(size=%ld, prot=0x%x, pkey=%d)\n", __func__,
+ size, prot, pkey);
+ pkey_assert(pkey < NR_PKEYS);
+ fd = open("/dax/foo", O_RDWR);
+ pkey_assert(fd >= 0);
+
+ ptr = mmap(0, size, prot, MAP_SHARED, fd, 0);
+ pkey_assert(ptr != (void *)-1);
+
+ mprotect_pkey(ptr, size, prot, pkey);
+
+ record_pkey_malloc(ptr, size, prot);
+
+ dprintf1("mmap()'d for pkey %d @ %p\n", pkey, ptr);
+ close(fd);
+ return ptr;
+}
+
+void *(*pkey_malloc[])(long size, int prot, u16 pkey) = {
+
+ malloc_pkey_with_mprotect,
+ malloc_pkey_anon_huge,
+ malloc_pkey_hugetlb
+/* can not do direct with the pkey_mprotect() API:
+ malloc_pkey_mmap_direct,
+ malloc_pkey_mmap_dax,
+*/
+};
+
+void *malloc_pkey(long size, int prot, u16 pkey)
+{
+ void *ret;
+ static int malloc_type;
+ int nr_malloc_types = ARRAY_SIZE(pkey_malloc);
+
+ pkey_assert(pkey < NR_PKEYS);
+
+ while (1) {
+ pkey_assert(malloc_type < nr_malloc_types);
+
+ ret = pkey_malloc[malloc_type](size, prot, pkey);
+ pkey_assert(ret != (void *)-1);
+
+ malloc_type++;
+ if (malloc_type >= nr_malloc_types)
+ malloc_type = (random()%nr_malloc_types);
+
+ /* try again if the malloc_type we tried is unsupported */
+ if (ret == PTR_ERR_ENOTSUP)
+ continue;
+
+ break;
+ }
+
+ dprintf3("%s(%ld, prot=%x, pkey=%x) returning: %p\n", __func__,
+ size, prot, pkey, ret);
+ return ret;
+}
+
+int last_pkru_faults;
+#define UNKNOWN_PKEY -2
+void expected_pk_fault(int pkey)
+{
+ dprintf2("%s(): last_pkru_faults: %d pkru_faults: %d\n",
+ __func__, last_pkru_faults, pkru_faults);
+ dprintf2("%s(%d): last_si_pkey: %d\n", __func__, pkey, last_si_pkey);
+ pkey_assert(last_pkru_faults + 1 == pkru_faults);
+
+ /*
+ * For exec-only memory, we do not know the pkey in
+ * advance, so skip this check.
+ */
+ if (pkey != UNKNOWN_PKEY)
+ pkey_assert(last_si_pkey == pkey);
+
+ /*
+ * The signal handler shold have cleared out PKRU to let the
+ * test program continue. We now have to restore it.
+ */
+ if (__rdpkru() != 0)
+ pkey_assert(0);
+
+ __wrpkru(shadow_pkru);
+ dprintf1("%s() set PKRU=%x to restore state after signal nuked it\n",
+ __func__, shadow_pkru);
+ last_pkru_faults = pkru_faults;
+ last_si_pkey = -1;
+}
+
+#define do_not_expect_pk_fault(msg) do { \
+ if (last_pkru_faults != pkru_faults) \
+ dprintf0("unexpected PK fault: %s\n", msg); \
+ pkey_assert(last_pkru_faults == pkru_faults); \
+} while (0)
+
+int test_fds[10] = { -1 };
+int nr_test_fds;
+void __save_test_fd(int fd)
+{
+ pkey_assert(fd >= 0);
+ pkey_assert(nr_test_fds < ARRAY_SIZE(test_fds));
+ test_fds[nr_test_fds] = fd;
+ nr_test_fds++;
+}
+
+int get_test_read_fd(void)
+{
+ int test_fd = open("/etc/passwd", O_RDONLY);
+ __save_test_fd(test_fd);
+ return test_fd;
+}
+
+void close_test_fds(void)
+{
+ int i;
+
+ for (i = 0; i < nr_test_fds; i++) {
+ if (test_fds[i] < 0)
+ continue;
+ close(test_fds[i]);
+ test_fds[i] = -1;
+ }
+ nr_test_fds = 0;
+}
+
+#define barrier() __asm__ __volatile__("": : :"memory")
+__attribute__((noinline)) int read_ptr(int *ptr)
+{
+ /*
+ * Keep GCC from optimizing this away somehow
+ */
+ barrier();
+ return *ptr;
+}
+
+void test_read_of_write_disabled_region(int *ptr, u16 pkey)
+{
+ int ptr_contents;
+
+ dprintf1("disabling write access to PKEY[1], doing read\n");
+ pkey_write_deny(pkey);
+ ptr_contents = read_ptr(ptr);
+ dprintf1("*ptr: %d\n", ptr_contents);
+ dprintf1("\n");
+}
+void test_read_of_access_disabled_region(int *ptr, u16 pkey)
+{
+ int ptr_contents;
+
+ dprintf1("disabling access to PKEY[%02d], doing read @ %p\n", pkey, ptr);
+ rdpkru();
+ pkey_access_deny(pkey);
+ ptr_contents = read_ptr(ptr);
+ dprintf1("*ptr: %d\n", ptr_contents);
+ expected_pk_fault(pkey);
+}
+void test_write_of_write_disabled_region(int *ptr, u16 pkey)
+{
+ dprintf1("disabling write access to PKEY[%02d], doing write\n", pkey);
+ pkey_write_deny(pkey);
+ *ptr = __LINE__;
+ expected_pk_fault(pkey);
+}
+void test_write_of_access_disabled_region(int *ptr, u16 pkey)
+{
+ dprintf1("disabling access to PKEY[%02d], doing write\n", pkey);
+ pkey_access_deny(pkey);
+ *ptr = __LINE__;
+ expected_pk_fault(pkey);
+}
+void test_kernel_write_of_access_disabled_region(int *ptr, u16 pkey)
+{
+ int ret;
+ int test_fd = get_test_read_fd();
+
+ dprintf1("disabling access to PKEY[%02d], "
+ "having kernel read() to buffer\n", pkey);
+ pkey_access_deny(pkey);
+ ret = read(test_fd, ptr, 1);
+ dprintf1("read ret: %d\n", ret);
+ pkey_assert(ret);
+}
+void test_kernel_write_of_write_disabled_region(int *ptr, u16 pkey)
+{
+ int ret;
+ int test_fd = get_test_read_fd();
+
+ pkey_write_deny(pkey);
+ ret = read(test_fd, ptr, 100);
+ dprintf1("read ret: %d\n", ret);
+ if (ret < 0 && (DEBUG_LEVEL > 0))
+ perror("verbose read result (OK for this to be bad)");
+ pkey_assert(ret);
+}
+
+void test_kernel_gup_of_access_disabled_region(int *ptr, u16 pkey)
+{
+ int pipe_ret, vmsplice_ret;
+ struct iovec iov;
+ int pipe_fds[2];
+
+ pipe_ret = pipe(pipe_fds);
+
+ pkey_assert(pipe_ret == 0);
+ dprintf1("disabling access to PKEY[%02d], "
+ "having kernel vmsplice from buffer\n", pkey);
+ pkey_access_deny(pkey);
+ iov.iov_base = ptr;
+ iov.iov_len = PAGE_SIZE;
+ vmsplice_ret = vmsplice(pipe_fds[1], &iov, 1, SPLICE_F_GIFT);
+ dprintf1("vmsplice() ret: %d\n", vmsplice_ret);
+ pkey_assert(vmsplice_ret == -1);
+
+ close(pipe_fds[0]);
+ close(pipe_fds[1]);
+}
+
+void test_kernel_gup_write_to_write_disabled_region(int *ptr, u16 pkey)
+{
+ int ignored = 0xdada;
+ int futex_ret;
+ int some_int = __LINE__;
+
+ dprintf1("disabling write to PKEY[%02d], "
+ "doing futex gunk in buffer\n", pkey);
+ *ptr = some_int;
+ pkey_write_deny(pkey);
+ futex_ret = syscall(SYS_futex, ptr, FUTEX_WAIT, some_int-1, NULL,
+ &ignored, ignored);
+ if (DEBUG_LEVEL > 0)
+ perror("futex");
+ dprintf1("futex() ret: %d\n", futex_ret);
+}
+
+/* Assumes that all pkeys other than 'pkey' are unallocated */
+void test_pkey_syscalls_on_non_allocated_pkey(int *ptr, u16 pkey)
+{
+ int err;
+ int i;
+
+ /* Note: 0 is the default pkey, so don't mess with it */
+ for (i = 1; i < NR_PKEYS; i++) {
+ if (pkey == i)
+ continue;
+
+ dprintf1("trying get/set/free to non-allocated pkey: %2d\n", i);
+ err = sys_pkey_free(i);
+ pkey_assert(err);
+
+ err = sys_pkey_free(i);
+ pkey_assert(err);
+
+ err = sys_mprotect_pkey(ptr, PAGE_SIZE, PROT_READ, i);
+ pkey_assert(err);
+ }
+}
+
+/* Assumes that all pkeys other than 'pkey' are unallocated */
+void test_pkey_syscalls_bad_args(int *ptr, u16 pkey)
+{
+ int err;
+ int bad_pkey = NR_PKEYS+99;
+
+ /* pass a known-invalid pkey in: */
+ err = sys_mprotect_pkey(ptr, PAGE_SIZE, PROT_READ, bad_pkey);
+ pkey_assert(err);
+}
+
+/* Assumes that all pkeys other than 'pkey' are unallocated */
+void test_pkey_alloc_exhaust(int *ptr, u16 pkey)
+{
+ int err;
+ int allocated_pkeys[NR_PKEYS] = {0};
+ int nr_allocated_pkeys = 0;
+ int i;
+
+ for (i = 0; i < NR_PKEYS*2; i++) {
+ int new_pkey;
+ dprintf1("%s() alloc loop: %d\n", __func__, i);
+ new_pkey = alloc_pkey();
+ dprintf4("%s()::%d, err: %d pkru: 0x%x shadow: 0x%x\n", __func__,
+ __LINE__, err, __rdpkru(), shadow_pkru);
+ rdpkru(); /* for shadow checking */
+ dprintf2("%s() errno: %d ENOSPC: %d\n", __func__, errno, ENOSPC);
+ if ((new_pkey == -1) && (errno == ENOSPC)) {
+ dprintf2("%s() failed to allocate pkey after %d tries\n",
+ __func__, nr_allocated_pkeys);
+ break;
+ }
+ pkey_assert(nr_allocated_pkeys < NR_PKEYS);
+ allocated_pkeys[nr_allocated_pkeys++] = new_pkey;
+ }
+
+ dprintf3("%s()::%d\n", __func__, __LINE__);
+
+ /*
+ * ensure it did not reach the end of the loop without
+ * failure:
+ */
+ pkey_assert(i < NR_PKEYS*2);
+
+ /*
+ * There are 16 pkeys supported in hardware. Three are
+ * allocated by the time we get here:
+ * 1. The default key (0)
+ * 2. One possibly consumed by an execute-only mapping.
+ * 3. One allocated by the test code and passed in via
+ * 'pkey' to this function.
+ * Ensure that we can allocate at least another 13 (16-3).
+ */
+ pkey_assert(i >= NR_PKEYS-3);
+
+ for (i = 0; i < nr_allocated_pkeys; i++) {
+ err = sys_pkey_free(allocated_pkeys[i]);
+ pkey_assert(!err);
+ rdpkru(); /* for shadow checking */
+ }
+}
+
+/*
+ * pkey 0 is special. It is allocated by default, so you do not
+ * have to call pkey_alloc() to use it first. Make sure that it
+ * is usable.
+ */
+void test_mprotect_with_pkey_0(int *ptr, u16 pkey)
+{
+ long size;
+ int prot;
+
+ assert(pkey_last_malloc_record);
+ size = pkey_last_malloc_record->size;
+ /*
+ * This is a bit of a hack. But mprotect() requires
+ * huge-page-aligned sizes when operating on hugetlbfs.
+ * So, make sure that we use something that's a multiple
+ * of a huge page when we can.
+ */
+ if (size >= HPAGE_SIZE)
+ size = HPAGE_SIZE;
+ prot = pkey_last_malloc_record->prot;
+
+ /* Use pkey 0 */
+ mprotect_pkey(ptr, size, prot, 0);
+
+ /* Make sure that we can set it back to the original pkey. */
+ mprotect_pkey(ptr, size, prot, pkey);
+}
+
+void test_ptrace_of_child(int *ptr, u16 pkey)
+{
+ __attribute__((__unused__)) int peek_result;
+ pid_t child_pid;
+ void *ignored = 0;
+ long ret;
+ int status;
+ /*
+ * This is the "control" for our little expermient. Make sure
+ * we can always access it when ptracing.
+ */
+ int *plain_ptr_unaligned = malloc(HPAGE_SIZE);
+ int *plain_ptr = ALIGN_PTR_UP(plain_ptr_unaligned, PAGE_SIZE);
+
+ /*
+ * Fork a child which is an exact copy of this process, of course.
+ * That means we can do all of our tests via ptrace() and then plain
+ * memory access and ensure they work differently.
+ */
+ child_pid = fork_lazy_child();
+ dprintf1("[%d] child pid: %d\n", getpid(), child_pid);
+
+ ret = ptrace(PTRACE_ATTACH, child_pid, ignored, ignored);
+ if (ret)
+ perror("attach");
+ dprintf1("[%d] attach ret: %ld %d\n", getpid(), ret, __LINE__);
+ pkey_assert(ret != -1);
+ ret = waitpid(child_pid, &status, WUNTRACED);
+ if ((ret != child_pid) || !(WIFSTOPPED(status))) {
+ fprintf(stderr, "weird waitpid result %ld stat %x\n",
+ ret, status);
+ pkey_assert(0);
+ }
+ dprintf2("waitpid ret: %ld\n", ret);
+ dprintf2("waitpid status: %d\n", status);
+
+ pkey_access_deny(pkey);
+ pkey_write_deny(pkey);
+
+ /* Write access, untested for now:
+ ret = ptrace(PTRACE_POKEDATA, child_pid, peek_at, data);
+ pkey_assert(ret != -1);
+ dprintf1("poke at %p: %ld\n", peek_at, ret);
+ */
+
+ /*
+ * Try to access the pkey-protected "ptr" via ptrace:
+ */
+ ret = ptrace(PTRACE_PEEKDATA, child_pid, ptr, ignored);
+ /* expect it to work, without an error: */
+ pkey_assert(ret != -1);
+ /* Now access from the current task, and expect an exception: */
+ peek_result = read_ptr(ptr);
+ expected_pk_fault(pkey);
+
+ /*
+ * Try to access the NON-pkey-protected "plain_ptr" via ptrace:
+ */
+ ret = ptrace(PTRACE_PEEKDATA, child_pid, plain_ptr, ignored);
+ /* expect it to work, without an error: */
+ pkey_assert(ret != -1);
+ /* Now access from the current task, and expect NO exception: */
+ peek_result = read_ptr(plain_ptr);
+ do_not_expect_pk_fault("read plain pointer after ptrace");
+
+ ret = ptrace(PTRACE_DETACH, child_pid, ignored, 0);
+ pkey_assert(ret != -1);
+
+ ret = kill(child_pid, SIGKILL);
+ pkey_assert(ret != -1);
+
+ wait(&status);
+
+ free(plain_ptr_unaligned);
+}
+
+void *get_pointer_to_instructions(void)
+{
+ void *p1;
+
+ p1 = ALIGN_PTR_UP(&lots_o_noops_around_write, PAGE_SIZE);
+ dprintf3("&lots_o_noops: %p\n", &lots_o_noops_around_write);
+ /* lots_o_noops_around_write should be page-aligned already */
+ assert(p1 == &lots_o_noops_around_write);
+
+ /* Point 'p1' at the *second* page of the function: */
+ p1 += PAGE_SIZE;
+
+ /*
+ * Try to ensure we fault this in on next touch to ensure
+ * we get an instruction fault as opposed to a data one
+ */
+ madvise(p1, PAGE_SIZE, MADV_DONTNEED);
+
+ return p1;
+}
+
+void test_executing_on_unreadable_memory(int *ptr, u16 pkey)
+{
+ void *p1;
+ int scratch;
+ int ptr_contents;
+ int ret;
+
+ p1 = get_pointer_to_instructions();
+ lots_o_noops_around_write(&scratch);
+ ptr_contents = read_ptr(p1);
+ dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents);
+
+ ret = mprotect_pkey(p1, PAGE_SIZE, PROT_EXEC, (u64)pkey);
+ pkey_assert(!ret);
+ pkey_access_deny(pkey);
+
+ dprintf2("pkru: %x\n", rdpkru());
+
+ /*
+ * Make sure this is an *instruction* fault
+ */
+ madvise(p1, PAGE_SIZE, MADV_DONTNEED);
+ lots_o_noops_around_write(&scratch);
+ do_not_expect_pk_fault("executing on PROT_EXEC memory");
+ ptr_contents = read_ptr(p1);
+ dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents);
+ expected_pk_fault(pkey);
+}
+
+void test_implicit_mprotect_exec_only_memory(int *ptr, u16 pkey)
+{
+ void *p1;
+ int scratch;
+ int ptr_contents;
+ int ret;
+
+ dprintf1("%s() start\n", __func__);
+
+ p1 = get_pointer_to_instructions();
+ lots_o_noops_around_write(&scratch);
+ ptr_contents = read_ptr(p1);
+ dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents);
+
+ /* Use a *normal* mprotect(), not mprotect_pkey(): */
+ ret = mprotect(p1, PAGE_SIZE, PROT_EXEC);
+ pkey_assert(!ret);
+
+ dprintf2("pkru: %x\n", rdpkru());
+
+ /* Make sure this is an *instruction* fault */
+ madvise(p1, PAGE_SIZE, MADV_DONTNEED);
+ lots_o_noops_around_write(&scratch);
+ do_not_expect_pk_fault("executing on PROT_EXEC memory");
+ ptr_contents = read_ptr(p1);
+ dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents);
+ expected_pk_fault(UNKNOWN_PKEY);
+
+ /*
+ * Put the memory back to non-PROT_EXEC. Should clear the
+ * exec-only pkey off the VMA and allow it to be readable
+ * again. Go to PROT_NONE first to check for a kernel bug
+ * that did not clear the pkey when doing PROT_NONE.
+ */
+ ret = mprotect(p1, PAGE_SIZE, PROT_NONE);
+ pkey_assert(!ret);
+
+ ret = mprotect(p1, PAGE_SIZE, PROT_READ|PROT_EXEC);
+ pkey_assert(!ret);
+ ptr_contents = read_ptr(p1);
+ do_not_expect_pk_fault("plain read on recently PROT_EXEC area");
+}
+
+void test_mprotect_pkey_on_unsupported_cpu(int *ptr, u16 pkey)
+{
+ int size = PAGE_SIZE;
+ int sret;
+
+ if (cpu_has_pku()) {
+ dprintf1("SKIP: %s: no CPU support\n", __func__);
+ return;
+ }
+
+ sret = syscall(SYS_mprotect_key, ptr, size, PROT_READ, pkey);
+ pkey_assert(sret < 0);
+}
+
+void (*pkey_tests[])(int *ptr, u16 pkey) = {
+ test_read_of_write_disabled_region,
+ test_read_of_access_disabled_region,
+ test_write_of_write_disabled_region,
+ test_write_of_access_disabled_region,
+ test_kernel_write_of_access_disabled_region,
+ test_kernel_write_of_write_disabled_region,
+ test_kernel_gup_of_access_disabled_region,
+ test_kernel_gup_write_to_write_disabled_region,
+ test_executing_on_unreadable_memory,
+ test_implicit_mprotect_exec_only_memory,
+ test_mprotect_with_pkey_0,
+ test_ptrace_of_child,
+ test_pkey_syscalls_on_non_allocated_pkey,
+ test_pkey_syscalls_bad_args,
+ test_pkey_alloc_exhaust,
+};
+
+void run_tests_once(void)
+{
+ int *ptr;
+ int prot = PROT_READ|PROT_WRITE;
+
+ for (test_nr = 0; test_nr < ARRAY_SIZE(pkey_tests); test_nr++) {
+ int pkey;
+ int orig_pkru_faults = pkru_faults;
+
+ dprintf1("======================\n");
+ dprintf1("test %d preparing...\n", test_nr);
+
+ tracing_on();
+ pkey = alloc_random_pkey();
+ dprintf1("test %d starting with pkey: %d\n", test_nr, pkey);
+ ptr = malloc_pkey(PAGE_SIZE, prot, pkey);
+ dprintf1("test %d starting...\n", test_nr);
+ pkey_tests[test_nr](ptr, pkey);
+ dprintf1("freeing test memory: %p\n", ptr);
+ free_pkey_malloc(ptr);
+ sys_pkey_free(pkey);
+
+ dprintf1("pkru_faults: %d\n", pkru_faults);
+ dprintf1("orig_pkru_faults: %d\n", orig_pkru_faults);
+
+ tracing_off();
+ close_test_fds();
+
+ printf("test %2d PASSED (iteration %d)\n", test_nr, iteration_nr);
+ dprintf1("======================\n\n");
+ }
+ iteration_nr++;
+}
+
+void pkey_setup_shadow(void)
+{
+ shadow_pkru = __rdpkru();
+}
+
+int main(void)
+{
+ int nr_iterations = 22;
+
+ setup_handlers();
+
+ printf("has pku: %d\n", cpu_has_pku());
+
+ if (!cpu_has_pku()) {
+ int size = PAGE_SIZE;
+ int *ptr;
+
+ printf("running PKEY tests for unsupported CPU/OS\n");
+
+ ptr = mmap(NULL, size, PROT_NONE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
+ assert(ptr != (void *)-1);
+ test_mprotect_pkey_on_unsupported_cpu(ptr, 1);
+ exit(0);
+ }
+
+ pkey_setup_shadow();
+ printf("startup pkru: %x\n", rdpkru());
+ setup_hugetlbfs();
+
+ while (nr_iterations-- > 0)
+ run_tests_once();
+
+ printf("done (all tests OK)\n");
+ return 0;
+}
diff --git a/tools/testing/selftests/x86/.gitignore b/tools/testing/selftests/x86/.gitignore
index 7757f73..eb30ffd 100644
--- a/tools/testing/selftests/x86/.gitignore
+++ b/tools/testing/selftests/x86/.gitignore
@@ -11,5 +11,4 @@ ldt_gdt
iopl
mpx-mini-test
ioperm
-protection_keys
test_vdso
diff --git a/tools/testing/selftests/x86/pkey-helpers.h b/tools/testing/selftests/x86/pkey-helpers.h
deleted file mode 100644
index 254e543..0000000
--- a/tools/testing/selftests/x86/pkey-helpers.h
+++ /dev/null
@@ -1,219 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _PKEYS_HELPER_H
-#define _PKEYS_HELPER_H
-#define _GNU_SOURCE
-#include <string.h>
-#include <stdarg.h>
-#include <stdio.h>
-#include <stdint.h>
-#include <stdbool.h>
-#include <signal.h>
-#include <assert.h>
-#include <stdlib.h>
-#include <ucontext.h>
-#include <sys/mman.h>
-
-#define NR_PKEYS 16
-#define PKRU_BITS_PER_PKEY 2
-
-#ifndef DEBUG_LEVEL
-#define DEBUG_LEVEL 0
-#endif
-#define DPRINT_IN_SIGNAL_BUF_SIZE 4096
-extern int dprint_in_signal;
-extern char dprint_in_signal_buffer[DPRINT_IN_SIGNAL_BUF_SIZE];
-static inline void sigsafe_printf(const char *format, ...)
-{
- va_list ap;
-
- if (!dprint_in_signal) {
- va_start(ap, format);
- vprintf(format, ap);
- va_end(ap);
- } else {
- int ret;
- /*
- * No printf() functions are signal-safe.
- * They deadlock easily. Write the format
- * string to get some output, even if
- * incomplete.
- */
- ret = write(1, format, strlen(format));
- if (ret < 0)
- exit(1);
- }
-}
-#define dprintf_level(level, args...) do { \
- if (level <= DEBUG_LEVEL) \
- sigsafe_printf(args); \
-} while (0)
-#define dprintf0(args...) dprintf_level(0, args)
-#define dprintf1(args...) dprintf_level(1, args)
-#define dprintf2(args...) dprintf_level(2, args)
-#define dprintf3(args...) dprintf_level(3, args)
-#define dprintf4(args...) dprintf_level(4, args)
-
-extern unsigned int shadow_pkru;
-static inline unsigned int __rdpkru(void)
-{
- unsigned int eax, edx;
- unsigned int ecx = 0;
- unsigned int pkru;
-
- asm volatile(".byte 0x0f,0x01,0xee\n\t"
- : "=a" (eax), "=d" (edx)
- : "c" (ecx));
- pkru = eax;
- return pkru;
-}
-
-static inline unsigned int _rdpkru(int line)
-{
- unsigned int pkru = __rdpkru();
-
- dprintf4("rdpkru(line=%d) pkru: %x shadow: %x\n",
- line, pkru, shadow_pkru);
- assert(pkru == shadow_pkru);
-
- return pkru;
-}
-
-#define rdpkru() _rdpkru(__LINE__)
-
-static inline void __wrpkru(unsigned int pkru)
-{
- unsigned int eax = pkru;
- unsigned int ecx = 0;
- unsigned int edx = 0;
-
- dprintf4("%s() changing %08x to %08x\n", __func__, __rdpkru(), pkru);
- asm volatile(".byte 0x0f,0x01,0xef\n\t"
- : : "a" (eax), "c" (ecx), "d" (edx));
- assert(pkru == __rdpkru());
-}
-
-static inline void wrpkru(unsigned int pkru)
-{
- dprintf4("%s() changing %08x to %08x\n", __func__, __rdpkru(), pkru);
- /* will do the shadow check for us: */
- rdpkru();
- __wrpkru(pkru);
- shadow_pkru = pkru;
- dprintf4("%s(%08x) pkru: %08x\n", __func__, pkru, __rdpkru());
-}
-
-/*
- * These are technically racy. since something could
- * change PKRU between the read and the write.
- */
-static inline void __pkey_access_allow(int pkey, int do_allow)
-{
- unsigned int pkru = rdpkru();
- int bit = pkey * 2;
-
- if (do_allow)
- pkru &= (1<<bit);
- else
- pkru |= (1<<bit);
-
- dprintf4("pkru now: %08x\n", rdpkru());
- wrpkru(pkru);
-}
-
-static inline void __pkey_write_allow(int pkey, int do_allow_write)
-{
- long pkru = rdpkru();
- int bit = pkey * 2 + 1;
-
- if (do_allow_write)
- pkru &= (1<<bit);
- else
- pkru |= (1<<bit);
-
- wrpkru(pkru);
- dprintf4("pkru now: %08x\n", rdpkru());
-}
-
-#define PROT_PKEY0 0x10 /* protection key value (bit 0) */
-#define PROT_PKEY1 0x20 /* protection key value (bit 1) */
-#define PROT_PKEY2 0x40 /* protection key value (bit 2) */
-#define PROT_PKEY3 0x80 /* protection key value (bit 3) */
-
-#define PAGE_SIZE 4096
-#define MB (1<<20)
-
-static inline void __cpuid(unsigned int *eax, unsigned int *ebx,
- unsigned int *ecx, unsigned int *edx)
-{
- /* ecx is often an input as well as an output. */
- asm volatile(
- "cpuid;"
- : "=a" (*eax),
- "=b" (*ebx),
- "=c" (*ecx),
- "=d" (*edx)
- : "0" (*eax), "2" (*ecx));
-}
-
-/* Intel-defined CPU features, CPUID level 0x00000007:0 (ecx) */
-#define X86_FEATURE_PKU (1<<3) /* Protection Keys for Userspace */
-#define X86_FEATURE_OSPKE (1<<4) /* OS Protection Keys Enable */
-
-static inline int cpu_has_pku(void)
-{
- unsigned int eax;
- unsigned int ebx;
- unsigned int ecx;
- unsigned int edx;
-
- eax = 0x7;
- ecx = 0x0;
- __cpuid(&eax, &ebx, &ecx, &edx);
-
- if (!(ecx & X86_FEATURE_PKU)) {
- dprintf2("cpu does not have PKU\n");
- return 0;
- }
- if (!(ecx & X86_FEATURE_OSPKE)) {
- dprintf2("cpu does not have OSPKE\n");
- return 0;
- }
- return 1;
-}
-
-#define XSTATE_PKRU_BIT (9)
-#define XSTATE_PKRU 0x200
-
-int pkru_xstate_offset(void)
-{
- unsigned int eax;
- unsigned int ebx;
- unsigned int ecx;
- unsigned int edx;
- int xstate_offset;
- int xstate_size;
- unsigned long XSTATE_CPUID = 0xd;
- int leaf;
-
- /* assume that XSTATE_PKRU is set in XCR0 */
- leaf = XSTATE_PKRU_BIT;
- {
- eax = XSTATE_CPUID;
- ecx = leaf;
- __cpuid(&eax, &ebx, &ecx, &edx);
-
- if (leaf == XSTATE_PKRU_BIT) {
- xstate_offset = ebx;
- xstate_size = eax;
- }
- }
-
- if (xstate_size == 0) {
- printf("could not find size/offset of PKRU in xsave state\n");
- return 0;
- }
-
- return xstate_offset;
-}
-
-#endif /* _PKEYS_HELPER_H */
diff --git a/tools/testing/selftests/x86/protection_keys.c b/tools/testing/selftests/x86/protection_keys.c
deleted file mode 100644
index 460b4bd..0000000
--- a/tools/testing/selftests/x86/protection_keys.c
+++ /dev/null
@@ -1,1485 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Tests x86 Memory Protection Keys (see Documentation/x86/protection-keys.txt)
- *
- * There are examples in here of:
- * * how to set protection keys on memory
- * * how to set/clear bits in PKRU (the rights register)
- * * how to handle SEGV_PKRU signals and extract pkey-relevant
- * information from the siginfo
- *
- * Things to add:
- * make sure KSM and KSM COW breaking works
- * prefault pages in at malloc, or not
- * protect MPX bounds tables with protection keys?
- * make sure VMA splitting/merging is working correctly
- * OOMs can destroy mm->mmap (see exit_mmap()), so make sure it is immune to pkeys
- * look for pkey "leaks" where it is still set on a VMA but "freed" back to the kernel
- * do a plain mprotect() to a mprotect_pkey() area and make sure the pkey sticks
- *
- * Compile like this:
- * gcc -o protection_keys -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm
- * gcc -m32 -o protection_keys_32 -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm
- */
-#define _GNU_SOURCE
-#include <errno.h>
-#include <linux/futex.h>
-#include <sys/time.h>
-#include <sys/syscall.h>
-#include <string.h>
-#include <stdio.h>
-#include <stdint.h>
-#include <stdbool.h>
-#include <signal.h>
-#include <assert.h>
-#include <stdlib.h>
-#include <ucontext.h>
-#include <sys/mman.h>
-#include <sys/types.h>
-#include <sys/wait.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <unistd.h>
-#include <sys/ptrace.h>
-#include <setjmp.h>
-
-#include "pkey-helpers.h"
-
-int iteration_nr = 1;
-int test_nr;
-
-unsigned int shadow_pkru;
-
-#define HPAGE_SIZE (1UL<<21)
-#define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x)))
-#define ALIGN_UP(x, align_to) (((x) + ((align_to)-1)) & ~((align_to)-1))
-#define ALIGN_DOWN(x, align_to) ((x) & ~((align_to)-1))
-#define ALIGN_PTR_UP(p, ptr_align_to) ((typeof(p))ALIGN_UP((unsigned long)(p), ptr_align_to))
-#define ALIGN_PTR_DOWN(p, ptr_align_to) ((typeof(p))ALIGN_DOWN((unsigned long)(p), ptr_align_to))
-#define __stringify_1(x...) #x
-#define __stringify(x...) __stringify_1(x)
-
-#define PTR_ERR_ENOTSUP ((void *)-ENOTSUP)
-
-int dprint_in_signal;
-char dprint_in_signal_buffer[DPRINT_IN_SIGNAL_BUF_SIZE];
-
-extern void abort_hooks(void);
-#define pkey_assert(condition) do { \
- if (!(condition)) { \
- dprintf0("assert() at %s::%d test_nr: %d iteration: %d\n", \
- __FILE__, __LINE__, \
- test_nr, iteration_nr); \
- dprintf0("errno at assert: %d", errno); \
- abort_hooks(); \
- exit(__LINE__); \
- } \
-} while (0)
-
-void cat_into_file(char *str, char *file)
-{
- int fd = open(file, O_RDWR);
- int ret;
-
- dprintf2("%s(): writing '%s' to '%s'\n", __func__, str, file);
- /*
- * these need to be raw because they are called under
- * pkey_assert()
- */
- if (fd < 0) {
- fprintf(stderr, "error opening '%s'\n", str);
- perror("error: ");
- exit(__LINE__);
- }
-
- ret = write(fd, str, strlen(str));
- if (ret != strlen(str)) {
- perror("write to file failed");
- fprintf(stderr, "filename: '%s' str: '%s'\n", file, str);
- exit(__LINE__);
- }
- close(fd);
-}
-
-#if CONTROL_TRACING > 0
-static int warned_tracing;
-int tracing_root_ok(void)
-{
- if (geteuid() != 0) {
- if (!warned_tracing)
- fprintf(stderr, "WARNING: not run as root, "
- "can not do tracing control\n");
- warned_tracing = 1;
- return 0;
- }
- return 1;
-}
-#endif
-
-void tracing_on(void)
-{
-#if CONTROL_TRACING > 0
-#define TRACEDIR "/sys/kernel/debug/tracing"
- char pidstr[32];
-
- if (!tracing_root_ok())
- return;
-
- sprintf(pidstr, "%d", getpid());
- cat_into_file("0", TRACEDIR "/tracing_on");
- cat_into_file("\n", TRACEDIR "/trace");
- if (1) {
- cat_into_file("function_graph", TRACEDIR "/current_tracer");
- cat_into_file("1", TRACEDIR "/options/funcgraph-proc");
- } else {
- cat_into_file("nop", TRACEDIR "/current_tracer");
- }
- cat_into_file(pidstr, TRACEDIR "/set_ftrace_pid");
- cat_into_file("1", TRACEDIR "/tracing_on");
- dprintf1("enabled tracing\n");
-#endif
-}
-
-void tracing_off(void)
-{
-#if CONTROL_TRACING > 0
- if (!tracing_root_ok())
- return;
- cat_into_file("0", "/sys/kernel/debug/tracing/tracing_on");
-#endif
-}
-
-void abort_hooks(void)
-{
- fprintf(stderr, "running %s()...\n", __func__);
- tracing_off();
-#ifdef SLEEP_ON_ABORT
- sleep(SLEEP_ON_ABORT);
-#endif
-}
-
-static inline void __page_o_noops(void)
-{
- /* 8-bytes of instruction * 512 bytes = 1 page */
- asm(".rept 512 ; nopl 0x7eeeeeee(%eax) ; .endr");
-}
-
-/*
- * This attempts to have roughly a page of instructions followed by a few
- * instructions that do a write, and another page of instructions. That
- * way, we are pretty sure that the write is in the second page of
- * instructions and has at least a page of padding behind it.
- *
- * *That* lets us be sure to madvise() away the write instruction, which
- * will then fault, which makes sure that the fault code handles
- * execute-only memory properly.
- */
-__attribute__((__aligned__(PAGE_SIZE)))
-void lots_o_noops_around_write(int *write_to_me)
-{
- dprintf3("running %s()\n", __func__);
- __page_o_noops();
- /* Assume this happens in the second page of instructions: */
- *write_to_me = __LINE__;
- /* pad out by another page: */
- __page_o_noops();
- dprintf3("%s() done\n", __func__);
-}
-
-/* Define some kernel-like types */
-#define u8 uint8_t
-#define u16 uint16_t
-#define u32 uint32_t
-#define u64 uint64_t
-
-#ifdef __i386__
-
-#ifndef SYS_mprotect_key
-# define SYS_mprotect_key 380
-#endif
-
-#ifndef SYS_pkey_alloc
-# define SYS_pkey_alloc 381
-# define SYS_pkey_free 382
-#endif
-
-#define REG_IP_IDX REG_EIP
-#define si_pkey_offset 0x14
-
-#else
-
-#ifndef SYS_mprotect_key
-# define SYS_mprotect_key 329
-#endif
-
-#ifndef SYS_pkey_alloc
-# define SYS_pkey_alloc 330
-# define SYS_pkey_free 331
-#endif
-
-#define REG_IP_IDX REG_RIP
-#define si_pkey_offset 0x20
-
-#endif
-
-void dump_mem(void *dumpme, int len_bytes)
-{
- char *c = (void *)dumpme;
- int i;
-
- for (i = 0; i < len_bytes; i += sizeof(u64)) {
- u64 *ptr = (u64 *)(c + i);
- dprintf1("dump[%03d][@%p]: %016jx\n", i, ptr, *ptr);
- }
-}
-
-/* Failed address bound checks: */
-#ifndef SEGV_BNDERR
-# define SEGV_BNDERR 3
-#endif
-
-#ifndef SEGV_PKUERR
-# define SEGV_PKUERR 4
-#endif
-
-static char *si_code_str(int si_code)
-{
- if (si_code == SEGV_MAPERR)
- return "SEGV_MAPERR";
- if (si_code == SEGV_ACCERR)
- return "SEGV_ACCERR";
- if (si_code == SEGV_BNDERR)
- return "SEGV_BNDERR";
- if (si_code == SEGV_PKUERR)
- return "SEGV_PKUERR";
- return "UNKNOWN";
-}
-
-int pkru_faults;
-int last_si_pkey = -1;
-void signal_handler(int signum, siginfo_t *si, void *vucontext)
-{
- ucontext_t *uctxt = vucontext;
- int trapno;
- unsigned long ip;
- char *fpregs;
- u32 *pkru_ptr;
- u64 siginfo_pkey;
- u32 *si_pkey_ptr;
- int pkru_offset;
- fpregset_t fpregset;
-
- dprint_in_signal = 1;
- dprintf1(">>>>===============SIGSEGV============================\n");
- dprintf1("%s()::%d, pkru: 0x%x shadow: %x\n", __func__, __LINE__,
- __rdpkru(), shadow_pkru);
-
- trapno = uctxt->uc_mcontext.gregs[REG_TRAPNO];
- ip = uctxt->uc_mcontext.gregs[REG_IP_IDX];
- fpregset = uctxt->uc_mcontext.fpregs;
- fpregs = (void *)fpregset;
-
- dprintf2("%s() trapno: %d ip: 0x%lx info->si_code: %s/%d\n", __func__,
- trapno, ip, si_code_str(si->si_code), si->si_code);
-#ifdef __i386__
- /*
- * 32-bit has some extra padding so that userspace can tell whether
- * the XSTATE header is present in addition to the "legacy" FPU
- * state. We just assume that it is here.
- */
- fpregs += 0x70;
-#endif
- pkru_offset = pkru_xstate_offset();
- pkru_ptr = (void *)(&fpregs[pkru_offset]);
-
- dprintf1("siginfo: %p\n", si);
- dprintf1(" fpregs: %p\n", fpregs);
- /*
- * If we got a PKRU fault, we *HAVE* to have at least one bit set in
- * here.
- */
- dprintf1("pkru_xstate_offset: %d\n", pkru_xstate_offset());
- if (DEBUG_LEVEL > 4)
- dump_mem(pkru_ptr - 128, 256);
- pkey_assert(*pkru_ptr);
-
- if ((si->si_code == SEGV_MAPERR) ||
- (si->si_code == SEGV_ACCERR) ||
- (si->si_code == SEGV_BNDERR)) {
- printf("non-PK si_code, exiting...\n");
- exit(4);
- }
-
- si_pkey_ptr = (u32 *)(((u8 *)si) + si_pkey_offset);
- dprintf1("si_pkey_ptr: %p\n", si_pkey_ptr);
- dump_mem((u8 *)si_pkey_ptr - 8, 24);
- siginfo_pkey = *si_pkey_ptr;
- pkey_assert(siginfo_pkey < NR_PKEYS);
- last_si_pkey = siginfo_pkey;
-
- dprintf1("signal pkru from xsave: %08x\n", *pkru_ptr);
- /* need __rdpkru() version so we do not do shadow_pkru checking */
- dprintf1("signal pkru from pkru: %08x\n", __rdpkru());
- dprintf1("pkey from siginfo: %jx\n", siginfo_pkey);
- *(u64 *)pkru_ptr = 0x00000000;
- dprintf1("WARNING: set PRKU=0 to allow faulting instruction to continue\n");
- pkru_faults++;
- dprintf1("<<<<==================================================\n");
- dprint_in_signal = 0;
-}
-
-int wait_all_children(void)
-{
- int status;
- return waitpid(-1, &status, 0);
-}
-
-void sig_chld(int x)
-{
- dprint_in_signal = 1;
- dprintf2("[%d] SIGCHLD: %d\n", getpid(), x);
- dprint_in_signal = 0;
-}
-
-void setup_sigsegv_handler(void)
-{
- int r, rs;
- struct sigaction newact;
- struct sigaction oldact;
-
- /* #PF is mapped to sigsegv */
- int signum = SIGSEGV;
-
- newact.sa_handler = 0;
- newact.sa_sigaction = signal_handler;
-
- /*sigset_t - signals to block while in the handler */
- /* get the old signal mask. */
- rs = sigprocmask(SIG_SETMASK, 0, &newact.sa_mask);
- pkey_assert(rs == 0);
-
- /* call sa_sigaction, not sa_handler*/
- newact.sa_flags = SA_SIGINFO;
-
- newact.sa_restorer = 0; /* void(*)(), obsolete */
- r = sigaction(signum, &newact, &oldact);
- r = sigaction(SIGALRM, &newact, &oldact);
- pkey_assert(r == 0);
-}
-
-void setup_handlers(void)
-{
- signal(SIGCHLD, &sig_chld);
- setup_sigsegv_handler();
-}
-
-pid_t fork_lazy_child(void)
-{
- pid_t forkret;
-
- forkret = fork();
- pkey_assert(forkret >= 0);
- dprintf3("[%d] fork() ret: %d\n", getpid(), forkret);
-
- if (!forkret) {
- /* in the child */
- while (1) {
- dprintf1("child sleeping...\n");
- sleep(30);
- }
- }
- return forkret;
-}
-
-#ifndef PKEY_DISABLE_ACCESS
-# define PKEY_DISABLE_ACCESS 0x1
-#endif
-
-#ifndef PKEY_DISABLE_WRITE
-# define PKEY_DISABLE_WRITE 0x2
-#endif
-
-static u32 hw_pkey_get(int pkey, unsigned long flags)
-{
- u32 mask = (PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE);
- u32 pkru = __rdpkru();
- u32 shifted_pkru;
- u32 masked_pkru;
-
- dprintf1("%s(pkey=%d, flags=%lx) = %x / %d\n",
- __func__, pkey, flags, 0, 0);
- dprintf2("%s() raw pkru: %x\n", __func__, pkru);
-
- shifted_pkru = (pkru >> (pkey * PKRU_BITS_PER_PKEY));
- dprintf2("%s() shifted_pkru: %x\n", __func__, shifted_pkru);
- masked_pkru = shifted_pkru & mask;
- dprintf2("%s() masked pkru: %x\n", __func__, masked_pkru);
- /*
- * shift down the relevant bits to the lowest two, then
- * mask off all the other high bits.
- */
- return masked_pkru;
-}
-
-static int hw_pkey_set(int pkey, unsigned long rights, unsigned long flags)
-{
- u32 mask = (PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE);
- u32 old_pkru = __rdpkru();
- u32 new_pkru;
-
- /* make sure that 'rights' only contains the bits we expect: */
- assert(!(rights & ~mask));
-
- /* copy old pkru */
- new_pkru = old_pkru;
- /* mask out bits from pkey in old value: */
- new_pkru &= ~(mask << (pkey * PKRU_BITS_PER_PKEY));
- /* OR in new bits for pkey: */
- new_pkru |= (rights << (pkey * PKRU_BITS_PER_PKEY));
-
- __wrpkru(new_pkru);
-
- dprintf3("%s(pkey=%d, rights=%lx, flags=%lx) = %x pkru now: %x old_pkru: %x\n",
- __func__, pkey, rights, flags, 0, __rdpkru(), old_pkru);
- return 0;
-}
-
-void pkey_disable_set(int pkey, int flags)
-{
- unsigned long syscall_flags = 0;
- int ret;
- int pkey_rights;
- u32 orig_pkru = rdpkru();
-
- dprintf1("START->%s(%d, 0x%x)\n", __func__,
- pkey, flags);
- pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE));
-
- pkey_rights = hw_pkey_get(pkey, syscall_flags);
-
- dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__,
- pkey, pkey, pkey_rights);
- pkey_assert(pkey_rights >= 0);
-
- pkey_rights |= flags;
-
- ret = hw_pkey_set(pkey, pkey_rights, syscall_flags);
- assert(!ret);
- /*pkru and flags have the same format */
- shadow_pkru |= flags << (pkey * 2);
- dprintf1("%s(%d) shadow: 0x%x\n", __func__, pkey, shadow_pkru);
-
- pkey_assert(ret >= 0);
-
- pkey_rights = hw_pkey_get(pkey, syscall_flags);
- dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__,
- pkey, pkey, pkey_rights);
-
- dprintf1("%s(%d) pkru: 0x%x\n", __func__, pkey, rdpkru());
- if (flags)
- pkey_assert(rdpkru() > orig_pkru);
- dprintf1("END<---%s(%d, 0x%x)\n", __func__,
- pkey, flags);
-}
-
-void pkey_disable_clear(int pkey, int flags)
-{
- unsigned long syscall_flags = 0;
- int ret;
- int pkey_rights = hw_pkey_get(pkey, syscall_flags);
- u32 orig_pkru = rdpkru();
-
- pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE));
-
- dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__,
- pkey, pkey, pkey_rights);
- pkey_assert(pkey_rights >= 0);
-
- pkey_rights |= flags;
-
- ret = hw_pkey_set(pkey, pkey_rights, 0);
- /* pkru and flags have the same format */
- shadow_pkru &= ~(flags << (pkey * 2));
- pkey_assert(ret >= 0);
-
- pkey_rights = hw_pkey_get(pkey, syscall_flags);
- dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__,
- pkey, pkey, pkey_rights);
-
- dprintf1("%s(%d) pkru: 0x%x\n", __func__, pkey, rdpkru());
- if (flags)
- assert(rdpkru() > orig_pkru);
-}
-
-void pkey_write_allow(int pkey)
-{
- pkey_disable_clear(pkey, PKEY_DISABLE_WRITE);
-}
-void pkey_write_deny(int pkey)
-{
- pkey_disable_set(pkey, PKEY_DISABLE_WRITE);
-}
-void pkey_access_allow(int pkey)
-{
- pkey_disable_clear(pkey, PKEY_DISABLE_ACCESS);
-}
-void pkey_access_deny(int pkey)
-{
- pkey_disable_set(pkey, PKEY_DISABLE_ACCESS);
-}
-
-int sys_mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot,
- unsigned long pkey)
-{
- int sret;
-
- dprintf2("%s(0x%p, %zx, prot=%lx, pkey=%lx)\n", __func__,
- ptr, size, orig_prot, pkey);
-
- errno = 0;
- sret = syscall(SYS_mprotect_key, ptr, size, orig_prot, pkey);
- if (errno) {
- dprintf2("SYS_mprotect_key sret: %d\n", sret);
- dprintf2("SYS_mprotect_key prot: 0x%lx\n", orig_prot);
- dprintf2("SYS_mprotect_key failed, errno: %d\n", errno);
- if (DEBUG_LEVEL >= 2)
- perror("SYS_mprotect_pkey");
- }
- return sret;
-}
-
-int sys_pkey_alloc(unsigned long flags, unsigned long init_val)
-{
- int ret = syscall(SYS_pkey_alloc, flags, init_val);
- dprintf1("%s(flags=%lx, init_val=%lx) syscall ret: %d errno: %d\n",
- __func__, flags, init_val, ret, errno);
- return ret;
-}
-
-int alloc_pkey(void)
-{
- int ret;
- unsigned long init_val = 0x0;
-
- dprintf1("alloc_pkey()::%d, pkru: 0x%x shadow: %x\n",
- __LINE__, __rdpkru(), shadow_pkru);
- ret = sys_pkey_alloc(0, init_val);
- /*
- * pkey_alloc() sets PKRU, so we need to reflect it in
- * shadow_pkru:
- */
- dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n",
- __LINE__, ret, __rdpkru(), shadow_pkru);
- if (ret) {
- /* clear both the bits: */
- shadow_pkru &= ~(0x3 << (ret * 2));
- dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n",
- __LINE__, ret, __rdpkru(), shadow_pkru);
- /*
- * move the new state in from init_val
- * (remember, we cheated and init_val == pkru format)
- */
- shadow_pkru |= (init_val << (ret * 2));
- }
- dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n",
- __LINE__, ret, __rdpkru(), shadow_pkru);
- dprintf1("alloc_pkey()::%d errno: %d\n", __LINE__, errno);
- /* for shadow checking: */
- rdpkru();
- dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n",
- __LINE__, ret, __rdpkru(), shadow_pkru);
- return ret;
-}
-
-int sys_pkey_free(unsigned long pkey)
-{
- int ret = syscall(SYS_pkey_free, pkey);
- dprintf1("%s(pkey=%ld) syscall ret: %d\n", __func__, pkey, ret);
- return ret;
-}
-
-/*
- * I had a bug where pkey bits could be set by mprotect() but
- * not cleared. This ensures we get lots of random bit sets
- * and clears on the vma and pte pkey bits.
- */
-int alloc_random_pkey(void)
-{
- int max_nr_pkey_allocs;
- int ret;
- int i;
- int alloced_pkeys[NR_PKEYS];
- int nr_alloced = 0;
- int random_index;
- memset(alloced_pkeys, 0, sizeof(alloced_pkeys));
-
- /* allocate every possible key and make a note of which ones we got */
- max_nr_pkey_allocs = NR_PKEYS;
- max_nr_pkey_allocs = 1;
- for (i = 0; i < max_nr_pkey_allocs; i++) {
- int new_pkey = alloc_pkey();
- if (new_pkey < 0)
- break;
- alloced_pkeys[nr_alloced++] = new_pkey;
- }
-
- pkey_assert(nr_alloced > 0);
- /* select a random one out of the allocated ones */
- random_index = rand() % nr_alloced;
- ret = alloced_pkeys[random_index];
- /* now zero it out so we don't free it next */
- alloced_pkeys[random_index] = 0;
-
- /* go through the allocated ones that we did not want and free them */
- for (i = 0; i < nr_alloced; i++) {
- int free_ret;
- if (!alloced_pkeys[i])
- continue;
- free_ret = sys_pkey_free(alloced_pkeys[i]);
- pkey_assert(!free_ret);
- }
- dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__,
- __LINE__, ret, __rdpkru(), shadow_pkru);
- return ret;
-}
-
-int mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot,
- unsigned long pkey)
-{
- int nr_iterations = random() % 100;
- int ret;
-
- while (0) {
- int rpkey = alloc_random_pkey();
- ret = sys_mprotect_pkey(ptr, size, orig_prot, pkey);
- dprintf1("sys_mprotect_pkey(%p, %zx, prot=0x%lx, pkey=%ld) ret: %d\n",
- ptr, size, orig_prot, pkey, ret);
- if (nr_iterations-- < 0)
- break;
-
- dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__,
- __LINE__, ret, __rdpkru(), shadow_pkru);
- sys_pkey_free(rpkey);
- dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__,
- __LINE__, ret, __rdpkru(), shadow_pkru);
- }
- pkey_assert(pkey < NR_PKEYS);
-
- ret = sys_mprotect_pkey(ptr, size, orig_prot, pkey);
- dprintf1("mprotect_pkey(%p, %zx, prot=0x%lx, pkey=%ld) ret: %d\n",
- ptr, size, orig_prot, pkey, ret);
- pkey_assert(!ret);
- dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__,
- __LINE__, ret, __rdpkru(), shadow_pkru);
- return ret;
-}
-
-struct pkey_malloc_record {
- void *ptr;
- long size;
- int prot;
-};
-struct pkey_malloc_record *pkey_malloc_records;
-struct pkey_malloc_record *pkey_last_malloc_record;
-long nr_pkey_malloc_records;
-void record_pkey_malloc(void *ptr, long size, int prot)
-{
- long i;
- struct pkey_malloc_record *rec = NULL;
-
- for (i = 0; i < nr_pkey_malloc_records; i++) {
- rec = &pkey_malloc_records[i];
- /* find a free record */
- if (rec)
- break;
- }
- if (!rec) {
- /* every record is full */
- size_t old_nr_records = nr_pkey_malloc_records;
- size_t new_nr_records = (nr_pkey_malloc_records * 2 + 1);
- size_t new_size = new_nr_records * sizeof(struct pkey_malloc_record);
- dprintf2("new_nr_records: %zd\n", new_nr_records);
- dprintf2("new_size: %zd\n", new_size);
- pkey_malloc_records = realloc(pkey_malloc_records, new_size);
- pkey_assert(pkey_malloc_records != NULL);
- rec = &pkey_malloc_records[nr_pkey_malloc_records];
- /*
- * realloc() does not initialize memory, so zero it from
- * the first new record all the way to the end.
- */
- for (i = 0; i < new_nr_records - old_nr_records; i++)
- memset(rec + i, 0, sizeof(*rec));
- }
- dprintf3("filling malloc record[%d/%p]: {%p, %ld}\n",
- (int)(rec - pkey_malloc_records), rec, ptr, size);
- rec->ptr = ptr;
- rec->size = size;
- rec->prot = prot;
- pkey_last_malloc_record = rec;
- nr_pkey_malloc_records++;
-}
-
-void free_pkey_malloc(void *ptr)
-{
- long i;
- int ret;
- dprintf3("%s(%p)\n", __func__, ptr);
- for (i = 0; i < nr_pkey_malloc_records; i++) {
- struct pkey_malloc_record *rec = &pkey_malloc_records[i];
- dprintf4("looking for ptr %p at record[%ld/%p]: {%p, %ld}\n",
- ptr, i, rec, rec->ptr, rec->size);
- if ((ptr < rec->ptr) ||
- (ptr >= rec->ptr + rec->size))
- continue;
-
- dprintf3("found ptr %p at record[%ld/%p]: {%p, %ld}\n",
- ptr, i, rec, rec->ptr, rec->size);
- nr_pkey_malloc_records--;
- ret = munmap(rec->ptr, rec->size);
- dprintf3("munmap ret: %d\n", ret);
- pkey_assert(!ret);
- dprintf3("clearing rec->ptr, rec: %p\n", rec);
- rec->ptr = NULL;
- dprintf3("done clearing rec->ptr, rec: %p\n", rec);
- return;
- }
- pkey_assert(false);
-}
-
-
-void *malloc_pkey_with_mprotect(long size, int prot, u16 pkey)
-{
- void *ptr;
- int ret;
-
- rdpkru();
- dprintf1("doing %s(size=%ld, prot=0x%x, pkey=%d)\n", __func__,
- size, prot, pkey);
- pkey_assert(pkey < NR_PKEYS);
- ptr = mmap(NULL, size, prot, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
- pkey_assert(ptr != (void *)-1);
- ret = mprotect_pkey((void *)ptr, PAGE_SIZE, prot, pkey);
- pkey_assert(!ret);
- record_pkey_malloc(ptr, size, prot);
- rdpkru();
-
- dprintf1("%s() for pkey %d @ %p\n", __func__, pkey, ptr);
- return ptr;
-}
-
-void *malloc_pkey_anon_huge(long size, int prot, u16 pkey)
-{
- int ret;
- void *ptr;
-
- dprintf1("doing %s(size=%ld, prot=0x%x, pkey=%d)\n", __func__,
- size, prot, pkey);
- /*
- * Guarantee we can fit at least one huge page in the resulting
- * allocation by allocating space for 2:
- */
- size = ALIGN_UP(size, HPAGE_SIZE * 2);
- ptr = mmap(NULL, size, PROT_NONE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
- pkey_assert(ptr != (void *)-1);
- record_pkey_malloc(ptr, size, prot);
- mprotect_pkey(ptr, size, prot, pkey);
-
- dprintf1("unaligned ptr: %p\n", ptr);
- ptr = ALIGN_PTR_UP(ptr, HPAGE_SIZE);
- dprintf1(" aligned ptr: %p\n", ptr);
- ret = madvise(ptr, HPAGE_SIZE, MADV_HUGEPAGE);
- dprintf1("MADV_HUGEPAGE ret: %d\n", ret);
- ret = madvise(ptr, HPAGE_SIZE, MADV_WILLNEED);
- dprintf1("MADV_WILLNEED ret: %d\n", ret);
- memset(ptr, 0, HPAGE_SIZE);
-
- dprintf1("mmap()'d thp for pkey %d @ %p\n", pkey, ptr);
- return ptr;
-}
-
-int hugetlb_setup_ok;
-#define GET_NR_HUGE_PAGES 10
-void setup_hugetlbfs(void)
-{
- int err;
- int fd;
- char buf[] = "123";
-
- if (geteuid() != 0) {
- fprintf(stderr, "WARNING: not run as root, can not do hugetlb test\n");
- return;
- }
-
- cat_into_file(__stringify(GET_NR_HUGE_PAGES), "/proc/sys/vm/nr_hugepages");
-
- /*
- * Now go make sure that we got the pages and that they
- * are 2M pages. Someone might have made 1G the default.
- */
- fd = open("/sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages", O_RDONLY);
- if (fd < 0) {
- perror("opening sysfs 2M hugetlb config");
- return;
- }
-
- /* -1 to guarantee leaving the trailing \0 */
- err = read(fd, buf, sizeof(buf)-1);
- close(fd);
- if (err <= 0) {
- perror("reading sysfs 2M hugetlb config");
- return;
- }
-
- if (atoi(buf) != GET_NR_HUGE_PAGES) {
- fprintf(stderr, "could not confirm 2M pages, got: '%s' expected %d\n",
- buf, GET_NR_HUGE_PAGES);
- return;
- }
-
- hugetlb_setup_ok = 1;
-}
-
-void *malloc_pkey_hugetlb(long size, int prot, u16 pkey)
-{
- void *ptr;
- int flags = MAP_ANONYMOUS|MAP_PRIVATE|MAP_HUGETLB;
-
- if (!hugetlb_setup_ok)
- return PTR_ERR_ENOTSUP;
-
- dprintf1("doing %s(%ld, %x, %x)\n", __func__, size, prot, pkey);
- size = ALIGN_UP(size, HPAGE_SIZE * 2);
- pkey_assert(pkey < NR_PKEYS);
- ptr = mmap(NULL, size, PROT_NONE, flags, -1, 0);
- pkey_assert(ptr != (void *)-1);
- mprotect_pkey(ptr, size, prot, pkey);
-
- record_pkey_malloc(ptr, size, prot);
-
- dprintf1("mmap()'d hugetlbfs for pkey %d @ %p\n", pkey, ptr);
- return ptr;
-}
-
-void *malloc_pkey_mmap_dax(long size, int prot, u16 pkey)
-{
- void *ptr;
- int fd;
-
- dprintf1("doing %s(size=%ld, prot=0x%x, pkey=%d)\n", __func__,
- size, prot, pkey);
- pkey_assert(pkey < NR_PKEYS);
- fd = open("/dax/foo", O_RDWR);
- pkey_assert(fd >= 0);
-
- ptr = mmap(0, size, prot, MAP_SHARED, fd, 0);
- pkey_assert(ptr != (void *)-1);
-
- mprotect_pkey(ptr, size, prot, pkey);
-
- record_pkey_malloc(ptr, size, prot);
-
- dprintf1("mmap()'d for pkey %d @ %p\n", pkey, ptr);
- close(fd);
- return ptr;
-}
-
-void *(*pkey_malloc[])(long size, int prot, u16 pkey) = {
-
- malloc_pkey_with_mprotect,
- malloc_pkey_anon_huge,
- malloc_pkey_hugetlb
-/* can not do direct with the pkey_mprotect() API:
- malloc_pkey_mmap_direct,
- malloc_pkey_mmap_dax,
-*/
-};
-
-void *malloc_pkey(long size, int prot, u16 pkey)
-{
- void *ret;
- static int malloc_type;
- int nr_malloc_types = ARRAY_SIZE(pkey_malloc);
-
- pkey_assert(pkey < NR_PKEYS);
-
- while (1) {
- pkey_assert(malloc_type < nr_malloc_types);
-
- ret = pkey_malloc[malloc_type](size, prot, pkey);
- pkey_assert(ret != (void *)-1);
-
- malloc_type++;
- if (malloc_type >= nr_malloc_types)
- malloc_type = (random()%nr_malloc_types);
-
- /* try again if the malloc_type we tried is unsupported */
- if (ret == PTR_ERR_ENOTSUP)
- continue;
-
- break;
- }
-
- dprintf3("%s(%ld, prot=%x, pkey=%x) returning: %p\n", __func__,
- size, prot, pkey, ret);
- return ret;
-}
-
-int last_pkru_faults;
-#define UNKNOWN_PKEY -2
-void expected_pk_fault(int pkey)
-{
- dprintf2("%s(): last_pkru_faults: %d pkru_faults: %d\n",
- __func__, last_pkru_faults, pkru_faults);
- dprintf2("%s(%d): last_si_pkey: %d\n", __func__, pkey, last_si_pkey);
- pkey_assert(last_pkru_faults + 1 == pkru_faults);
-
- /*
- * For exec-only memory, we do not know the pkey in
- * advance, so skip this check.
- */
- if (pkey != UNKNOWN_PKEY)
- pkey_assert(last_si_pkey == pkey);
-
- /*
- * The signal handler shold have cleared out PKRU to let the
- * test program continue. We now have to restore it.
- */
- if (__rdpkru() != 0)
- pkey_assert(0);
-
- __wrpkru(shadow_pkru);
- dprintf1("%s() set PKRU=%x to restore state after signal nuked it\n",
- __func__, shadow_pkru);
- last_pkru_faults = pkru_faults;
- last_si_pkey = -1;
-}
-
-#define do_not_expect_pk_fault(msg) do { \
- if (last_pkru_faults != pkru_faults) \
- dprintf0("unexpected PK fault: %s\n", msg); \
- pkey_assert(last_pkru_faults == pkru_faults); \
-} while (0)
-
-int test_fds[10] = { -1 };
-int nr_test_fds;
-void __save_test_fd(int fd)
-{
- pkey_assert(fd >= 0);
- pkey_assert(nr_test_fds < ARRAY_SIZE(test_fds));
- test_fds[nr_test_fds] = fd;
- nr_test_fds++;
-}
-
-int get_test_read_fd(void)
-{
- int test_fd = open("/etc/passwd", O_RDONLY);
- __save_test_fd(test_fd);
- return test_fd;
-}
-
-void close_test_fds(void)
-{
- int i;
-
- for (i = 0; i < nr_test_fds; i++) {
- if (test_fds[i] < 0)
- continue;
- close(test_fds[i]);
- test_fds[i] = -1;
- }
- nr_test_fds = 0;
-}
-
-#define barrier() __asm__ __volatile__("": : :"memory")
-__attribute__((noinline)) int read_ptr(int *ptr)
-{
- /*
- * Keep GCC from optimizing this away somehow
- */
- barrier();
- return *ptr;
-}
-
-void test_read_of_write_disabled_region(int *ptr, u16 pkey)
-{
- int ptr_contents;
-
- dprintf1("disabling write access to PKEY[1], doing read\n");
- pkey_write_deny(pkey);
- ptr_contents = read_ptr(ptr);
- dprintf1("*ptr: %d\n", ptr_contents);
- dprintf1("\n");
-}
-void test_read_of_access_disabled_region(int *ptr, u16 pkey)
-{
- int ptr_contents;
-
- dprintf1("disabling access to PKEY[%02d], doing read @ %p\n", pkey, ptr);
- rdpkru();
- pkey_access_deny(pkey);
- ptr_contents = read_ptr(ptr);
- dprintf1("*ptr: %d\n", ptr_contents);
- expected_pk_fault(pkey);
-}
-void test_write_of_write_disabled_region(int *ptr, u16 pkey)
-{
- dprintf1("disabling write access to PKEY[%02d], doing write\n", pkey);
- pkey_write_deny(pkey);
- *ptr = __LINE__;
- expected_pk_fault(pkey);
-}
-void test_write_of_access_disabled_region(int *ptr, u16 pkey)
-{
- dprintf1("disabling access to PKEY[%02d], doing write\n", pkey);
- pkey_access_deny(pkey);
- *ptr = __LINE__;
- expected_pk_fault(pkey);
-}
-void test_kernel_write_of_access_disabled_region(int *ptr, u16 pkey)
-{
- int ret;
- int test_fd = get_test_read_fd();
-
- dprintf1("disabling access to PKEY[%02d], "
- "having kernel read() to buffer\n", pkey);
- pkey_access_deny(pkey);
- ret = read(test_fd, ptr, 1);
- dprintf1("read ret: %d\n", ret);
- pkey_assert(ret);
-}
-void test_kernel_write_of_write_disabled_region(int *ptr, u16 pkey)
-{
- int ret;
- int test_fd = get_test_read_fd();
-
- pkey_write_deny(pkey);
- ret = read(test_fd, ptr, 100);
- dprintf1("read ret: %d\n", ret);
- if (ret < 0 && (DEBUG_LEVEL > 0))
- perror("verbose read result (OK for this to be bad)");
- pkey_assert(ret);
-}
-
-void test_kernel_gup_of_access_disabled_region(int *ptr, u16 pkey)
-{
- int pipe_ret, vmsplice_ret;
- struct iovec iov;
- int pipe_fds[2];
-
- pipe_ret = pipe(pipe_fds);
-
- pkey_assert(pipe_ret == 0);
- dprintf1("disabling access to PKEY[%02d], "
- "having kernel vmsplice from buffer\n", pkey);
- pkey_access_deny(pkey);
- iov.iov_base = ptr;
- iov.iov_len = PAGE_SIZE;
- vmsplice_ret = vmsplice(pipe_fds[1], &iov, 1, SPLICE_F_GIFT);
- dprintf1("vmsplice() ret: %d\n", vmsplice_ret);
- pkey_assert(vmsplice_ret == -1);
-
- close(pipe_fds[0]);
- close(pipe_fds[1]);
-}
-
-void test_kernel_gup_write_to_write_disabled_region(int *ptr, u16 pkey)
-{
- int ignored = 0xdada;
- int futex_ret;
- int some_int = __LINE__;
-
- dprintf1("disabling write to PKEY[%02d], "
- "doing futex gunk in buffer\n", pkey);
- *ptr = some_int;
- pkey_write_deny(pkey);
- futex_ret = syscall(SYS_futex, ptr, FUTEX_WAIT, some_int-1, NULL,
- &ignored, ignored);
- if (DEBUG_LEVEL > 0)
- perror("futex");
- dprintf1("futex() ret: %d\n", futex_ret);
-}
-
-/* Assumes that all pkeys other than 'pkey' are unallocated */
-void test_pkey_syscalls_on_non_allocated_pkey(int *ptr, u16 pkey)
-{
- int err;
- int i;
-
- /* Note: 0 is the default pkey, so don't mess with it */
- for (i = 1; i < NR_PKEYS; i++) {
- if (pkey == i)
- continue;
-
- dprintf1("trying get/set/free to non-allocated pkey: %2d\n", i);
- err = sys_pkey_free(i);
- pkey_assert(err);
-
- err = sys_pkey_free(i);
- pkey_assert(err);
-
- err = sys_mprotect_pkey(ptr, PAGE_SIZE, PROT_READ, i);
- pkey_assert(err);
- }
-}
-
-/* Assumes that all pkeys other than 'pkey' are unallocated */
-void test_pkey_syscalls_bad_args(int *ptr, u16 pkey)
-{
- int err;
- int bad_pkey = NR_PKEYS+99;
-
- /* pass a known-invalid pkey in: */
- err = sys_mprotect_pkey(ptr, PAGE_SIZE, PROT_READ, bad_pkey);
- pkey_assert(err);
-}
-
-/* Assumes that all pkeys other than 'pkey' are unallocated */
-void test_pkey_alloc_exhaust(int *ptr, u16 pkey)
-{
- int err;
- int allocated_pkeys[NR_PKEYS] = {0};
- int nr_allocated_pkeys = 0;
- int i;
-
- for (i = 0; i < NR_PKEYS*2; i++) {
- int new_pkey;
- dprintf1("%s() alloc loop: %d\n", __func__, i);
- new_pkey = alloc_pkey();
- dprintf4("%s()::%d, err: %d pkru: 0x%x shadow: 0x%x\n", __func__,
- __LINE__, err, __rdpkru(), shadow_pkru);
- rdpkru(); /* for shadow checking */
- dprintf2("%s() errno: %d ENOSPC: %d\n", __func__, errno, ENOSPC);
- if ((new_pkey == -1) && (errno == ENOSPC)) {
- dprintf2("%s() failed to allocate pkey after %d tries\n",
- __func__, nr_allocated_pkeys);
- break;
- }
- pkey_assert(nr_allocated_pkeys < NR_PKEYS);
- allocated_pkeys[nr_allocated_pkeys++] = new_pkey;
- }
-
- dprintf3("%s()::%d\n", __func__, __LINE__);
-
- /*
- * ensure it did not reach the end of the loop without
- * failure:
- */
- pkey_assert(i < NR_PKEYS*2);
-
- /*
- * There are 16 pkeys supported in hardware. Three are
- * allocated by the time we get here:
- * 1. The default key (0)
- * 2. One possibly consumed by an execute-only mapping.
- * 3. One allocated by the test code and passed in via
- * 'pkey' to this function.
- * Ensure that we can allocate at least another 13 (16-3).
- */
- pkey_assert(i >= NR_PKEYS-3);
-
- for (i = 0; i < nr_allocated_pkeys; i++) {
- err = sys_pkey_free(allocated_pkeys[i]);
- pkey_assert(!err);
- rdpkru(); /* for shadow checking */
- }
-}
-
-/*
- * pkey 0 is special. It is allocated by default, so you do not
- * have to call pkey_alloc() to use it first. Make sure that it
- * is usable.
- */
-void test_mprotect_with_pkey_0(int *ptr, u16 pkey)
-{
- long size;
- int prot;
-
- assert(pkey_last_malloc_record);
- size = pkey_last_malloc_record->size;
- /*
- * This is a bit of a hack. But mprotect() requires
- * huge-page-aligned sizes when operating on hugetlbfs.
- * So, make sure that we use something that's a multiple
- * of a huge page when we can.
- */
- if (size >= HPAGE_SIZE)
- size = HPAGE_SIZE;
- prot = pkey_last_malloc_record->prot;
-
- /* Use pkey 0 */
- mprotect_pkey(ptr, size, prot, 0);
-
- /* Make sure that we can set it back to the original pkey. */
- mprotect_pkey(ptr, size, prot, pkey);
-}
-
-void test_ptrace_of_child(int *ptr, u16 pkey)
-{
- __attribute__((__unused__)) int peek_result;
- pid_t child_pid;
- void *ignored = 0;
- long ret;
- int status;
- /*
- * This is the "control" for our little expermient. Make sure
- * we can always access it when ptracing.
- */
- int *plain_ptr_unaligned = malloc(HPAGE_SIZE);
- int *plain_ptr = ALIGN_PTR_UP(plain_ptr_unaligned, PAGE_SIZE);
-
- /*
- * Fork a child which is an exact copy of this process, of course.
- * That means we can do all of our tests via ptrace() and then plain
- * memory access and ensure they work differently.
- */
- child_pid = fork_lazy_child();
- dprintf1("[%d] child pid: %d\n", getpid(), child_pid);
-
- ret = ptrace(PTRACE_ATTACH, child_pid, ignored, ignored);
- if (ret)
- perror("attach");
- dprintf1("[%d] attach ret: %ld %d\n", getpid(), ret, __LINE__);
- pkey_assert(ret != -1);
- ret = waitpid(child_pid, &status, WUNTRACED);
- if ((ret != child_pid) || !(WIFSTOPPED(status))) {
- fprintf(stderr, "weird waitpid result %ld stat %x\n",
- ret, status);
- pkey_assert(0);
- }
- dprintf2("waitpid ret: %ld\n", ret);
- dprintf2("waitpid status: %d\n", status);
-
- pkey_access_deny(pkey);
- pkey_write_deny(pkey);
-
- /* Write access, untested for now:
- ret = ptrace(PTRACE_POKEDATA, child_pid, peek_at, data);
- pkey_assert(ret != -1);
- dprintf1("poke at %p: %ld\n", peek_at, ret);
- */
-
- /*
- * Try to access the pkey-protected "ptr" via ptrace:
- */
- ret = ptrace(PTRACE_PEEKDATA, child_pid, ptr, ignored);
- /* expect it to work, without an error: */
- pkey_assert(ret != -1);
- /* Now access from the current task, and expect an exception: */
- peek_result = read_ptr(ptr);
- expected_pk_fault(pkey);
-
- /*
- * Try to access the NON-pkey-protected "plain_ptr" via ptrace:
- */
- ret = ptrace(PTRACE_PEEKDATA, child_pid, plain_ptr, ignored);
- /* expect it to work, without an error: */
- pkey_assert(ret != -1);
- /* Now access from the current task, and expect NO exception: */
- peek_result = read_ptr(plain_ptr);
- do_not_expect_pk_fault("read plain pointer after ptrace");
-
- ret = ptrace(PTRACE_DETACH, child_pid, ignored, 0);
- pkey_assert(ret != -1);
-
- ret = kill(child_pid, SIGKILL);
- pkey_assert(ret != -1);
-
- wait(&status);
-
- free(plain_ptr_unaligned);
-}
-
-void *get_pointer_to_instructions(void)
-{
- void *p1;
-
- p1 = ALIGN_PTR_UP(&lots_o_noops_around_write, PAGE_SIZE);
- dprintf3("&lots_o_noops: %p\n", &lots_o_noops_around_write);
- /* lots_o_noops_around_write should be page-aligned already */
- assert(p1 == &lots_o_noops_around_write);
-
- /* Point 'p1' at the *second* page of the function: */
- p1 += PAGE_SIZE;
-
- /*
- * Try to ensure we fault this in on next touch to ensure
- * we get an instruction fault as opposed to a data one
- */
- madvise(p1, PAGE_SIZE, MADV_DONTNEED);
-
- return p1;
-}
-
-void test_executing_on_unreadable_memory(int *ptr, u16 pkey)
-{
- void *p1;
- int scratch;
- int ptr_contents;
- int ret;
-
- p1 = get_pointer_to_instructions();
- lots_o_noops_around_write(&scratch);
- ptr_contents = read_ptr(p1);
- dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents);
-
- ret = mprotect_pkey(p1, PAGE_SIZE, PROT_EXEC, (u64)pkey);
- pkey_assert(!ret);
- pkey_access_deny(pkey);
-
- dprintf2("pkru: %x\n", rdpkru());
-
- /*
- * Make sure this is an *instruction* fault
- */
- madvise(p1, PAGE_SIZE, MADV_DONTNEED);
- lots_o_noops_around_write(&scratch);
- do_not_expect_pk_fault("executing on PROT_EXEC memory");
- ptr_contents = read_ptr(p1);
- dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents);
- expected_pk_fault(pkey);
-}
-
-void test_implicit_mprotect_exec_only_memory(int *ptr, u16 pkey)
-{
- void *p1;
- int scratch;
- int ptr_contents;
- int ret;
-
- dprintf1("%s() start\n", __func__);
-
- p1 = get_pointer_to_instructions();
- lots_o_noops_around_write(&scratch);
- ptr_contents = read_ptr(p1);
- dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents);
-
- /* Use a *normal* mprotect(), not mprotect_pkey(): */
- ret = mprotect(p1, PAGE_SIZE, PROT_EXEC);
- pkey_assert(!ret);
-
- dprintf2("pkru: %x\n", rdpkru());
-
- /* Make sure this is an *instruction* fault */
- madvise(p1, PAGE_SIZE, MADV_DONTNEED);
- lots_o_noops_around_write(&scratch);
- do_not_expect_pk_fault("executing on PROT_EXEC memory");
- ptr_contents = read_ptr(p1);
- dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents);
- expected_pk_fault(UNKNOWN_PKEY);
-
- /*
- * Put the memory back to non-PROT_EXEC. Should clear the
- * exec-only pkey off the VMA and allow it to be readable
- * again. Go to PROT_NONE first to check for a kernel bug
- * that did not clear the pkey when doing PROT_NONE.
- */
- ret = mprotect(p1, PAGE_SIZE, PROT_NONE);
- pkey_assert(!ret);
-
- ret = mprotect(p1, PAGE_SIZE, PROT_READ|PROT_EXEC);
- pkey_assert(!ret);
- ptr_contents = read_ptr(p1);
- do_not_expect_pk_fault("plain read on recently PROT_EXEC area");
-}
-
-void test_mprotect_pkey_on_unsupported_cpu(int *ptr, u16 pkey)
-{
- int size = PAGE_SIZE;
- int sret;
-
- if (cpu_has_pku()) {
- dprintf1("SKIP: %s: no CPU support\n", __func__);
- return;
- }
-
- sret = syscall(SYS_mprotect_key, ptr, size, PROT_READ, pkey);
- pkey_assert(sret < 0);
-}
-
-void (*pkey_tests[])(int *ptr, u16 pkey) = {
- test_read_of_write_disabled_region,
- test_read_of_access_disabled_region,
- test_write_of_write_disabled_region,
- test_write_of_access_disabled_region,
- test_kernel_write_of_access_disabled_region,
- test_kernel_write_of_write_disabled_region,
- test_kernel_gup_of_access_disabled_region,
- test_kernel_gup_write_to_write_disabled_region,
- test_executing_on_unreadable_memory,
- test_implicit_mprotect_exec_only_memory,
- test_mprotect_with_pkey_0,
- test_ptrace_of_child,
- test_pkey_syscalls_on_non_allocated_pkey,
- test_pkey_syscalls_bad_args,
- test_pkey_alloc_exhaust,
-};
-
-void run_tests_once(void)
-{
- int *ptr;
- int prot = PROT_READ|PROT_WRITE;
-
- for (test_nr = 0; test_nr < ARRAY_SIZE(pkey_tests); test_nr++) {
- int pkey;
- int orig_pkru_faults = pkru_faults;
-
- dprintf1("======================\n");
- dprintf1("test %d preparing...\n", test_nr);
-
- tracing_on();
- pkey = alloc_random_pkey();
- dprintf1("test %d starting with pkey: %d\n", test_nr, pkey);
- ptr = malloc_pkey(PAGE_SIZE, prot, pkey);
- dprintf1("test %d starting...\n", test_nr);
- pkey_tests[test_nr](ptr, pkey);
- dprintf1("freeing test memory: %p\n", ptr);
- free_pkey_malloc(ptr);
- sys_pkey_free(pkey);
-
- dprintf1("pkru_faults: %d\n", pkru_faults);
- dprintf1("orig_pkru_faults: %d\n", orig_pkru_faults);
-
- tracing_off();
- close_test_fds();
-
- printf("test %2d PASSED (iteration %d)\n", test_nr, iteration_nr);
- dprintf1("======================\n\n");
- }
- iteration_nr++;
-}
-
-void pkey_setup_shadow(void)
-{
- shadow_pkru = __rdpkru();
-}
-
-int main(void)
-{
- int nr_iterations = 22;
-
- setup_handlers();
-
- printf("has pku: %d\n", cpu_has_pku());
-
- if (!cpu_has_pku()) {
- int size = PAGE_SIZE;
- int *ptr;
-
- printf("running PKEY tests for unsupported CPU/OS\n");
-
- ptr = mmap(NULL, size, PROT_NONE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
- assert(ptr != (void *)-1);
- test_mprotect_pkey_on_unsupported_cpu(ptr, 1);
- exit(0);
- }
-
- pkey_setup_shadow();
- printf("startup pkru: %x\n", rdpkru());
- setup_hugetlbfs();
-
- while (nr_iterations-- > 0)
- run_tests_once();
-
- printf("done (all tests OK)\n");
- return 0;
-}
--
1.7.1
^ permalink raw reply related
* [PATCH v13 00/24] selftests, powerpc, x86 : Memory Protection Keys
From: Ram Pai @ 2018-06-14 0:44 UTC (permalink / raw)
To: shuahkh, linux-kselftest
Cc: mpe, linuxppc-dev, linux-mm, x86, linux-arch, mingo, dave.hansen,
mhocko, bauerman, linuxram, fweimer, msuchanek, aneesh.kumar
Memory protection keys enables an application to protect its address space from
inadvertent access by its own code.
This feature is now enabled on powerpc architecture and integrated in
4.16-rc1. The patches move the selftests to arch neutral directory
and enhance their test coverage.
Test
----
Verified for correctness on powerpc. Need help verifying on x86.
Compiles on x86.
History:
-------
version 13:
(1) Incorporated comments for Dave Hansen.
(2) Added one more test for correct pkey-0 behavior.
version 12:
(1) fixed the offset of pkey field in the siginfo structure for
x86_64 and powerpc. And tries to use the actual field
if the headers have it defined.
version 11:
(1) fixed a deadlock in the ptrace testcase.
version 10 and prior:
(1) moved the testcase to arch neutral directory
(2) split the changes into incremental patches.
Ram Pai (22):
selftests/x86: Move protecton key selftest to arch neutral directory
selftests/vm: rename all references to pkru to a generic name
selftests/vm: move generic definitions to header file
selftests/vm: typecast the pkey register
selftests/vm: generic function to handle shadow key register
selftests/vm: fix the wrong assert in pkey_disable_set()
selftests/vm: fixed bugs in pkey_disable_clear()
selftests/vm: clear the bits in shadow reg when a pkey is freed.
selftests/vm: fix alloc_random_pkey() to make it really random
selftests/vm: introduce two arch independent abstraction
selftests/vm: pkey register should match shadow pkey
selftests/vm: generic cleanup
selftests/vm: powerpc implementation for generic abstraction
selftests/vm: clear the bits in shadow reg when a pkey is freed.
selftests/vm: powerpc implementation to check support for pkey
selftests/vm: fix an assertion in test_pkey_alloc_exhaust()
selftests/vm: associate key on a mapped page and detect access
violation
selftests/vm: associate key on a mapped page and detect write
violation
selftests/vm: detect write violation on a mapped access-denied-key
page
selftests/vm: testcases must restore pkey-permissions
selftests/vm: sub-page allocator
selftests/vm: test correct behavior of pkey-0
Thiago Jung Bauermann (2):
selftests/vm: move arch-specific definitions to arch-specific header
selftests/vm: Make gcc check arguments of sigsafe_printf()
tools/testing/selftests/vm/.gitignore | 1 +
tools/testing/selftests/vm/Makefile | 1 +
tools/testing/selftests/vm/pkey-helpers.h | 214 ++++
tools/testing/selftests/vm/pkey-powerpc.h | 126 ++
tools/testing/selftests/vm/pkey-x86.h | 184 +++
tools/testing/selftests/vm/protection_keys.c | 1598 +++++++++++++++++++++++++
tools/testing/selftests/x86/.gitignore | 1 -
tools/testing/selftests/x86/pkey-helpers.h | 219 ----
tools/testing/selftests/x86/protection_keys.c | 1485 -----------------------
9 files changed, 2124 insertions(+), 1705 deletions(-)
create mode 100644 tools/testing/selftests/vm/pkey-helpers.h
create mode 100644 tools/testing/selftests/vm/pkey-powerpc.h
create mode 100644 tools/testing/selftests/vm/pkey-x86.h
create mode 100644 tools/testing/selftests/vm/protection_keys.c
delete mode 100644 tools/testing/selftests/x86/pkey-helpers.h
delete mode 100644 tools/testing/selftests/x86/protection_keys.c
^ permalink raw reply
* Re: [PATCH] powerpc/64s/radix: Fix MADV_[FREE|DONTNEED] TLB flush miss problem with THP
From: kbuild test robot @ 2018-06-14 0:35 UTC (permalink / raw)
To: Nicholas Piggin
Cc: kbuild-all, linuxppc-dev, Aneesh Kumar K . V, Nicholas Piggin
In-Reply-To: <20180613095858.31078-1-npiggin@gmail.com>
[-- Attachment #1: Type: text/plain, Size: 20268 bytes --]
Hi Nicholas,
I love your patch! Yet something to improve:
[auto build test ERROR on powerpc/next]
[also build test ERROR on next-20180613]
[cannot apply to v4.17]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]
url: https://github.com/0day-ci/linux/commits/Nicholas-Piggin/powerpc-64s-radix-Fix-MADV_-FREE-DONTNEED-TLB-flush-miss-problem-with-THP/20180613-180928
base: https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git next
config: powerpc-cell_defconfig (attached as .config)
compiler: powerpc64-linux-gnu-gcc (Debian 7.2.0-11) 7.2.0
reproduce:
wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# save the attached .config to linux build tree
GCC_VERSION=7.2.0 make.cross ARCH=powerpc
All error/warnings (new ones prefixed by >>):
In file included from include/asm-generic/bug.h:5:0,
from arch/powerpc/include/asm/bug.h:128,
from include/linux/bug.h:5,
from include/linux/mmdebug.h:5,
from include/linux/mm.h:9,
from arch/powerpc/mm/tlb-radix.c:12:
In function '__radix__flush_tlb_range',
inlined from 'radix__tlb_flush' at arch/powerpc/mm/tlb-radix.c:898:3:
>> include/linux/compiler.h:339:38: error: call to '__compiletime_assert_745' declared with attribute error: BUILD_BUG failed
_compiletime_assert(condition, msg, __compiletime_assert_, __LINE__)
^
include/linux/compiler.h:319:4: note: in definition of macro '__compiletime_assert'
prefix ## suffix(); \
^~~~~~
include/linux/compiler.h:339:2: note: in expansion of macro '_compiletime_assert'
_compiletime_assert(condition, msg, __compiletime_assert_, __LINE__)
^~~~~~~~~~~~~~~~~~~
include/linux/build_bug.h:45:37: note: in expansion of macro 'compiletime_assert'
#define BUILD_BUG_ON_MSG(cond, msg) compiletime_assert(!(cond), msg)
^~~~~~~~~~~~~~~~~~
include/linux/build_bug.h:79:21: note: in expansion of macro 'BUILD_BUG_ON_MSG'
#define BUILD_BUG() BUILD_BUG_ON_MSG(1, "BUILD_BUG failed")
^~~~~~~~~~~~~~~~
>> include/linux/huge_mm.h:251:27: note: in expansion of macro 'BUILD_BUG'
#define HPAGE_PMD_SIZE ({ BUILD_BUG(); 0; })
^~~~~~~~~
>> arch/powerpc/mm/tlb-radix.c:745:22: note: in expansion of macro 'HPAGE_PMD_SIZE'
hstart = (start + HPAGE_PMD_SIZE - 1) & HPAGE_PMD_MASK;
^~~~~~~~~~~~~~
>> include/linux/compiler.h:339:38: error: call to '__compiletime_assert_745' declared with attribute error: BUILD_BUG failed
_compiletime_assert(condition, msg, __compiletime_assert_, __LINE__)
^
include/linux/compiler.h:319:4: note: in definition of macro '__compiletime_assert'
prefix ## suffix(); \
^~~~~~
include/linux/compiler.h:339:2: note: in expansion of macro '_compiletime_assert'
_compiletime_assert(condition, msg, __compiletime_assert_, __LINE__)
^~~~~~~~~~~~~~~~~~~
include/linux/build_bug.h:45:37: note: in expansion of macro 'compiletime_assert'
#define BUILD_BUG_ON_MSG(cond, msg) compiletime_assert(!(cond), msg)
^~~~~~~~~~~~~~~~~~
include/linux/build_bug.h:79:21: note: in expansion of macro 'BUILD_BUG_ON_MSG'
#define BUILD_BUG() BUILD_BUG_ON_MSG(1, "BUILD_BUG failed")
^~~~~~~~~~~~~~~~
include/linux/huge_mm.h:250:27: note: in expansion of macro 'BUILD_BUG'
#define HPAGE_PMD_MASK ({ BUILD_BUG(); 0; })
^~~~~~~~~
>> arch/powerpc/mm/tlb-radix.c:745:44: note: in expansion of macro 'HPAGE_PMD_MASK'
hstart = (start + HPAGE_PMD_SIZE - 1) & HPAGE_PMD_MASK;
^~~~~~~~~~~~~~
include/linux/compiler.h:339:38: error: call to '__compiletime_assert_746' declared with attribute error: BUILD_BUG failed
_compiletime_assert(condition, msg, __compiletime_assert_, __LINE__)
^
include/linux/compiler.h:319:4: note: in definition of macro '__compiletime_assert'
prefix ## suffix(); \
^~~~~~
include/linux/compiler.h:339:2: note: in expansion of macro '_compiletime_assert'
_compiletime_assert(condition, msg, __compiletime_assert_, __LINE__)
^~~~~~~~~~~~~~~~~~~
include/linux/build_bug.h:45:37: note: in expansion of macro 'compiletime_assert'
#define BUILD_BUG_ON_MSG(cond, msg) compiletime_assert(!(cond), msg)
^~~~~~~~~~~~~~~~~~
include/linux/build_bug.h:79:21: note: in expansion of macro 'BUILD_BUG_ON_MSG'
#define BUILD_BUG() BUILD_BUG_ON_MSG(1, "BUILD_BUG failed")
^~~~~~~~~~~~~~~~
include/linux/huge_mm.h:250:27: note: in expansion of macro 'BUILD_BUG'
#define HPAGE_PMD_MASK ({ BUILD_BUG(); 0; })
^~~~~~~~~
arch/powerpc/mm/tlb-radix.c:746:17: note: in expansion of macro 'HPAGE_PMD_MASK'
hend = end & HPAGE_PMD_MASK;
^~~~~~~~~~~~~~
include/linux/compiler.h:339:38: error: call to '__compiletime_assert_752' declared with attribute error: BUILD_BUG failed
_compiletime_assert(condition, msg, __compiletime_assert_, __LINE__)
^
include/linux/compiler.h:319:4: note: in definition of macro '__compiletime_assert'
prefix ## suffix(); \
^~~~~~
include/linux/compiler.h:339:2: note: in expansion of macro '_compiletime_assert'
_compiletime_assert(condition, msg, __compiletime_assert_, __LINE__)
^~~~~~~~~~~~~~~~~~~
include/linux/build_bug.h:45:37: note: in expansion of macro 'compiletime_assert'
#define BUILD_BUG_ON_MSG(cond, msg) compiletime_assert(!(cond), msg)
^~~~~~~~~~~~~~~~~~
include/linux/build_bug.h:79:21: note: in expansion of macro 'BUILD_BUG_ON_MSG'
#define BUILD_BUG() BUILD_BUG_ON_MSG(1, "BUILD_BUG failed")
^~~~~~~~~~~~~~~~
include/linux/huge_mm.h:255:27: note: in expansion of macro 'BUILD_BUG'
#define HPAGE_PUD_SIZE ({ BUILD_BUG(); 0; })
^~~~~~~~~
>> arch/powerpc/mm/tlb-radix.c:752:22: note: in expansion of macro 'HPAGE_PUD_SIZE'
gstart = (start + HPAGE_PUD_SIZE - 1) & HPAGE_PUD_MASK;
^~~~~~~~~~~~~~
include/linux/compiler.h:339:38: error: call to '__compiletime_assert_752' declared with attribute error: BUILD_BUG failed
_compiletime_assert(condition, msg, __compiletime_assert_, __LINE__)
^
include/linux/compiler.h:319:4: note: in definition of macro '__compiletime_assert'
prefix ## suffix(); \
^~~~~~
include/linux/compiler.h:339:2: note: in expansion of macro '_compiletime_assert'
_compiletime_assert(condition, msg, __compiletime_assert_, __LINE__)
^~~~~~~~~~~~~~~~~~~
include/linux/build_bug.h:45:37: note: in expansion of macro 'compiletime_assert'
#define BUILD_BUG_ON_MSG(cond, msg) compiletime_assert(!(cond), msg)
^~~~~~~~~~~~~~~~~~
include/linux/build_bug.h:79:21: note: in expansion of macro 'BUILD_BUG_ON_MSG'
#define BUILD_BUG() BUILD_BUG_ON_MSG(1, "BUILD_BUG failed")
^~~~~~~~~~~~~~~~
include/linux/huge_mm.h:254:27: note: in expansion of macro 'BUILD_BUG'
#define HPAGE_PUD_MASK ({ BUILD_BUG(); 0; })
^~~~~~~~~
>> arch/powerpc/mm/tlb-radix.c:752:44: note: in expansion of macro 'HPAGE_PUD_MASK'
gstart = (start + HPAGE_PUD_SIZE - 1) & HPAGE_PUD_MASK;
^~~~~~~~~~~~~~
include/linux/compiler.h:339:38: error: call to '__compiletime_assert_753' declared with attribute error: BUILD_BUG failed
_compiletime_assert(condition, msg, __compiletime_assert_, __LINE__)
^
include/linux/compiler.h:319:4: note: in definition of macro '__compiletime_assert'
prefix ## suffix(); \
^~~~~~
include/linux/compiler.h:339:2: note: in expansion of macro '_compiletime_assert'
_compiletime_assert(condition, msg, __compiletime_assert_, __LINE__)
^~~~~~~~~~~~~~~~~~~
include/linux/build_bug.h:45:37: note: in expansion of macro 'compiletime_assert'
#define BUILD_BUG_ON_MSG(cond, msg) compiletime_assert(!(cond), msg)
^~~~~~~~~~~~~~~~~~
include/linux/build_bug.h:79:21: note: in expansion of macro 'BUILD_BUG_ON_MSG'
#define BUILD_BUG() BUILD_BUG_ON_MSG(1, "BUILD_BUG failed")
^~~~~~~~~~~~~~~~
include/linux/huge_mm.h:254:27: note: in expansion of macro 'BUILD_BUG'
#define HPAGE_PUD_MASK ({ BUILD_BUG(); 0; })
^~~~~~~~~
arch/powerpc/mm/tlb-radix.c:753:17: note: in expansion of macro 'HPAGE_PUD_MASK'
gend = end & HPAGE_PUD_MASK;
^~~~~~~~~~~~~~
>> include/linux/compiler.h:339:38: error: call to '__compiletime_assert_745' declared with attribute error: BUILD_BUG failed
_compiletime_assert(condition, msg, __compiletime_assert_, __LINE__)
^
include/linux/compiler.h:319:4: note: in definition of macro '__compiletime_assert'
prefix ## suffix(); \
^~~~~~
include/linux/compiler.h:339:2: note: in expansion of macro '_compiletime_assert'
_compiletime_assert(condition, msg, __compiletime_assert_, __LINE__)
^~~~~~~~~~~~~~~~~~~
include/linux/build_bug.h:45:37: note: in expansion of macro 'compiletime_assert'
#define BUILD_BUG_ON_MSG(cond, msg) compiletime_assert(!(cond), msg)
^~~~~~~~~~~~~~~~~~
include/linux/build_bug.h:79:21: note: in expansion of macro 'BUILD_BUG_ON_MSG'
#define BUILD_BUG() BUILD_BUG_ON_MSG(1, "BUILD_BUG failed")
^~~~~~~~~~~~~~~~
>> include/linux/huge_mm.h:251:27: note: in expansion of macro 'BUILD_BUG'
#define HPAGE_PMD_SIZE ({ BUILD_BUG(); 0; })
^~~~~~~~~
>> arch/powerpc/mm/tlb-radix.c:745:22: note: in expansion of macro 'HPAGE_PMD_SIZE'
hstart = (start + HPAGE_PMD_SIZE - 1) & HPAGE_PMD_MASK;
^~~~~~~~~~~~~~
>> include/linux/compiler.h:339:38: error: call to '__compiletime_assert_745' declared with attribute error: BUILD_BUG failed
_compiletime_assert(condition, msg, __compiletime_assert_, __LINE__)
^
include/linux/compiler.h:319:4: note: in definition of macro '__compiletime_assert'
prefix ## suffix(); \
^~~~~~
include/linux/compiler.h:339:2: note: in expansion of macro '_compiletime_assert'
_compiletime_assert(condition, msg, __compiletime_assert_, __LINE__)
^~~~~~~~~~~~~~~~~~~
include/linux/build_bug.h:45:37: note: in expansion of macro 'compiletime_assert'
#define BUILD_BUG_ON_MSG(cond, msg) compiletime_assert(!(cond), msg)
^~~~~~~~~~~~~~~~~~
include/linux/build_bug.h:79:21: note: in expansion of macro 'BUILD_BUG_ON_MSG'
#define BUILD_BUG() BUILD_BUG_ON_MSG(1, "BUILD_BUG failed")
^~~~~~~~~~~~~~~~
include/linux/huge_mm.h:250:27: note: in expansion of macro 'BUILD_BUG'
#define HPAGE_PMD_MASK ({ BUILD_BUG(); 0; })
^~~~~~~~~
>> arch/powerpc/mm/tlb-radix.c:745:44: note: in expansion of macro 'HPAGE_PMD_MASK'
hstart = (start + HPAGE_PMD_SIZE - 1) & HPAGE_PMD_MASK;
^~~~~~~~~~~~~~
include/linux/compiler.h:339:38: error: call to '__compiletime_assert_746' declared with attribute error: BUILD_BUG failed
_compiletime_assert(condition, msg, __compiletime_assert_, __LINE__)
^
include/linux/compiler.h:319:4: note: in definition of macro '__compiletime_assert'
prefix ## suffix(); \
^~~~~~
include/linux/compiler.h:339:2: note: in expansion of macro '_compiletime_assert'
_compiletime_assert(condition, msg, __compiletime_assert_, __LINE__)
^~~~~~~~~~~~~~~~~~~
include/linux/build_bug.h:45:37: note: in expansion of macro 'compiletime_assert'
#define BUILD_BUG_ON_MSG(cond, msg) compiletime_assert(!(cond), msg)
^~~~~~~~~~~~~~~~~~
include/linux/build_bug.h:79:21: note: in expansion of macro 'BUILD_BUG_ON_MSG'
#define BUILD_BUG() BUILD_BUG_ON_MSG(1, "BUILD_BUG failed")
^~~~~~~~~~~~~~~~
include/linux/huge_mm.h:250:27: note: in expansion of macro 'BUILD_BUG'
#define HPAGE_PMD_MASK ({ BUILD_BUG(); 0; })
^~~~~~~~~
arch/powerpc/mm/tlb-radix.c:746:17: note: in expansion of macro 'HPAGE_PMD_MASK'
hend = end & HPAGE_PMD_MASK;
^~~~~~~~~~~~~~
include/linux/compiler.h:339:38: error: call to '__compiletime_assert_752' declared with attribute error: BUILD_BUG failed
_compiletime_assert(condition, msg, __compiletime_assert_, __LINE__)
^
include/linux/compiler.h:319:4: note: in definition of macro '__compiletime_assert'
prefix ## suffix(); \
^~~~~~
include/linux/compiler.h:339:2: note: in expansion of macro '_compiletime_assert'
_compiletime_assert(condition, msg, __compiletime_assert_, __LINE__)
^~~~~~~~~~~~~~~~~~~
include/linux/build_bug.h:45:37: note: in expansion of macro 'compiletime_assert'
#define BUILD_BUG_ON_MSG(cond, msg) compiletime_assert(!(cond), msg)
^~~~~~~~~~~~~~~~~~
include/linux/build_bug.h:79:21: note: in expansion of macro 'BUILD_BUG_ON_MSG'
#define BUILD_BUG() BUILD_BUG_ON_MSG(1, "BUILD_BUG failed")
^~~~~~~~~~~~~~~~
include/linux/huge_mm.h:255:27: note: in expansion of macro 'BUILD_BUG'
#define HPAGE_PUD_SIZE ({ BUILD_BUG(); 0; })
^~~~~~~~~
>> arch/powerpc/mm/tlb-radix.c:752:22: note: in expansion of macro 'HPAGE_PUD_SIZE'
gstart = (start + HPAGE_PUD_SIZE - 1) & HPAGE_PUD_MASK;
^~~~~~~~~~~~~~
include/linux/compiler.h:339:38: error: call to '__compiletime_assert_752' declared with attribute error: BUILD_BUG failed
_compiletime_assert(condition, msg, __compiletime_assert_, __LINE__)
^
include/linux/compiler.h:319:4: note: in definition of macro '__compiletime_assert'
prefix ## suffix(); \
^~~~~~
include/linux/compiler.h:339:2: note: in expansion of macro '_compiletime_assert'
_compiletime_assert(condition, msg, __compiletime_assert_, __LINE__)
^~~~~~~~~~~~~~~~~~~
include/linux/build_bug.h:45:37: note: in expansion of macro 'compiletime_assert'
#define BUILD_BUG_ON_MSG(cond, msg) compiletime_assert(!(cond), msg)
^~~~~~~~~~~~~~~~~~
include/linux/build_bug.h:79:21: note: in expansion of macro 'BUILD_BUG_ON_MSG'
#define BUILD_BUG() BUILD_BUG_ON_MSG(1, "BUILD_BUG failed")
^~~~~~~~~~~~~~~~
include/linux/huge_mm.h:254:27: note: in expansion of macro 'BUILD_BUG'
#define HPAGE_PUD_MASK ({ BUILD_BUG(); 0; })
^~~~~~~~~
>> arch/powerpc/mm/tlb-radix.c:752:44: note: in expansion of macro 'HPAGE_PUD_MASK'
gstart = (start + HPAGE_PUD_SIZE - 1) & HPAGE_PUD_MASK;
^~~~~~~~~~~~~~
include/linux/compiler.h:339:38: error: call to '__compiletime_assert_753' declared with attribute error: BUILD_BUG failed
_compiletime_assert(condition, msg, __compiletime_assert_, __LINE__)
^
include/linux/compiler.h:319:4: note: in definition of macro '__compiletime_assert'
prefix ## suffix(); \
^~~~~~
include/linux/compiler.h:339:2: note: in expansion of macro '_compiletime_assert'
_compiletime_assert(condition, msg, __compiletime_assert_, __LINE__)
^~~~~~~~~~~~~~~~~~~
include/linux/build_bug.h:45:37: note: in expansion of macro 'compiletime_assert'
#define BUILD_BUG_ON_MSG(cond, msg) compiletime_assert(!(cond), msg)
^~~~~~~~~~~~~~~~~~
include/linux/build_bug.h:79:21: note: in expansion of macro 'BUILD_BUG_ON_MSG'
#define BUILD_BUG() BUILD_BUG_ON_MSG(1, "BUILD_BUG failed")
^~~~~~~~~~~~~~~~
include/linux/huge_mm.h:254:27: note: in expansion of macro 'BUILD_BUG'
#define HPAGE_PUD_MASK ({ BUILD_BUG(); 0; })
^~~~~~~~~
arch/powerpc/mm/tlb-radix.c:753:17: note: in expansion of macro 'HPAGE_PUD_MASK'
gend = end & HPAGE_PUD_MASK;
^~~~~~~~~~~~~~
vim +/HPAGE_PMD_SIZE +745 arch/powerpc/mm/tlb-radix.c
691
692 static inline void __radix__flush_tlb_range(struct mm_struct *mm,
693 unsigned long start, unsigned long end,
694 bool flush_all_sizes)
695
696 {
697 unsigned long pid;
698 unsigned int page_shift = mmu_psize_defs[mmu_virtual_psize].shift;
699 unsigned long page_size = 1UL << page_shift;
700 unsigned long nr_pages = (end - start) >> page_shift;
701 bool local, full;
702
703 pid = mm->context.id;
704 if (unlikely(pid == MMU_NO_CONTEXT))
705 return;
706
707 preempt_disable();
708 smp_mb(); /* see radix__flush_tlb_mm */
709 if (!mm_is_thread_local(mm)) {
710 if (unlikely(mm_is_singlethreaded(mm))) {
711 if (end != TLB_FLUSH_ALL) {
712 exit_flush_lazy_tlbs(mm);
713 goto is_local;
714 }
715 }
716 local = false;
717 full = (end == TLB_FLUSH_ALL ||
718 nr_pages > tlb_single_page_flush_ceiling);
719 } else {
720 is_local:
721 local = true;
722 full = (end == TLB_FLUSH_ALL ||
723 nr_pages > tlb_local_single_page_flush_ceiling);
724 }
725
726 if (full) {
727 if (local) {
728 _tlbiel_pid(pid, RIC_FLUSH_TLB);
729 } else {
730 if (mm_needs_flush_escalation(mm))
731 _tlbie_pid(pid, RIC_FLUSH_ALL);
732 else
733 _tlbie_pid(pid, RIC_FLUSH_TLB);
734 }
735 } else {
736 bool hflush = flush_all_sizes;
737 bool gflush = flush_all_sizes;
738 unsigned long hstart, hend;
739 unsigned long gstart, gend;
740
741 if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
742 hflush = true;
743
744 if (hflush) {
> 745 hstart = (start + HPAGE_PMD_SIZE - 1) & HPAGE_PMD_MASK;
746 hend = end & HPAGE_PMD_MASK;
747 if (hstart == hend)
748 hflush = false;
749 }
750
751 if (gflush) {
> 752 gstart = (start + HPAGE_PUD_SIZE - 1) & HPAGE_PUD_MASK;
753 gend = end & HPAGE_PUD_MASK;
754 if (gstart == gend)
755 gflush = false;
756 }
757
758 asm volatile("ptesync": : :"memory");
759 if (local) {
760 __tlbiel_va_range(start, end, pid, page_size, mmu_virtual_psize);
761 if (hflush)
762 __tlbiel_va_range(hstart, hend, pid,
763 HPAGE_PMD_SIZE, MMU_PAGE_2M);
764 if (gflush)
765 __tlbiel_va_range(gstart, gend, pid,
766 HPAGE_PUD_SIZE, MMU_PAGE_1G);
767 asm volatile("ptesync": : :"memory");
768 } else {
769 __tlbie_va_range(start, end, pid, page_size, mmu_virtual_psize);
770 if (hflush)
771 __tlbie_va_range(hstart, hend, pid,
772 HPAGE_PMD_SIZE, MMU_PAGE_2M);
773 if (gflush)
774 __tlbie_va_range(gstart, gend, pid,
775 HPAGE_PUD_SIZE, MMU_PAGE_1G);
776 fixup_tlbie();
777 asm volatile("eieio; tlbsync; ptesync": : :"memory");
778 }
779 }
780 preempt_enable();
781 }
782
---
0-DAY kernel test infrastructure Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all Intel Corporation
[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 18788 bytes --]
^ permalink raw reply
* [PATCH v2 6/6] powerpc/pkeys: Deny read/write/execute by default
From: Ram Pai @ 2018-06-14 0:29 UTC (permalink / raw)
To: mpe
Cc: linuxppc-dev, dave.hansen, aneesh.kumar, bsingharora, hbabu,
mhocko, bauerman, linuxram, Ulrich.Weigand, fweimer, luto,
msuchanek
In-Reply-To: <1528936144-6696-1-git-send-email-linuxram@us.ibm.com>
Deny all permissions on all keys, with some exceptions. pkey-0 must
allow all permissions, or else everything comes to a screaching halt.
Execute-only key must allow execute permission.
Signed-off-by: Ram Pai <linuxram@us.ibm.com>
---
arch/powerpc/mm/pkeys.c | 10 ++++------
1 files changed, 4 insertions(+), 6 deletions(-)
diff --git a/arch/powerpc/mm/pkeys.c b/arch/powerpc/mm/pkeys.c
index 9098605..cec990c 100644
--- a/arch/powerpc/mm/pkeys.c
+++ b/arch/powerpc/mm/pkeys.c
@@ -128,13 +128,11 @@ int pkey_initialize(void)
/* register mask is in BE format */
pkey_amr_mask = ~0x0ul;
- pkey_iamr_mask = ~0x0ul;
+ pkey_amr_mask &= ~(0x3ul << pkeyshift(PKEY_0));
- for (i = 0; i < (pkeys_total - os_reserved); i++) {
- pkey_amr_mask &= ~(0x3ul << pkeyshift(i));
- pkey_iamr_mask &= ~(0x1ul << pkeyshift(i));
- }
- pkey_amr_mask |= (AMR_RD_BIT|AMR_WR_BIT) << pkeyshift(EXECUTE_ONLY_KEY);
+ pkey_iamr_mask = ~0x0ul;
+ pkey_iamr_mask &= ~(0x3ul << pkeyshift(PKEY_0));
+ pkey_iamr_mask &= ~(0x3ul << pkeyshift(EXECUTE_ONLY_KEY));
pkey_uamor_mask = ~0x0ul;
pkey_uamor_mask &= ~(0x3ul << pkeyshift(PKEY_0));
--
1.7.1
^ permalink raw reply related
* [PATCH v2 5/6] powerpc/pkeys: make protection key 0 less special
From: Ram Pai @ 2018-06-14 0:29 UTC (permalink / raw)
To: mpe
Cc: linuxppc-dev, dave.hansen, aneesh.kumar, bsingharora, hbabu,
mhocko, bauerman, linuxram, Ulrich.Weigand, fweimer, luto,
msuchanek
In-Reply-To: <1528936144-6696-1-git-send-email-linuxram@us.ibm.com>
Applications need the ability to associate an address-range with some
key and latter revert to its initial default key. Pkey-0 comes close to
providing this function but falls short, because the current
implementation disallows applications to explicitly associate pkey-0 to
the address range.
Lets make pkey-0 less special and treat it almost like any other key.
Thus it can be explicitly associated with any address range, and can be
freed. This gives the application more flexibility and power. The
ability to free pkey-0 must be used responsibily, since pkey-0 is
associated with almost all address-range by default.
Even with this change pkey-0 continues to be slightly more special
from the following point of view.
(a) it is implicitly allocated.
(b) it is the default key assigned to any address-range.
(c) its permissions cannot be modified by userspace.
NOTE: (c) is specific to powerpc only. pkey-0 is associated by default
with all pages including kernel pages, and pkeys are also active in
kernel mode. If any permission is denied on pkey-0, the kernel running
in the context of the application will be unable to operate.
Tested on powerpc.
cc: Thomas Gleixner <tglx@linutronix.de>
cc: Dave Hansen <dave.hansen@intel.com>
cc: Michael Ellermen <mpe@ellerman.id.au>
cc: Ingo Molnar <mingo@kernel.org>
cc: Andrew Morton <akpm@linux-foundation.org>
cc: Thiago Jung Bauermann <bauerman@linux.ibm.com>
cc: Michal Such谩nek <msuchanek@suse.de
Signed-off-by: Ram Pai <linuxram@us.ibm.com>
-----------------------------------------------------------------------
History:
v5: . no changes since version.
v4: . introduced PKEY_0 macro. No bug fixes. Code
re-arrangement to save a few cycles.
v3: . Corrected a comment in arch_set_user_pkey_access(). .
Clarified the header, to capture the notion that pkey-0
permissions cannot be modified by userspace on powerpc.
-- comment from Thiago
v2: . mm_pkey_is_allocated() continued to treat pkey-0 special.
fixed it.
---
arch/powerpc/include/asm/pkeys.h | 29 +++++++++++++++++++++++------
arch/powerpc/mm/pkeys.c | 13 ++++++-------
2 files changed, 29 insertions(+), 13 deletions(-)
diff --git a/arch/powerpc/include/asm/pkeys.h b/arch/powerpc/include/asm/pkeys.h
index 5ba80cf..c824528 100644
--- a/arch/powerpc/include/asm/pkeys.h
+++ b/arch/powerpc/include/asm/pkeys.h
@@ -13,7 +13,10 @@
DECLARE_STATIC_KEY_TRUE(pkey_disabled);
extern int pkeys_total; /* total pkeys as per device tree */
-extern u32 initial_allocation_mask; /* bits set for reserved keys */
+extern u32 initial_allocation_mask; /* bits set for the initially allocated keys */
+extern u32 reserved_allocation_mask; /* bits set for reserved keys */
+
+#define PKEY_0 0
#define ARCH_VM_PKEY_FLAGS (VM_PKEY_BIT0 | VM_PKEY_BIT1 | VM_PKEY_BIT2 | \
VM_PKEY_BIT3 | VM_PKEY_BIT4)
@@ -83,15 +86,19 @@ static inline u16 pte_to_pkey_bits(u64 pteflags)
#define __mm_pkey_is_allocated(mm, pkey) \
(mm_pkey_allocation_map(mm) & pkey_alloc_mask(pkey))
-#define __mm_pkey_is_reserved(pkey) (initial_allocation_mask & \
+#define __mm_pkey_is_reserved(pkey) (reserved_allocation_mask & \
pkey_alloc_mask(pkey))
static inline bool mm_pkey_is_allocated(struct mm_struct *mm, int pkey)
{
- /* A reserved key is never considered as 'explicitly allocated' */
- return ((pkey < arch_max_pkey()) &&
- !__mm_pkey_is_reserved(pkey) &&
- __mm_pkey_is_allocated(mm, pkey));
+ if (pkey < 0 || pkey >= arch_max_pkey())
+ return false;
+
+ /* Reserved keys are never allocated. */
+ if (__mm_pkey_is_reserved(pkey))
+ return false;
+
+ return __mm_pkey_is_allocated(mm, pkey);
}
extern void __arch_activate_pkey(int pkey);
@@ -187,6 +194,16 @@ static inline int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
{
if (static_branch_likely(&pkey_disabled))
return -EINVAL;
+
+ /*
+ * userspace should not change pkey-0 permissions.
+ * pkey-0 is associated with every page in the kernel.
+ * If userspace denies any permission on pkey-0, the
+ * kernel cannot operate.
+ */
+ if (pkey == PKEY_0)
+ return init_val ? -EINVAL : 0;
+
return __arch_set_user_pkey_access(tsk, pkey, init_val);
}
diff --git a/arch/powerpc/mm/pkeys.c b/arch/powerpc/mm/pkeys.c
index 1f2389f..9098605 100644
--- a/arch/powerpc/mm/pkeys.c
+++ b/arch/powerpc/mm/pkeys.c
@@ -14,7 +14,8 @@
bool pkey_execute_disable_supported;
int pkeys_total; /* Total pkeys as per device tree */
bool pkeys_devtree_defined; /* pkey property exported by device tree */
-u32 initial_allocation_mask; /* Bits set for reserved keys */
+u32 initial_allocation_mask; /* Bits set for the initially allocated keys */
+u32 reserved_allocation_mask; /* Bits set for reserved keys */
u64 pkey_amr_mask; /* Bits in AMR not to be touched */
u64 pkey_iamr_mask; /* Bits in AMR not to be touched */
u64 pkey_uamor_mask; /* Bits in UMOR not to be touched */
@@ -121,8 +122,9 @@ int pkey_initialize(void)
#else
os_reserved = 0;
#endif
- initial_allocation_mask = (0x1 << 0) | (0x1 << 1) |
- (0x1 << EXECUTE_ONLY_KEY);
+ /* Bits are in LE format. */
+ reserved_allocation_mask = (0x1 << 1) | (0x1 << EXECUTE_ONLY_KEY);
+ initial_allocation_mask = reserved_allocation_mask | (0x1 << PKEY_0);
/* register mask is in BE format */
pkey_amr_mask = ~0x0ul;
@@ -135,7 +137,7 @@ int pkey_initialize(void)
pkey_amr_mask |= (AMR_RD_BIT|AMR_WR_BIT) << pkeyshift(EXECUTE_ONLY_KEY);
pkey_uamor_mask = ~0x0ul;
- pkey_uamor_mask &= ~(0x3ul << pkeyshift(0));
+ pkey_uamor_mask &= ~(0x3ul << pkeyshift(PKEY_0));
pkey_uamor_mask &= ~(0x3ul << pkeyshift(EXECUTE_ONLY_KEY));
for (i = (pkeys_total - os_reserved); i < pkeys_total; i++)
@@ -374,9 +376,6 @@ static bool pkey_access_permitted(int pkey, bool write, bool execute)
int pkey_shift;
u64 amr;
- if (!pkey)
- return true;
-
if (!is_pkey_enabled(pkey))
return true;
--
1.7.1
^ permalink raw reply related
* [PATCH v2 4/6] powerpc/pkeys: Preallocate execute-only key
From: Ram Pai @ 2018-06-14 0:29 UTC (permalink / raw)
To: mpe
Cc: linuxppc-dev, dave.hansen, aneesh.kumar, bsingharora, hbabu,
mhocko, bauerman, linuxram, Ulrich.Weigand, fweimer, luto,
msuchanek
In-Reply-To: <1528936144-6696-1-git-send-email-linuxram@us.ibm.com>
execute-only key is allocated dynamically. This is a problem. When a
thread implicitly creates a execute-only key, and resets UAMOR for that
key, the UAMOR value does not percolate to all the other threads. Any
other thread may ignorantly change the permissions on the key. This can
cause the key to be not execute-only for that thread.
Preallocate the execute-only key and ensure that no thread can change
the permission of the key, by resetting the corresponding bit in UAMOR.
CC: Andy Lutomirski <luto@kernel.org>
CC: Florian Weimer <fweimer@redhat.com>
CC: Thiago Jung Bauermann <bauerman@linux.ibm.com>
CC: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Ram Pai <linuxram@us.ibm.com>
---
arch/powerpc/mm/pkeys.c | 53 +++++++---------------------------------------
1 files changed, 8 insertions(+), 45 deletions(-)
diff --git a/arch/powerpc/mm/pkeys.c b/arch/powerpc/mm/pkeys.c
index b681bec..1f2389f 100644
--- a/arch/powerpc/mm/pkeys.c
+++ b/arch/powerpc/mm/pkeys.c
@@ -25,6 +25,7 @@
#define IAMR_EX_BIT 0x1UL
#define PKEY_REG_BITS (sizeof(u64)*8)
#define pkeyshift(pkey) (PKEY_REG_BITS - ((pkey+1) * AMR_BITS_PER_PKEY))
+#define EXECUTE_ONLY_KEY 2
static void scan_pkey_feature(void)
{
@@ -120,7 +121,8 @@ int pkey_initialize(void)
#else
os_reserved = 0;
#endif
- initial_allocation_mask = (0x1 << 0) | (0x1 << 1);
+ initial_allocation_mask = (0x1 << 0) | (0x1 << 1) |
+ (0x1 << EXECUTE_ONLY_KEY);
/* register mask is in BE format */
pkey_amr_mask = ~0x0ul;
@@ -130,9 +132,12 @@ int pkey_initialize(void)
pkey_amr_mask &= ~(0x3ul << pkeyshift(i));
pkey_iamr_mask &= ~(0x1ul << pkeyshift(i));
}
+ pkey_amr_mask |= (AMR_RD_BIT|AMR_WR_BIT) << pkeyshift(EXECUTE_ONLY_KEY);
pkey_uamor_mask = ~0x0ul;
pkey_uamor_mask &= ~(0x3ul << pkeyshift(0));
+ pkey_uamor_mask &= ~(0x3ul << pkeyshift(EXECUTE_ONLY_KEY));
+
for (i = (pkeys_total - os_reserved); i < pkeys_total; i++)
pkey_uamor_mask &= ~(0x3ul << pkeyshift(i));
@@ -146,8 +151,7 @@ void pkey_mm_init(struct mm_struct *mm)
if (static_branch_likely(&pkey_disabled))
return;
mm_pkey_allocation_map(mm) = initial_allocation_mask;
- /* -1 means unallocated or invalid */
- mm->context.execute_only_pkey = -1;
+ mm->context.execute_only_pkey = EXECUTE_ONLY_KEY;
}
static inline u64 read_amr(void)
@@ -326,48 +330,7 @@ static inline bool pkey_allows_readwrite(int pkey)
int __execute_only_pkey(struct mm_struct *mm)
{
- bool need_to_set_mm_pkey = false;
- int execute_only_pkey = mm->context.execute_only_pkey;
- int ret;
-
- /* Do we need to assign a pkey for mm's execute-only maps? */
- if (execute_only_pkey == -1) {
- /* Go allocate one to use, which might fail */
- execute_only_pkey = mm_pkey_alloc(mm);
- if (execute_only_pkey < 0)
- return -1;
- need_to_set_mm_pkey = true;
- }
-
- /*
- * We do not want to go through the relatively costly dance to set AMR
- * if we do not need to. Check it first and assume that if the
- * execute-only pkey is readwrite-disabled than we do not have to set it
- * ourselves.
- */
- if (!need_to_set_mm_pkey && !pkey_allows_readwrite(execute_only_pkey))
- return execute_only_pkey;
-
- /*
- * Set up AMR so that it denies access for everything other than
- * execution.
- */
- ret = __arch_set_user_pkey_access(current, execute_only_pkey,
- PKEY_DISABLE_ACCESS |
- PKEY_DISABLE_WRITE);
- /*
- * If the AMR-set operation failed somehow, just return 0 and
- * effectively disable execute-only support.
- */
- if (ret) {
- mm_pkey_free(mm, execute_only_pkey);
- return -1;
- }
-
- /* We got one, store it and use it from here on out */
- if (need_to_set_mm_pkey)
- mm->context.execute_only_pkey = execute_only_pkey;
- return execute_only_pkey;
+ return mm->context.execute_only_pkey;
}
static inline bool vma_is_pkey_exec_only(struct vm_area_struct *vma)
--
1.7.1
^ permalink raw reply related
* [PATCH v2 3/6] powerpc/pkeys: fix calculation of total pkeys.
From: Ram Pai @ 2018-06-14 0:29 UTC (permalink / raw)
To: mpe
Cc: linuxppc-dev, dave.hansen, aneesh.kumar, bsingharora, hbabu,
mhocko, bauerman, linuxram, Ulrich.Weigand, fweimer, luto,
msuchanek
In-Reply-To: <1528936144-6696-1-git-send-email-linuxram@us.ibm.com>
Total number of pkeys calculation is off by 1. Fix it.
CC: Florian Weimer <fweimer@redhat.com>
CC: Michael Ellerman <mpe@ellerman.id.au>
CC: Thiago Jung Bauermann <bauerman@linux.ibm.com>
Signed-off-by: Ram Pai <linuxram@us.ibm.com>
---
arch/powerpc/mm/pkeys.c | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)
diff --git a/arch/powerpc/mm/pkeys.c b/arch/powerpc/mm/pkeys.c
index 6529f4e..b681bec 100644
--- a/arch/powerpc/mm/pkeys.c
+++ b/arch/powerpc/mm/pkeys.c
@@ -92,7 +92,7 @@ int pkey_initialize(void)
* arch-neutral code.
*/
pkeys_total = min_t(int, pkeys_total,
- (ARCH_VM_PKEY_FLAGS >> VM_PKEY_SHIFT));
+ ((ARCH_VM_PKEY_FLAGS >> VM_PKEY_SHIFT)+1));
if (!pkey_mmu_enabled() || radix_enabled() || !pkeys_total)
static_branch_enable(&pkey_disabled);
--
1.7.1
^ permalink raw reply related
* [PATCH v2 2/6] powerpc/pkeys: Save the pkey registers before fork
From: Ram Pai @ 2018-06-14 0:29 UTC (permalink / raw)
To: mpe
Cc: linuxppc-dev, dave.hansen, aneesh.kumar, bsingharora, hbabu,
mhocko, bauerman, linuxram, Ulrich.Weigand, fweimer, luto,
msuchanek
In-Reply-To: <1528936144-6696-1-git-send-email-linuxram@us.ibm.com>
When a thread forks the contents of AMR, IAMR, UAMOR registers in the
newly forked thread are not inherited.
Save the registers before forking, for content of those
registers to be automatically copied into the new thread.
CC: Michael Ellerman <mpe@ellerman.id.au>
CC: Florian Weimer <fweimer@redhat.com>
CC: Andy Lutomirski <luto@kernel.org>
CC: Thiago Jung Bauermann <bauerman@linux.ibm.com>
Signed-off-by: Ram Pai <linuxram@us.ibm.com>
---
arch/powerpc/kernel/process.c | 1 +
1 files changed, 1 insertions(+), 0 deletions(-)
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 9ef4aea..991d097 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -583,6 +583,7 @@ static void save_all(struct task_struct *tsk)
__giveup_spe(tsk);
msr_check_and_clear(msr_all_available);
+ thread_pkey_regs_save(&tsk->thread);
}
void flush_all_to_thread(struct task_struct *tsk)
--
1.7.1
^ permalink raw reply related
* [PATCH v2 1/6] powerpc/pkeys: Enable all user-allocatable pkeys at init.
From: Ram Pai @ 2018-06-14 0:28 UTC (permalink / raw)
To: mpe
Cc: linuxppc-dev, dave.hansen, aneesh.kumar, bsingharora, hbabu,
mhocko, bauerman, linuxram, Ulrich.Weigand, fweimer, luto,
msuchanek
In-Reply-To: <1528936144-6696-1-git-send-email-linuxram@us.ibm.com>
In a multithreaded application, a key allocated by one thread must
be activate and usable on all threads.
Currently this is not the case, because the UAMOR bits for all keys are
disabled by default. When a new key is allocated in one thread, though
the corresponding UAMOR bits for that thread get enabled, the UAMOR bits
for all other existing threads continue to have their bits disabled.
Other threads have no way to set permissions on the key, effectively
making the key useless.
Enable the UAMOR bits for all keys, at process creation. Since the
contents of UAMOR are inherited at fork, all threads are capable of
modifying the permissions on any key.
BTW: changing the permission on unallocated keys has no effect, till
those keys are not associated with any PTEs. The kernel will anyway
disallow to association of unallocated keys with PTEs.
CC: Andy Lutomirski <luto@kernel.org>
CC: Florian Weimer <fweimer@redhat.com>
CC: Thiago Jung Bauermann <bauerman@linux.ibm.com>
CC: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Ram Pai <linuxram@us.ibm.com>
---
arch/powerpc/mm/pkeys.c | 40 ++++++++++++++++++++++------------------
1 files changed, 22 insertions(+), 18 deletions(-)
diff --git a/arch/powerpc/mm/pkeys.c b/arch/powerpc/mm/pkeys.c
index e6f500f..6529f4e 100644
--- a/arch/powerpc/mm/pkeys.c
+++ b/arch/powerpc/mm/pkeys.c
@@ -15,8 +15,9 @@
int pkeys_total; /* Total pkeys as per device tree */
bool pkeys_devtree_defined; /* pkey property exported by device tree */
u32 initial_allocation_mask; /* Bits set for reserved keys */
-u64 pkey_amr_uamor_mask; /* Bits in AMR/UMOR not to be touched */
+u64 pkey_amr_mask; /* Bits in AMR not to be touched */
u64 pkey_iamr_mask; /* Bits in AMR not to be touched */
+u64 pkey_uamor_mask; /* Bits in UMOR not to be touched */
#define AMR_BITS_PER_PKEY 2
#define AMR_RD_BIT 0x1UL
@@ -119,20 +120,22 @@ int pkey_initialize(void)
#else
os_reserved = 0;
#endif
- initial_allocation_mask = ~0x0;
- pkey_amr_uamor_mask = ~0x0ul;
+ initial_allocation_mask = (0x1 << 0) | (0x1 << 1);
+
+ /* register mask is in BE format */
+ pkey_amr_mask = ~0x0ul;
pkey_iamr_mask = ~0x0ul;
- /*
- * key 0, 1 are reserved.
- * key 0 is the default key, which allows read/write/execute.
- * key 1 is recommended not to be used. PowerISA(3.0) page 1015,
- * programming note.
- */
- for (i = 2; i < (pkeys_total - os_reserved); i++) {
- initial_allocation_mask &= ~(0x1 << i);
- pkey_amr_uamor_mask &= ~(0x3ul << pkeyshift(i));
+
+ for (i = 0; i < (pkeys_total - os_reserved); i++) {
+ pkey_amr_mask &= ~(0x3ul << pkeyshift(i));
pkey_iamr_mask &= ~(0x1ul << pkeyshift(i));
}
+
+ pkey_uamor_mask = ~0x0ul;
+ pkey_uamor_mask &= ~(0x3ul << pkeyshift(0));
+ for (i = (pkeys_total - os_reserved); i < pkeys_total; i++)
+ pkey_uamor_mask &= ~(0x3ul << pkeyshift(i));
+
return 0;
}
@@ -289,9 +292,6 @@ void thread_pkey_regs_restore(struct thread_struct *new_thread,
if (static_branch_likely(&pkey_disabled))
return;
- /*
- * TODO: Just set UAMOR to zero if @new_thread hasn't used any keys yet.
- */
if (old_thread->amr != new_thread->amr)
write_amr(new_thread->amr);
if (old_thread->iamr != new_thread->iamr)
@@ -305,9 +305,13 @@ void thread_pkey_regs_init(struct thread_struct *thread)
if (static_branch_likely(&pkey_disabled))
return;
- thread->amr = read_amr() & pkey_amr_uamor_mask;
- thread->iamr = read_iamr() & pkey_iamr_mask;
- thread->uamor = read_uamor() & pkey_amr_uamor_mask;
+ thread->amr = pkey_amr_mask;
+ thread->iamr = pkey_iamr_mask;
+ thread->uamor = pkey_uamor_mask;
+
+ write_uamor(pkey_uamor_mask);
+ write_amr(pkey_amr_mask);
+ write_iamr(pkey_iamr_mask);
}
static inline bool pkey_allows_readwrite(int pkey)
--
1.7.1
^ permalink raw reply related
* [PATCH v2 0/6] powerpc/pkeys: fixes to pkeys
From: Ram Pai @ 2018-06-14 0:28 UTC (permalink / raw)
To: mpe
Cc: linuxppc-dev, dave.hansen, aneesh.kumar, bsingharora, hbabu,
mhocko, bauerman, linuxram, Ulrich.Weigand, fweimer, luto,
msuchanek
Assortment of fixes to pkey.
Patch 1 makes pkey consumable in multithreaded applications.
Patch 2 fixes fork behavior to inherit the key attributes.
Patch 3 A off-by-one bug made one key unusable. Fixes it.
Patch 4 Execute-only key is preallocated.
Patch 5 Makes pkey-0 less special.
Patch 6 Deny by default permissions on all unallocated keys.
Passes all selftests on powerpc. Also behavior verified to be correct
by Florian.
Changelog:
v2: . fixed merge conflict with upstream code.
. Add patch 6. Makes the behavior consistent
with that on x86.
Ram Pai (6):
powerpc/pkeys: Enable all user-allocatable pkeys at init.
powerpc/pkeys: Save the pkey registers before fork
powerpc/pkeys: fix calculation of total pkeys.
powerpc/pkeys: Preallocate execute-only key
powerpc/pkeys: make protection key 0 less special
powerpc/pkeys: Deny read/write/execute by default
arch/powerpc/include/asm/pkeys.h | 29 +++++++++--
arch/powerpc/kernel/process.c | 1 +
arch/powerpc/mm/pkeys.c | 102 ++++++++++++-------------------------
3 files changed, 57 insertions(+), 75 deletions(-)
^ permalink raw reply
* Re: [RFC,00/12] Deal with TM on kernel entry and exit
From: Breno Leitao @ 2018-06-13 22:38 UTC (permalink / raw)
To: Cyril Bur, mikey, benh; +Cc: linuxppc-dev
In-Reply-To: <20180220002241.29648-1-cyrilbur@gmail.com>
Hi Cyril,
On 02/19/2018 09:22 PM, Cyril Bur wrote:
> This is very much a proof of concept and if it isn't clear from the
> commit names, still a work in progress.
> I believe I have something that works - all the powerpc selftests
> pass. I would like to get some eyes on it to a) see if I've missed
> anything big and b) some opinions on if it is looking like a net
> improvement.
I started to look at this patchset. The patchset apply cleanly on top of the
current kernel and I started to run some tests.
It seems there is a different behavior when there is a trap (as a illegal
instruction or a 'trap' instruction) inside the transaction.
In this case, the signal handler does not seem to be called, and and the task
segfaults. On current upstream, the signal handler is called and the program
can continue.
4.17 pristine
-------------
$ ./illegal
Failure
4.17 plus your patches
----------------------
$ ./illegal
[1] 2504 segmentation fault ./illegal
Here is a minimal example that is able to recreate this behaviour:
#include <stdio.h>
#include <signal.h>
int htm(){
asm goto ("tbegin. \n\t"
"beq %l[failure] \n\t"
"li 3, 3 \n\t"
"trap \n\t"
"tend. \n\t"
: : : : failure);
return 0;
failure:
printf("Failure\n");
return 1;
}
void signal_handler(int signo, siginfo_t *si, void *data) {
// Do nothing
}
int main(){
struct sigaction sa;
sa.sa_flags = SA_SIGINFO;
sa.sa_sigaction = signal_handler;
sigaction(SIGTRAP, &sa, NULL);
sigaction(SIGILL, &sa, NULL);
return htm();
}
^ permalink raw reply
* powerpc: use time64_t in read_persistent_clock
From: Mathieu Malaterre @ 2018-06-13 20:24 UTC (permalink / raw)
To: Arnd Bergmann; +Cc: Benjamin Herrenschmidt, Michael Ellerman, linuxppc-dev
Arnd,
In 5bfd643583b2e I can see that you changed:
$ git show 5bfd643583b2e -- arch/powerpc/platforms/powermac/time.c
[...]
#ifdef CONFIG_ADB_PMU
-static unsigned long pmu_get_time(void)
+static time64_t pmu_get_time(void)
{
struct adb_request req;
- unsigned int now;
+ time64_t now;
if (pmu_request(&req, NULL, 1, PMU_READ_RTC) < 0)
return 0;
@@ -160,10 +151,10 @@ static unsigned long pmu_get_time(void)
req.reply_len);
now = (req.reply[0] << 24) + (req.reply[1] << 16)
+ (req.reply[2] << 8) + req.reply[3];
- return ((unsigned long)now) - RTC_OFFSET;
+ return now - RTC_OFFSET;
}
[...]
As far as I can tell the old function would never return a negative
value and rely on unsigned long roll over. Now I am seeing negative
value being returned and it seems to break later on in the rtc
library.
Could you comment why you removed the cast to (unsigned long) in your commit ?
Thanks
^ permalink raw reply
* Re: [PATCH] powerpc/64s: Report SLB multi-hit rather than parity error
From: Nicholas Piggin @ 2018-06-13 14:40 UTC (permalink / raw)
To: Michael Ellerman; +Cc: linuxppc-dev
In-Reply-To: <20180613132414.32207-1-mpe@ellerman.id.au>
On Wed, 13 Jun 2018 23:24:14 +1000
Michael Ellerman <mpe@ellerman.id.au> wrote:
> When we take an SLB multi-hit on bare metal, we see both the multi-hit
> and parity error bits set in DSISR. The user manuals indicates this is
> expected to always happen on Power8, whereas on Power9 it says a
> multi-hit will "usually" also cause a parity error.
>
> We decide what to do based on the various error tables in mce_power.c,
> and because we process them in order and only report the first, we
> currently always report a parity error but not the multi-hit, eg:
>
> Severe Machine check interrupt [Recovered]
> Initiator: CPU
> Error type: SLB [Parity]
> Effective address: c000000ffffd4300
>
> Although this is correct, it leaves the user wondering why they got a
> parity error. It would be clearer instead if we reported the
> multi-hit because that is more likely to be simply a software bug,
> whereas a true parity error is possibly an indication of a bad core.
>
> We can do that simply by reordering the error tables so that multi-hit
> appears before parity. That doesn't affect the error recovery at all,
> because we flush the SLB either way.
Yeah this is a good idea. I wonder if there are any other conditions
like this that should be reordered.
I think the i-side should not have to be changed here because it
matches the value not bits, so that shouldn't matter.
A bit of a shame we don't report i/d side, and ideally we'd be able
to report multiple conditions. The reporting APIs really want to be
massaged a bit, but for now this is a good step.
Reviewed-by: Nicholas Piggin <npiggin@gmail.com>
>
> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
> ---
> arch/powerpc/kernel/mce_power.c | 36 ++++++++++++++++++------------------
> 1 file changed, 18 insertions(+), 18 deletions(-)
>
> diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c
> index 38c5b4764bfe..1e450d0c4f72 100644
> --- a/arch/powerpc/kernel/mce_power.c
> +++ b/arch/powerpc/kernel/mce_power.c
> @@ -140,12 +140,12 @@ static const struct mce_ierror_table mce_p7_ierror_table[] = {
> { 0x00000000001c0000, 0x0000000000040000, true,
> MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH,
> MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
> +{ 0x00000000001c0000, 0x00000000000c0000, true,
> + MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, /* Before PARITY */
> + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
> { 0x00000000001c0000, 0x0000000000080000, true,
> MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY,
> MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
> -{ 0x00000000001c0000, 0x00000000000c0000, true,
> - MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT,
> - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
> { 0x00000000001c0000, 0x0000000000100000, true,
> MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_INDETERMINATE, /* BOTH */
> MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
> @@ -164,12 +164,12 @@ static const struct mce_ierror_table mce_p8_ierror_table[] = {
> { 0x00000000081c0000, 0x0000000000040000, true,
> MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH,
> MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
> +{ 0x00000000081c0000, 0x00000000000c0000, true,
> + MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, /* Before PARITY */
> + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
> { 0x00000000081c0000, 0x0000000000080000, true,
> MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY,
> MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
> -{ 0x00000000081c0000, 0x00000000000c0000, true,
> - MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT,
> - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
> { 0x00000000081c0000, 0x0000000000100000, true,
> MCE_ERROR_TYPE_ERAT,MCE_ERAT_ERROR_MULTIHIT,
> MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
> @@ -194,12 +194,12 @@ static const struct mce_ierror_table mce_p9_ierror_table[] = {
> { 0x00000000081c0000, 0x0000000000040000, true,
> MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH,
> MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
> +{ 0x00000000081c0000, 0x00000000000c0000, true,
> + MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, /* Before PARITY */
> + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
> { 0x00000000081c0000, 0x0000000000080000, true,
> MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY,
> MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
> -{ 0x00000000081c0000, 0x00000000000c0000, true,
> - MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT,
> - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
> { 0x00000000081c0000, 0x0000000000100000, true,
> MCE_ERROR_TYPE_ERAT,MCE_ERAT_ERROR_MULTIHIT,
> MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
> @@ -257,12 +257,12 @@ static const struct mce_derror_table mce_p7_derror_table[] = {
> { 0x00000400, true,
> MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT,
> MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
> +{ 0x00000080, true,
> + MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, /* Before PARITY */
> + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
> { 0x00000100, true,
> MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY,
> MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
> -{ 0x00000080, true,
> - MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT,
> - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
> { 0x00000040, true,
> MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_INDETERMINATE, /* BOTH */
> MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
> @@ -290,12 +290,12 @@ static const struct mce_derror_table mce_p8_derror_table[] = {
> { 0x00000200, true,
> MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT, /* SECONDARY ERAT */
> MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
> +{ 0x00000080, true,
> + MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, /* Before PARITY */
> + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
> { 0x00000100, true,
> MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY,
> MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
> -{ 0x00000080, true,
> - MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT,
> - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
> { 0, false, 0, 0, 0, 0 } };
>
> static const struct mce_derror_table mce_p9_derror_table[] = {
> @@ -320,12 +320,12 @@ static const struct mce_derror_table mce_p9_derror_table[] = {
> { 0x00000200, false,
> MCE_ERROR_TYPE_USER, MCE_USER_ERROR_TLBIE,
> MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
> +{ 0x00000080, true,
> + MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, /* Before PARITY */
> + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
> { 0x00000100, true,
> MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY,
> MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
> -{ 0x00000080, true,
> - MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT,
> - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
> { 0x00000040, true,
> MCE_ERROR_TYPE_RA, MCE_RA_ERROR_LOAD,
> MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
^ permalink raw reply
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox