- * [RFC 1/9] mm, arm64: Update PR_SET/GET_TAGGED_ADDR_CTRL interface
  2021-02-05 15:16 [RFC 0/9] Linear Address Masking enabling Kirill A. Shutemov
@ 2021-02-05 15:16 ` Kirill A. Shutemov
  2021-02-11 16:57   ` Catalin Marinas
  2021-02-05 15:16 ` [QEMU] x86: Implement Linear Address Masking support Kirill A. Shutemov
                   ` (11 subsequent siblings)
  12 siblings, 1 reply; 33+ messages in thread
From: Kirill A. Shutemov @ 2021-02-05 15:16 UTC (permalink / raw)
  To: Dave Hansen, Andy Lutomirski, Peter Zijlstra
  Cc: x86, Andrey Ryabinin, Alexander Potapenko, Dmitry Vyukov,
	Catalin Marinas, Will Deacon, H . J . Lu, Andi Kleen, linux-mm,
	linux-kernel, Kirill A. Shutemov
The interface for enabling tagged addresses is very inflexible. It
implies tag size and tag shift implemented by ARM TBI.
Rework the interface to accommodate different shifts and tag sizes.
PR_SET_TAGGED_ADDR_CTRL now accepts two new arguments:
 - nr_bits is pointer to int. The caller specifies the tag size it
   wants. Kernel updates the value of actual tag size that can be
   larger.
 - offset is pointer to int. Kernel returns there a shift of tag in the
   address.
The change doesn't break existing users of the interface: if any of
these pointers are NULL (as we had before the change), the user expects
ARM TBI implementation: nr_bits == 8 && offset == 56 as it was implied
before.
The initial implementation checked that these arguments are NULL, so the
change would not break any legacy users.
If tagging is enabled, GET_TAGGED_ADDR_CTRL would return size of tags
and offset in the additional arguments.
If tagging is disabled, GET_TAGGED_ADDR_CTRL would return the maximum tag
size in nr_bits.
The selftest is updated accordingly and moved out of the arm64-specific
directory as we are going to enable the interface on x86.
As an alternative to this approach we could introduce a totally new API and
leave the legacy one as is. But it would slow down adoption: a new
prctl(2) flag would need to propagate to the userspace headers.
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
---
 arch/arm64/include/asm/processor.h            | 12 ++--
 arch/arm64/kernel/process.c                   | 45 ++++++++++++---
 arch/arm64/kernel/ptrace.c                    |  4 +-
 kernel/sys.c                                  | 14 +++--
 .../testing/selftests/arm64/tags/tags_test.c  | 31 ----------
 .../selftests/{arm64 => vm}/tags/.gitignore   |  0
 .../selftests/{arm64 => vm}/tags/Makefile     |  0
 .../{arm64 => vm}/tags/run_tags_test.sh       |  0
 tools/testing/selftests/vm/tags/tags_test.c   | 57 +++++++++++++++++++
 9 files changed, 113 insertions(+), 50 deletions(-)
 delete mode 100644 tools/testing/selftests/arm64/tags/tags_test.c
 rename tools/testing/selftests/{arm64 => vm}/tags/.gitignore (100%)
 rename tools/testing/selftests/{arm64 => vm}/tags/Makefile (100%)
 rename tools/testing/selftests/{arm64 => vm}/tags/run_tags_test.sh (100%)
 create mode 100644 tools/testing/selftests/vm/tags/tags_test.c
diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index fce8cbecd6bc..77b91e6d3c85 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -305,10 +305,14 @@ extern void __init minsigstksz_setup(void);
 
 #ifdef CONFIG_ARM64_TAGGED_ADDR_ABI
 /* PR_{SET,GET}_TAGGED_ADDR_CTRL prctl */
-long set_tagged_addr_ctrl(struct task_struct *task, unsigned long arg);
-long get_tagged_addr_ctrl(struct task_struct *task);
-#define SET_TAGGED_ADDR_CTRL(arg)	set_tagged_addr_ctrl(current, arg)
-#define GET_TAGGED_ADDR_CTRL()		get_tagged_addr_ctrl(current)
+long set_tagged_addr_ctrl(struct task_struct *task, unsigned long flags,
+			  int __user *nr_bits, int __user *offset);
+long get_tagged_addr_ctrl(struct task_struct *task,
+			  int __user *nr_bits, int __user *offset);
+#define SET_TAGGED_ADDR_CTRL(flags, nr_bits, offset)	\
+	set_tagged_addr_ctrl(current, flags, nr_bits, offset)
+#define GET_TAGGED_ADDR_CTRL(nr_bits, offset)		\
+	get_tagged_addr_ctrl(current, nr_bits, offset)
 #endif
 
 /*
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index ed919f633ed8..a3007f80e889 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -630,15 +630,21 @@ void arch_setup_new_exec(void)
 }
 
 #ifdef CONFIG_ARM64_TAGGED_ADDR_ABI
+
+#define TBI_TAG_BITS	8
+#define TBI_TAG_SHIFT	56
+
 /*
  * Control the relaxed ABI allowing tagged user addresses into the kernel.
  */
 static unsigned int tagged_addr_disabled;
 
-long set_tagged_addr_ctrl(struct task_struct *task, unsigned long arg)
+long set_tagged_addr_ctrl(struct task_struct *task, unsigned long flags,
+			  int __user *nr_bits, int __user *offset)
 {
 	unsigned long valid_mask = PR_TAGGED_ADDR_ENABLE;
 	struct thread_info *ti = task_thread_info(task);
+	int val;
 
 	if (is_compat_thread(ti))
 		return -EINVAL;
@@ -646,25 +652,41 @@ long set_tagged_addr_ctrl(struct task_struct *task, unsigned long arg)
 	if (system_supports_mte())
 		valid_mask |= PR_MTE_TCF_MASK | PR_MTE_TAG_MASK;
 
-	if (arg & ~valid_mask)
+	if (flags & ~valid_mask)
 		return -EINVAL;
 
+	if (nr_bits) {
+		if (get_user(val, nr_bits))
+			return -EFAULT;
+		if (val > TBI_TAG_BITS || val < 1)
+			return -EINVAL;
+	}
+
 	/*
 	 * Do not allow the enabling of the tagged address ABI if globally
 	 * disabled via sysctl abi.tagged_addr_disabled.
 	 */
-	if (arg & PR_TAGGED_ADDR_ENABLE && tagged_addr_disabled)
+	if (flags & PR_TAGGED_ADDR_ENABLE && tagged_addr_disabled)
 		return -EINVAL;
 
-	if (set_mte_ctrl(task, arg) != 0)
+	if (set_mte_ctrl(task, flags) != 0)
 		return -EINVAL;
 
-	update_ti_thread_flag(ti, TIF_TAGGED_ADDR, arg & PR_TAGGED_ADDR_ENABLE);
+	if (flags & PR_TAGGED_ADDR_ENABLE) {
+		if (nr_bits && put_user(TBI_TAG_BITS, nr_bits))
+			return -EFAULT;
+		if (offset && put_user(TBI_TAG_SHIFT, offset))
+			return -EFAULT;
+	}
+
+	update_ti_thread_flag(ti, TIF_TAGGED_ADDR,
+			      flags & PR_TAGGED_ADDR_ENABLE);
 
 	return 0;
 }
 
-long get_tagged_addr_ctrl(struct task_struct *task)
+long get_tagged_addr_ctrl(struct task_struct *task,
+			  int __user *nr_bits, int __user *offset)
 {
 	long ret = 0;
 	struct thread_info *ti = task_thread_info(task);
@@ -672,8 +694,17 @@ long get_tagged_addr_ctrl(struct task_struct *task)
 	if (is_compat_thread(ti))
 		return -EINVAL;
 
-	if (test_ti_thread_flag(ti, TIF_TAGGED_ADDR))
+	if (test_ti_thread_flag(ti, TIF_TAGGED_ADDR)) {
 		ret = PR_TAGGED_ADDR_ENABLE;
+		if (nr_bits && put_user(TBI_TAG_BITS, nr_bits))
+			return -EFAULT;
+		if (offset && put_user(TBI_TAG_SHIFT, offset))
+			return -EFAULT;
+	} else {
+		/* Report maximum tag size */
+		if (nr_bits && put_user(TBI_TAG_BITS, nr_bits))
+		    return -EFAULT;
+	}
 
 	ret |= get_mte_ctrl(task);
 
diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
index f49b349e16a3..3010db7ef93e 100644
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -1038,7 +1038,7 @@ static int tagged_addr_ctrl_get(struct task_struct *target,
 				const struct user_regset *regset,
 				struct membuf to)
 {
-	long ctrl = get_tagged_addr_ctrl(target);
+	long ctrl = get_tagged_addr_ctrl(target, NULL, NULL);
 
 	if (IS_ERR_VALUE(ctrl))
 		return ctrl;
@@ -1058,7 +1058,7 @@ static int tagged_addr_ctrl_set(struct task_struct *target, const struct
 	if (ret)
 		return ret;
 
-	return set_tagged_addr_ctrl(target, ctrl);
+	return set_tagged_addr_ctrl(target, ctrl, NULL, NULL);
 }
 #endif
 
diff --git a/kernel/sys.c b/kernel/sys.c
index a730c03ee607..7e968d8331cc 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -120,10 +120,10 @@
 # define PAC_RESET_KEYS(a, b)	(-EINVAL)
 #endif
 #ifndef SET_TAGGED_ADDR_CTRL
-# define SET_TAGGED_ADDR_CTRL(a)	(-EINVAL)
+# define SET_TAGGED_ADDR_CTRL(a, b, c)	(-EINVAL)
 #endif
 #ifndef GET_TAGGED_ADDR_CTRL
-# define GET_TAGGED_ADDR_CTRL()		(-EINVAL)
+# define GET_TAGGED_ADDR_CTRL(a, b)	(-EINVAL)
 #endif
 
 /*
@@ -2498,14 +2498,16 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
 		error = PAC_RESET_KEYS(me, arg2);
 		break;
 	case PR_SET_TAGGED_ADDR_CTRL:
-		if (arg3 || arg4 || arg5)
+		if (arg5)
 			return -EINVAL;
-		error = SET_TAGGED_ADDR_CTRL(arg2);
+		error = SET_TAGGED_ADDR_CTRL(arg2, (int __user *)arg3,
+					     (int __user *)arg4);
 		break;
 	case PR_GET_TAGGED_ADDR_CTRL:
-		if (arg2 || arg3 || arg4 || arg5)
+		if (arg4 || arg5)
 			return -EINVAL;
-		error = GET_TAGGED_ADDR_CTRL();
+		error = GET_TAGGED_ADDR_CTRL((int __user *)arg2,
+					     (int __user *)arg3);
 		break;
 	case PR_SET_IO_FLUSHER:
 		if (!capable(CAP_SYS_RESOURCE))
diff --git a/tools/testing/selftests/arm64/tags/tags_test.c b/tools/testing/selftests/arm64/tags/tags_test.c
deleted file mode 100644
index 5701163460ef..000000000000
--- a/tools/testing/selftests/arm64/tags/tags_test.c
+++ /dev/null
@@ -1,31 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <stdint.h>
-#include <sys/prctl.h>
-#include <sys/utsname.h>
-
-#define SHIFT_TAG(tag)		((uint64_t)(tag) << 56)
-#define SET_TAG(ptr, tag)	(((uint64_t)(ptr) & ~SHIFT_TAG(0xff)) | \
-					SHIFT_TAG(tag))
-
-int main(void)
-{
-	static int tbi_enabled = 0;
-	unsigned long tag = 0;
-	struct utsname *ptr;
-	int err;
-
-	if (prctl(PR_SET_TAGGED_ADDR_CTRL, PR_TAGGED_ADDR_ENABLE, 0, 0, 0) == 0)
-		tbi_enabled = 1;
-	ptr = (struct utsname *)malloc(sizeof(*ptr));
-	if (tbi_enabled)
-		tag = 0x42;
-	ptr = (struct utsname *)SET_TAG(ptr, tag);
-	err = uname(ptr);
-	free(ptr);
-
-	return err;
-}
diff --git a/tools/testing/selftests/arm64/tags/.gitignore b/tools/testing/selftests/vm/tags/.gitignore
similarity index 100%
rename from tools/testing/selftests/arm64/tags/.gitignore
rename to tools/testing/selftests/vm/tags/.gitignore
diff --git a/tools/testing/selftests/arm64/tags/Makefile b/tools/testing/selftests/vm/tags/Makefile
similarity index 100%
rename from tools/testing/selftests/arm64/tags/Makefile
rename to tools/testing/selftests/vm/tags/Makefile
diff --git a/tools/testing/selftests/arm64/tags/run_tags_test.sh b/tools/testing/selftests/vm/tags/run_tags_test.sh
similarity index 100%
rename from tools/testing/selftests/arm64/tags/run_tags_test.sh
rename to tools/testing/selftests/vm/tags/run_tags_test.sh
diff --git a/tools/testing/selftests/vm/tags/tags_test.c b/tools/testing/selftests/vm/tags/tags_test.c
new file mode 100644
index 000000000000..ec10a409388d
--- /dev/null
+++ b/tools/testing/selftests/vm/tags/tags_test.c
@@ -0,0 +1,57 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <stdint.h>
+#include <sys/prctl.h>
+#include <sys/utsname.h>
+
+static int tag_bits;
+static int tag_offset;
+
+#define SHIFT_TAG(tag)		((uint64_t)(tag) << tag_offset)
+#define SET_TAG(ptr, tag)	(((uint64_t)(ptr) & ~SHIFT_TAG((1 << tag_bits) - 1)) | SHIFT_TAG(tag))
+
+static int max_tag_bits(void)
+{
+	int nr;
+
+	if (prctl(PR_GET_TAGGED_ADDR_CTRL, 0, 0, 0) < 0)
+		return 0;
+
+	if (prctl(PR_GET_TAGGED_ADDR_CTRL, &nr, 0, 0) < 0)
+		return 8; /* Assume ARM TBI */
+
+	return nr;
+}
+
+int main(void)
+{
+	static int tags_enabled = 0;
+	unsigned long tag = 0;
+	struct utsname *ptr;
+	int err;
+
+	tag_bits = max_tag_bits();
+
+	if (tag_bits && !prctl(PR_SET_TAGGED_ADDR_CTRL, PR_TAGGED_ADDR_ENABLE,
+			       &tag_bits, &tag_offset, 0)) {
+		tags_enabled = 1;
+	} else if (tag_bits == 8 && !prctl(PR_SET_TAGGED_ADDR_CTRL,
+					   PR_TAGGED_ADDR_ENABLE, 0, 0)) {
+		/* ARM TBI with legacy interface*/
+		tags_enabled = 1;
+		tag_offset = 56;
+	}
+
+	ptr = (struct utsname *)malloc(sizeof(*ptr));
+	if (tags_enabled)
+		tag = (1UL << tag_bits) - 1;
+	ptr = (struct utsname *)SET_TAG(ptr, tag);
+	err = uname(ptr);
+	printf("Sysname: %s\n", ptr->sysname);
+	free(ptr);
+
+	return err;
+}
-- 
2.26.2
^ permalink raw reply related	[flat|nested] 33+ messages in thread
- * Re: [RFC 1/9] mm, arm64: Update PR_SET/GET_TAGGED_ADDR_CTRL interface
  2021-02-05 15:16 ` [RFC 1/9] mm, arm64: Update PR_SET/GET_TAGGED_ADDR_CTRL interface Kirill A. Shutemov
@ 2021-02-11 16:57   ` Catalin Marinas
  2021-02-11 17:06     ` Dave Hansen
  0 siblings, 1 reply; 33+ messages in thread
From: Catalin Marinas @ 2021-02-11 16:57 UTC (permalink / raw)
  To: Kirill A. Shutemov
  Cc: Dave Hansen, Andy Lutomirski, Peter Zijlstra, x86,
	Andrey Ryabinin, Alexander Potapenko, Dmitry Vyukov, Will Deacon,
	H . J . Lu, Andi Kleen, linux-mm, linux-kernel
Hi Kirill,
On Fri, Feb 05, 2021 at 06:16:21PM +0300, Kirill A. Shutemov wrote:
> The interface for enabling tagged addresses is very inflexible. It
> implies tag size and tag shift implemented by ARM TBI.
> 
> Rework the interface to accommodate different shifts and tag sizes.
> 
> PR_SET_TAGGED_ADDR_CTRL now accepts two new arguments:
> 
>  - nr_bits is pointer to int. The caller specifies the tag size it
>    wants. Kernel updates the value of actual tag size that can be
>    larger.
> 
>  - offset is pointer to int. Kernel returns there a shift of tag in the
>    address.
OK, so the expectation is that it's not always the top nr_bits (with
offset 64 - nr_bits). We can, for example, have a 4-bit tag with a 56
offset (arm64 MTE is kind of like this in terms of meaningful bits but bits
60-63 are also ignored, so it doesn't make a difference from the tagged
address ABI perspective).
Does the offset also need to be validated?
> The change doesn't break existing users of the interface: if any of
> these pointers are NULL (as we had before the change), the user expects
> ARM TBI implementation: nr_bits == 8 && offset == 56 as it was implied
> before.
> 
> The initial implementation checked that these argument are NULL and the
> change wouldn't not break any legacy users.
> 
> If tagging is enabled, GET_TAGGED_ADDR_CTRL would return size of tags
> and offset in the additional arguments.
> 
> If tagging is disable, GET_TAGGED_ADDR_CTRL would return the maximum tag
> size in nr_bits.
Why not the offset as well? I guess I should read a bit on the x86
feature.
> The selftest is updated accordingly and moved out of arm64-specific
> directory as we going to enable the interface on x86.
> 
> As alternative to this approach we could introduce a totally new API and
> leave the legacy one as is. But it would slow down adoption: new
> prctl(2) flag wound need to propogate to the userspace headers.
Sharing the same prctl() is fine by me. We helped ourselves to lots of
bits in the first argument already for MTE but I don't think it matters
much. If x86 would need any, they can overlap with the arm64 bits as
they are interpreted in the arch code anyway.
The patch looks fine, though with a couple of nits/questions below:
> @@ -646,25 +652,41 @@ long set_tagged_addr_ctrl(struct task_struct *task, unsigned long arg)
>  	if (system_supports_mte())
>  		valid_mask |= PR_MTE_TCF_MASK | PR_MTE_TAG_MASK;
>  
> -	if (arg & ~valid_mask)
> +	if (flags & ~valid_mask)
>  		return -EINVAL;
>  
> +	if (nr_bits) {
> +		if (get_user(val, nr_bits))
> +			return -EFAULT;
> +		if (val > TBI_TAG_BITS || val < 1)
> +			return -EINVAL;
> +	}
Do we need to validate the offset as well?
> +
>  	/*
>  	 * Do not allow the enabling of the tagged address ABI if globally
>  	 * disabled via sysctl abi.tagged_addr_disabled.
>  	 */
> -	if (arg & PR_TAGGED_ADDR_ENABLE && tagged_addr_disabled)
> +	if (flags & PR_TAGGED_ADDR_ENABLE && tagged_addr_disabled)
>  		return -EINVAL;
>  
> -	if (set_mte_ctrl(task, arg) != 0)
> +	if (set_mte_ctrl(task, flags) != 0)
>  		return -EINVAL;
>  
> -	update_ti_thread_flag(ti, TIF_TAGGED_ADDR, arg & PR_TAGGED_ADDR_ENABLE);
> +	if (flags & PR_TAGGED_ADDR_ENABLE) {
> +		if (nr_bits && put_user(TBI_TAG_BITS, nr_bits))
> +			return -EFAULT;
> +		if (offset && put_user(TBI_TAG_SHIFT, offset))
> +			return -EFAULT;
> +	}
> +
> +	update_ti_thread_flag(ti, TIF_TAGGED_ADDR,
> +			      flags & PR_TAGGED_ADDR_ENABLE);
>  
>  	return 0;
>  }
>  
> -long get_tagged_addr_ctrl(struct task_struct *task)
> +long get_tagged_addr_ctrl(struct task_struct *task,
> +			  int __user *nr_bits, int __user *offset)
>  {
>  	long ret = 0;
>  	struct thread_info *ti = task_thread_info(task);
> @@ -672,8 +694,17 @@ long get_tagged_addr_ctrl(struct task_struct *task)
>  	if (is_compat_thread(ti))
>  		return -EINVAL;
>  
> -	if (test_ti_thread_flag(ti, TIF_TAGGED_ADDR))
> +	if (test_ti_thread_flag(ti, TIF_TAGGED_ADDR)) {
>  		ret = PR_TAGGED_ADDR_ENABLE;
> +		if (nr_bits && put_user(TBI_TAG_BITS, nr_bits))
> +			return -EFAULT;
> +		if (offset && put_user(TBI_TAG_SHIFT, offset))
> +			return -EFAULT;
> +	} else {
> +		/* Report maximum tag size */
> +		if (nr_bits && put_user(TBI_TAG_BITS, nr_bits))
> +		    return -EFAULT;
> +	}
Should this also populate the minimum offset allowed?
-- 
Catalin
^ permalink raw reply	[flat|nested] 33+ messages in thread
- * Re: [RFC 1/9] mm, arm64: Update PR_SET/GET_TAGGED_ADDR_CTRL interface
  2021-02-11 16:57   ` Catalin Marinas
@ 2021-02-11 17:06     ` Dave Hansen
  2021-02-11 18:26       ` Catalin Marinas
  0 siblings, 1 reply; 33+ messages in thread
From: Dave Hansen @ 2021-02-11 17:06 UTC (permalink / raw)
  To: Catalin Marinas, Kirill A. Shutemov
  Cc: Dave Hansen, Andy Lutomirski, Peter Zijlstra, x86,
	Andrey Ryabinin, Alexander Potapenko, Dmitry Vyukov, Will Deacon,
	H . J . Lu, Andi Kleen, linux-mm, linux-kernel
Hi Catalin,
I noticed there are some ELF bits for ARM's BTI feature:
	GNU_PROPERTY_AARCH64_FEATURE_1_BTI
> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/include/uapi/linux/elf.h#n453
There's been talk of needing a similar set of bits on x86 for tagged
pointers (LAM).  Do you have any plans to do something similar (ELF
property bits) for any of the pointer tagging features?
^ permalink raw reply	[flat|nested] 33+ messages in thread 
- * Re: [RFC 1/9] mm, arm64: Update PR_SET/GET_TAGGED_ADDR_CTRL interface
  2021-02-11 17:06     ` Dave Hansen
@ 2021-02-11 18:26       ` Catalin Marinas
  0 siblings, 0 replies; 33+ messages in thread
From: Catalin Marinas @ 2021-02-11 18:26 UTC (permalink / raw)
  To: Dave Hansen
  Cc: Kirill A. Shutemov, Dave Hansen, Andy Lutomirski, Peter Zijlstra,
	x86, Andrey Ryabinin, Alexander Potapenko, Dmitry Vyukov,
	Will Deacon, H . J . Lu, Andi Kleen, linux-mm, linux-kernel
Hi Dave,
On Thu, Feb 11, 2021 at 09:06:12AM -0800, Dave Hansen wrote:
> I noticed there are some ELF bits for ARM's BTI feature:
> 
> 	GNU_PROPERTY_AARCH64_FEATURE_1_BTI
> 
> > https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/include/uapi/linux/elf.h#n453
> 
> There's been talk of needing a similar set of bits on x86 for tagged
> pointers (LAM).  Do you have any plans to do something similar (ELF
> property bits) for any of the pointer tagging features?
Not at the moment but see below.
Tagged addresses were supported on arm64 from day 0, though they were
not used much (I think some JITs attempted to encode pointer types in
the top byte). Until recently, we haven't allowed such tagged pointers
as syscall arguments.
The need for wider use of tagged pointers and the syscall ABI relaxation
came from the ASan/HWASan work on LLVM and the subsequent Memory Tagging
Extensions (MTE). With the latter, the user code doesn't need to be
recompiled, only the right heap allocator to be loaded. So you could do
an LD_PRELOAD to override malloc/free to return tagged pointers (I did
this in the past to run a standard Debian + LTP testing). So we decided
that it's easier for the C library to invoke a prctl() if needed rather
than having some ELF property.
MTE also requires additional control (like synchronous/asynchronous tag
checking) which we added to the same prctl() to be done in a single
call. That's, again, the decision of the C library together with using
mmap(PROT_MTE) for the heap regions.
That said, since HWASan requires code instrumentation, it would have
been nice if some GNU property was present as not to mix&match such
objects. I guess since it's mostly a debug feature, people didn't
bother but it would probably fall apart if you mix it with MTE.
With MTE, at some point we may deploy stack tagging which requires
instrumentation of the function prologue/epilogue and mapping the stack
with PROT_MTE. That's not widely used at the moment since such
instructions are undef on previous CPU implementations. We may end up
with an ELF annotation to distinguish such objects but I think that's
still up for discussion.
The reason for the BTI property was static binaries that needed PROT_BTI
on their exec sections to be set by the kernel. MTE did not have such
need (the C library would pass PROT_MTE explicitly on heap allocations)
and neither did the top-byte-ignore feature.
For the LAM feature, if the code is identical (e.g. it's the C library
deciding whether malloc() returns tagged pointers), I don't see much use
for an ELF annotation. However, if the plan is for something similar to
HWASan with different code generation, it may be safer to just add a
GNU note.
-- 
Catalin
^ permalink raw reply	[flat|nested] 33+ messages in thread 
 
 
 
- * [QEMU] x86: Implement Linear Address Masking support
  2021-02-05 15:16 [RFC 0/9] Linear Address Masking enabling Kirill A. Shutemov
  2021-02-05 15:16 ` [RFC 1/9] mm, arm64: Update PR_SET/GET_TAGGED_ADDR_CTRL interface Kirill A. Shutemov
@ 2021-02-05 15:16 ` Kirill A. Shutemov
  2021-02-05 15:16 ` [RFC 2/9] x86/mm: Fix CR3_ADDR_MASK Kirill A. Shutemov
                   ` (10 subsequent siblings)
  12 siblings, 0 replies; 33+ messages in thread
From: Kirill A. Shutemov @ 2021-02-05 15:16 UTC (permalink / raw)
  To: Dave Hansen, Andy Lutomirski, Peter Zijlstra
  Cc: x86, Andrey Ryabinin, Alexander Potapenko, Dmitry Vyukov,
	Catalin Marinas, Will Deacon, H . J . Lu, Andi Kleen, linux-mm,
	linux-kernel, Kirill A. Shutemov
Linear Address Masking feature makes CPU ignore some bits of the virtual
address. These bits can be used to encode metadata.
The feature is enumerated with CPUID.(EAX=07H, ECX=01H):EAX.LAM[bit 26].
CR3.LAM_U57[bit 62] allows to encode 6 bits of metadata in bits 62:57 of
user pointers.
CR3.LAM_U48[bit 61] allows to encode 15 bits of metadata in bits 62:48
of user pointers.
CR4.LAM_SUP[bit 28] allows to encode metadata of supervisor pointers.
If 5-level paging is in use, 6 bits of metadata can be encoded in 62:57.
For 4-level paging, 15 bits of metadata can be encoded in bits 62:48.
QEMU strips the metadata bits from the address and brings it to canonical
shape before handling the memory access. This has to be done very early, before
the TLB lookup.
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
---
 accel/tcg/cputlb.c        | 54 +++++++++++++++++++++++----------------
 include/hw/core/cpu.h     |  1 +
 target/i386/cpu.c         |  5 ++--
 target/i386/cpu.h         |  7 +++++
 target/i386/excp_helper.c | 28 +++++++++++++++++++-
 target/i386/helper.c      |  2 +-
 6 files changed, 71 insertions(+), 26 deletions(-)
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index 42ab79c1a582..f2d27134474f 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -1271,6 +1271,17 @@ static inline ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
     return ram_addr;
 }
 
+static vaddr clean_addr(CPUState *cpu, vaddr addr)
+{
+    CPUClass *cc = CPU_GET_CLASS(cpu);
+
+    if (cc->do_clean_addr) {
+        addr = cc->do_clean_addr(cpu, addr);
+    }
+
+    return addr;
+}
+
 /*
  * Note: tlb_fill() can trigger a resize of the TLB. This means that all of the
  * caller's prior references to the TLB table (e.g. CPUTLBEntry pointers) must
@@ -1702,9 +1713,11 @@ bool tlb_plugin_lookup(CPUState *cpu, target_ulong addr, int mmu_idx,
 
 /* Probe for a read-modify-write atomic operation.  Do not allow unaligned
  * operations, or io operations to proceed.  Return the host address.  */
-static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
+static void *atomic_mmu_lookup(CPUArchState *env, target_ulong address,
                                TCGMemOpIdx oi, uintptr_t retaddr)
 {
+    CPUState *cpu = env_cpu(env);
+    target_ulong addr = clean_addr(cpu, address);
     size_t mmu_idx = get_mmuidx(oi);
     uintptr_t index = tlb_index(env, mmu_idx, addr);
     CPUTLBEntry *tlbe = tlb_entry(env, mmu_idx, addr);
@@ -1720,8 +1733,7 @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
     /* Enforce guest required alignment.  */
     if (unlikely(a_bits > 0 && (addr & ((1 << a_bits) - 1)))) {
         /* ??? Maybe indicate atomic op to cpu_unaligned_access */
-        cpu_unaligned_access(env_cpu(env), addr, MMU_DATA_STORE,
-                             mmu_idx, retaddr);
+        cpu_unaligned_access(cpu, addr, MMU_DATA_STORE, mmu_idx, retaddr);
     }
 
     /* Enforce qemu required alignment.  */
@@ -1736,8 +1748,7 @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
     /* Check TLB entry and enforce page permissions.  */
     if (!tlb_hit(tlb_addr, addr)) {
         if (!VICTIM_TLB_HIT(addr_write, addr)) {
-            tlb_fill(env_cpu(env), addr, 1 << s_bits, MMU_DATA_STORE,
-                     mmu_idx, retaddr);
+            tlb_fill(cpu, addr, 1 << s_bits, MMU_DATA_STORE, mmu_idx, retaddr);
             index = tlb_index(env, mmu_idx, addr);
             tlbe = tlb_entry(env, mmu_idx, addr);
         }
@@ -1753,8 +1764,7 @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
 
     /* Let the guest notice RMW on a write-only page.  */
     if (unlikely(tlbe->addr_read != (tlb_addr & ~TLB_NOTDIRTY))) {
-        tlb_fill(env_cpu(env), addr, 1 << s_bits, MMU_DATA_LOAD,
-                 mmu_idx, retaddr);
+        tlb_fill(cpu, addr, 1 << s_bits, MMU_DATA_LOAD, mmu_idx, retaddr);
         /* Since we don't support reads and writes to different addresses,
            and we do have the proper page loaded for write, this shouldn't
            ever return.  But just in case, handle via stop-the-world.  */
@@ -1764,14 +1774,14 @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
     hostaddr = (void *)((uintptr_t)addr + tlbe->addend);
 
     if (unlikely(tlb_addr & TLB_NOTDIRTY)) {
-        notdirty_write(env_cpu(env), addr, 1 << s_bits,
+        notdirty_write(cpu, addr, 1 << s_bits,
                        &env_tlb(env)->d[mmu_idx].iotlb[index], retaddr);
     }
 
     return hostaddr;
 
  stop_the_world:
-    cpu_loop_exit_atomic(env_cpu(env), retaddr);
+    cpu_loop_exit_atomic(cpu, retaddr);
 }
 
 /*
@@ -1810,10 +1820,12 @@ load_memop(const void *haddr, MemOp op)
 }
 
 static inline uint64_t QEMU_ALWAYS_INLINE
-load_helper(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi,
+load_helper(CPUArchState *env, target_ulong address, TCGMemOpIdx oi,
             uintptr_t retaddr, MemOp op, bool code_read,
             FullLoadHelper *full_load)
 {
+    CPUState *cpu = env_cpu(env);
+    target_ulong addr = clean_addr(cpu, address);
     uintptr_t mmu_idx = get_mmuidx(oi);
     uintptr_t index = tlb_index(env, mmu_idx, addr);
     CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
@@ -1829,16 +1841,14 @@ load_helper(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi,
 
     /* Handle CPU specific unaligned behaviour */
     if (addr & ((1 << a_bits) - 1)) {
-        cpu_unaligned_access(env_cpu(env), addr, access_type,
-                             mmu_idx, retaddr);
+        cpu_unaligned_access(cpu, addr, access_type, mmu_idx, retaddr);
     }
 
     /* If the TLB entry is for a different page, reload and try again.  */
     if (!tlb_hit(tlb_addr, addr)) {
         if (!victim_tlb_hit(env, mmu_idx, index, tlb_off,
                             addr & TARGET_PAGE_MASK)) {
-            tlb_fill(env_cpu(env), addr, size,
-                     access_type, mmu_idx, retaddr);
+            tlb_fill(cpu, addr, size, access_type, mmu_idx, retaddr);
             index = tlb_index(env, mmu_idx, addr);
             entry = tlb_entry(env, mmu_idx, addr);
         }
@@ -1861,7 +1871,7 @@ load_helper(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi,
         /* Handle watchpoints.  */
         if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
             /* On watchpoint hit, this will longjmp out.  */
-            cpu_check_watchpoint(env_cpu(env), addr, size,
+            cpu_check_watchpoint(cpu, addr, size,
                                  iotlbentry->attrs, BP_MEM_READ, retaddr);
         }
 
@@ -2341,9 +2351,11 @@ store_helper_unaligned(CPUArchState *env, target_ulong addr, uint64_t val,
 }
 
 static inline void QEMU_ALWAYS_INLINE
-store_helper(CPUArchState *env, target_ulong addr, uint64_t val,
+store_helper(CPUArchState *env, target_ulong address, uint64_t val,
              TCGMemOpIdx oi, uintptr_t retaddr, MemOp op)
 {
+    CPUState *cpu = env_cpu(env);
+    target_ulong addr = clean_addr(cpu, address);
     uintptr_t mmu_idx = get_mmuidx(oi);
     uintptr_t index = tlb_index(env, mmu_idx, addr);
     CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
@@ -2355,16 +2367,14 @@ store_helper(CPUArchState *env, target_ulong addr, uint64_t val,
 
     /* Handle CPU specific unaligned behaviour */
     if (addr & ((1 << a_bits) - 1)) {
-        cpu_unaligned_access(env_cpu(env), addr, MMU_DATA_STORE,
-                             mmu_idx, retaddr);
+        cpu_unaligned_access(cpu, addr, MMU_DATA_STORE, mmu_idx, retaddr);
     }
 
     /* If the TLB entry is for a different page, reload and try again.  */
     if (!tlb_hit(tlb_addr, addr)) {
         if (!victim_tlb_hit(env, mmu_idx, index, tlb_off,
             addr & TARGET_PAGE_MASK)) {
-            tlb_fill(env_cpu(env), addr, size, MMU_DATA_STORE,
-                     mmu_idx, retaddr);
+            tlb_fill(cpu, addr, size, MMU_DATA_STORE, mmu_idx, retaddr);
             index = tlb_index(env, mmu_idx, addr);
             entry = tlb_entry(env, mmu_idx, addr);
         }
@@ -2386,7 +2396,7 @@ store_helper(CPUArchState *env, target_ulong addr, uint64_t val,
         /* Handle watchpoints.  */
         if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
             /* On watchpoint hit, this will longjmp out.  */
-            cpu_check_watchpoint(env_cpu(env), addr, size,
+            cpu_check_watchpoint(cpu, addr, size,
                                  iotlbentry->attrs, BP_MEM_WRITE, retaddr);
         }
 
@@ -2406,7 +2416,7 @@ store_helper(CPUArchState *env, target_ulong addr, uint64_t val,
 
         /* Handle clean RAM pages.  */
         if (tlb_addr & TLB_NOTDIRTY) {
-            notdirty_write(env_cpu(env), addr, size, iotlbentry, retaddr);
+            notdirty_write(cpu, addr, size, iotlbentry, retaddr);
         }
 
         haddr = (void *)((uintptr_t)addr + entry->addend);
diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
index 3d92c967fffa..64817bc10f1b 100644
--- a/include/hw/core/cpu.h
+++ b/include/hw/core/cpu.h
@@ -171,6 +171,7 @@ struct CPUClass {
     int reset_dump_flags;
     bool (*has_work)(CPUState *cpu);
     void (*do_interrupt)(CPUState *cpu);
+    vaddr (*do_clean_addr)(CPUState *cpu, vaddr addr);
     void (*do_unaligned_access)(CPUState *cpu, vaddr addr,
                                 MMUAccessType access_type,
                                 int mmu_idx, uintptr_t retaddr);
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 5a8c96072e41..f819f0673103 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -666,7 +666,7 @@ static void x86_cpu_vendor_words2str(char *dst, uint32_t vendor1,
           /* CPUID_7_0_ECX_OSPKE is dynamic */ \
           CPUID_7_0_ECX_LA57)
 #define TCG_7_0_EDX_FEATURES 0
-#define TCG_7_1_EAX_FEATURES 0
+#define TCG_7_1_EAX_FEATURES CPUID_7_1_EAX_LAM
 #define TCG_APM_FEATURES 0
 #define TCG_6_EAX_FEATURES CPUID_6_EAX_ARAT
 #define TCG_XSAVE_FEATURES (CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XGETBV1)
@@ -997,7 +997,7 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = {
             NULL, NULL, NULL, NULL,
             NULL, NULL, NULL, NULL,
             NULL, NULL, NULL, NULL,
-            NULL, NULL, NULL, NULL,
+            NULL, NULL, "lam", NULL,
             NULL, NULL, NULL, NULL,
         },
         .cpuid = {
@@ -7290,6 +7290,7 @@ static void x86_cpu_common_class_init(ObjectClass *oc, void *data)
 #ifdef CONFIG_TCG
     cc->tcg_initialize = tcg_x86_init;
     cc->tlb_fill = x86_cpu_tlb_fill;
+    cc->do_clean_addr = x86_cpu_clean_addr;
 #endif
     cc->disas_set_info = x86_disas_set_info;
 
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index 88e8586f8fb4..f8477e16685d 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -229,6 +229,9 @@ typedef enum X86Seg {
 #define CR0_AM_MASK  (1U << 18)
 #define CR0_PG_MASK  (1U << 31)
 
+#define CR3_LAM_U48  (1ULL << 61)
+#define CR3_LAM_U57  (1ULL << 62)
+
 #define CR4_VME_MASK  (1U << 0)
 #define CR4_PVI_MASK  (1U << 1)
 #define CR4_TSD_MASK  (1U << 2)
@@ -250,6 +253,7 @@ typedef enum X86Seg {
 #define CR4_SMEP_MASK   (1U << 20)
 #define CR4_SMAP_MASK   (1U << 21)
 #define CR4_PKE_MASK   (1U << 22)
+#define CR4_LAM_SUP    (1U << 28)
 
 #define DR6_BD          (1 << 13)
 #define DR6_BS          (1 << 14)
@@ -796,6 +800,8 @@ typedef uint64_t FeatureWordArray[FEATURE_WORDS];
 
 /* AVX512 BFloat16 Instruction */
 #define CPUID_7_1_EAX_AVX512_BF16       (1U << 5)
+/* Linear Address Masking */
+#define CPUID_7_1_EAX_LAM               (1U << 26)
 
 /* CLZERO instruction */
 #define CPUID_8000_0008_EBX_CLZERO      (1U << 0)
@@ -1924,6 +1930,7 @@ bool x86_cpu_tlb_fill(CPUState *cs, vaddr address, int size,
                       MMUAccessType access_type, int mmu_idx,
                       bool probe, uintptr_t retaddr);
 void x86_cpu_set_a20(X86CPU *cpu, int a20_state);
+vaddr x86_cpu_clean_addr(CPUState *cpu, vaddr addr);
 
 #ifndef CONFIG_USER_ONLY
 static inline int x86_asidx_from_attrs(CPUState *cs, MemTxAttrs attrs)
diff --git a/target/i386/excp_helper.c b/target/i386/excp_helper.c
index 191471749fbf..edf8194574b2 100644
--- a/target/i386/excp_helper.c
+++ b/target/i386/excp_helper.c
@@ -406,7 +406,7 @@ static int handle_mmu_fault(CPUState *cs, vaddr addr, int size,
             }
 
             if (la57) {
-                pml5e_addr = ((env->cr[3] & ~0xfff) +
+                pml5e_addr = ((env->cr[3] & PG_ADDRESS_MASK) +
                         (((addr >> 48) & 0x1ff) << 3)) & a20_mask;
                 pml5e_addr = get_hphys(cs, pml5e_addr, MMU_DATA_STORE, NULL);
                 pml5e = x86_ldq_phys(cs, pml5e_addr);
@@ -700,3 +700,29 @@ bool x86_cpu_tlb_fill(CPUState *cs, vaddr addr, int size,
     return true;
 #endif
 }
+
+static inline int64_t sign_extend64(uint64_t value, int index)
+{
+    int shift = 63 - index;
+    return (int64_t)(value << shift) >> shift;
+}
+
+vaddr x86_cpu_clean_addr(CPUState *cs, vaddr addr)
+{
+    CPUX86State *env = &X86_CPU(cs)->env;
+    bool la57 = env->cr[4] & CR4_LA57_MASK;
+
+    if (addr >> 63) {
+        if (env->cr[4] & CR4_LAM_SUP) {
+            return sign_extend64(addr, la57 ? 56 : 47);
+        }
+    } else {
+        if (env->cr[3] & CR3_LAM_U57) {
+            return sign_extend64(addr, 56);
+        } else if (env->cr[3] & CR3_LAM_U48) {
+            return sign_extend64(addr, 47);
+        }
+    }
+
+    return addr;
+}
diff --git a/target/i386/helper.c b/target/i386/helper.c
index 034f46bcc210..6c099443ce13 100644
--- a/target/i386/helper.c
+++ b/target/i386/helper.c
@@ -753,7 +753,7 @@ hwaddr x86_cpu_get_phys_page_attrs_debug(CPUState *cs, vaddr addr,
             }
 
             if (la57) {
-                pml5e_addr = ((env->cr[3] & ~0xfff) +
+                pml5e_addr = ((env->cr[3] & PG_ADDRESS_MASK) +
                         (((addr >> 48) & 0x1ff) << 3)) & a20_mask;
                 pml5e = x86_ldq_phys(cs, pml5e_addr);
                 if (!(pml5e & PG_PRESENT_MASK)) {
-- 
2.26.2
^ permalink raw reply related	[flat|nested] 33+ messages in thread
- * [RFC 2/9] x86/mm: Fix CR3_ADDR_MASK
  2021-02-05 15:16 [RFC 0/9] Linear Address Masking enabling Kirill A. Shutemov
  2021-02-05 15:16 ` [RFC 1/9] mm, arm64: Update PR_SET/GET_TAGGED_ADDR_CTRL interface Kirill A. Shutemov
  2021-02-05 15:16 ` [QEMU] x86: Implement Linear Address Masking support Kirill A. Shutemov
@ 2021-02-05 15:16 ` Kirill A. Shutemov
  2021-02-05 15:16 ` [RFC 3/9] x86: CPUID and CR3/CR4 flags for Linear Address Masking Kirill A. Shutemov
                   ` (9 subsequent siblings)
  12 siblings, 0 replies; 33+ messages in thread
From: Kirill A. Shutemov @ 2021-02-05 15:16 UTC (permalink / raw)
  To: Dave Hansen, Andy Lutomirski, Peter Zijlstra
  Cc: x86, Andrey Ryabinin, Alexander Potapenko, Dmitry Vyukov,
	Catalin Marinas, Will Deacon, H . J . Lu, Andi Kleen, linux-mm,
	linux-kernel, Kirill A. Shutemov
The mask must not include bits above physical address mask. These bits
are reserved and can be used for other things. Bits 61 and 62 are used
for Linear Address Masking.
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
---
 arch/x86/include/asm/processor-flags.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/x86/include/asm/processor-flags.h b/arch/x86/include/asm/processor-flags.h
index 02c2cbda4a74..a7f3d9100adb 100644
--- a/arch/x86/include/asm/processor-flags.h
+++ b/arch/x86/include/asm/processor-flags.h
@@ -35,7 +35,7 @@
  */
 #ifdef CONFIG_X86_64
 /* Mask off the address space ID and SME encryption bits. */
-#define CR3_ADDR_MASK	__sme_clr(0x7FFFFFFFFFFFF000ull)
+#define CR3_ADDR_MASK	__sme_clr(PHYSICAL_PAGE_MASK)
 #define CR3_PCID_MASK	0xFFFull
 #define CR3_NOFLUSH	BIT_ULL(63)
 
-- 
2.26.2
^ permalink raw reply related	[flat|nested] 33+ messages in thread
- * [RFC 3/9] x86: CPUID and CR3/CR4 flags for Linear Address Masking
  2021-02-05 15:16 [RFC 0/9] Linear Address Masking enabling Kirill A. Shutemov
                   ` (2 preceding siblings ...)
  2021-02-05 15:16 ` [RFC 2/9] x86/mm: Fix CR3_ADDR_MASK Kirill A. Shutemov
@ 2021-02-05 15:16 ` Kirill A. Shutemov
  2021-02-05 15:16 ` [RFC 4/9] x86/mm: Introduce TIF_LAM_U57 and TIF_LAM_U48 Kirill A. Shutemov
                   ` (8 subsequent siblings)
  12 siblings, 0 replies; 33+ messages in thread
From: Kirill A. Shutemov @ 2021-02-05 15:16 UTC (permalink / raw)
  To: Dave Hansen, Andy Lutomirski, Peter Zijlstra
  Cc: x86, Andrey Ryabinin, Alexander Potapenko, Dmitry Vyukov,
	Catalin Marinas, Will Deacon, H . J . Lu, Andi Kleen, linux-mm,
	linux-kernel, Kirill A. Shutemov
Enumerate Linear Address Masking and provide defines for CR3 and CR4
flags.
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
---
 arch/x86/include/asm/cpufeatures.h          | 1 +
 arch/x86/include/uapi/asm/processor-flags.h | 6 ++++++
 2 files changed, 7 insertions(+)
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index dad350d42ecf..3ae25d88216e 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -293,6 +293,7 @@
 
 /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
 #define X86_FEATURE_AVX512_BF16		(12*32+ 5) /* AVX512 BFLOAT16 instructions */
+#define X86_FEATURE_LAM			(12*32+26) /* Linear Address Masking */
 
 /* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */
 #define X86_FEATURE_CLZERO		(13*32+ 0) /* CLZERO instruction */
diff --git a/arch/x86/include/uapi/asm/processor-flags.h b/arch/x86/include/uapi/asm/processor-flags.h
index bcba3c643e63..f2a4a53308e2 100644
--- a/arch/x86/include/uapi/asm/processor-flags.h
+++ b/arch/x86/include/uapi/asm/processor-flags.h
@@ -82,6 +82,10 @@
 #define X86_CR3_PCID_BITS	12
 #define X86_CR3_PCID_MASK	(_AC((1UL << X86_CR3_PCID_BITS) - 1, UL))
 
+#define X86_CR3_LAM_U48_BIT	61 /* Activate LAM for userspace, 62:48 bits masked */
+#define X86_CR3_LAM_U48		_BITULL(X86_CR3_LAM_U48_BIT)
+#define X86_CR3_LAM_U57_BIT	62 /* Activate LAM for userspace, 62:57 bits masked */
+#define X86_CR3_LAM_U57		_BITULL(X86_CR3_LAM_U57_BIT)
 #define X86_CR3_PCID_NOFLUSH_BIT 63 /* Preserve old PCID */
 #define X86_CR3_PCID_NOFLUSH    _BITULL(X86_CR3_PCID_NOFLUSH_BIT)
 
@@ -130,6 +134,8 @@
 #define X86_CR4_SMAP		_BITUL(X86_CR4_SMAP_BIT)
 #define X86_CR4_PKE_BIT		22 /* enable Protection Keys support */
 #define X86_CR4_PKE		_BITUL(X86_CR4_PKE_BIT)
+#define X86_CR4_LAM_SUP_BIT	28 /* LAM for supervisor pointers */
+#define X86_CR4_LAM_SUP		_BITUL(X86_CR4_LAM_SUP_BIT)
 
 /*
  * x86-64 Task Priority Register, CR8
-- 
2.26.2
^ permalink raw reply related	[flat|nested] 33+ messages in thread
- * [RFC 4/9] x86/mm: Introduce TIF_LAM_U57 and TIF_LAM_U48
  2021-02-05 15:16 [RFC 0/9] Linear Address Masking enabling Kirill A. Shutemov
                   ` (3 preceding siblings ...)
  2021-02-05 15:16 ` [RFC 3/9] x86: CPUID and CR3/CR4 flags for Linear Address Masking Kirill A. Shutemov
@ 2021-02-05 15:16 ` Kirill A. Shutemov
  2021-02-05 15:16 ` [RFC 5/9] x86/mm: Provide untagged_addr() helper Kirill A. Shutemov
                   ` (7 subsequent siblings)
  12 siblings, 0 replies; 33+ messages in thread
From: Kirill A. Shutemov @ 2021-02-05 15:16 UTC (permalink / raw)
  To: Dave Hansen, Andy Lutomirski, Peter Zijlstra
  Cc: x86, Andrey Ryabinin, Alexander Potapenko, Dmitry Vyukov,
	Catalin Marinas, Will Deacon, H . J . Lu, Andi Kleen, linux-mm,
	linux-kernel, Kirill A. Shutemov
The new thread flags indicate that the thread has Linear Address Masking
enabled.
switch_mm_irqs_off() now respects these flags and sets CR3 accordingly.
The active LAM mode gets recorded in the tlb_state.
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
---
 arch/x86/include/asm/thread_info.h |  9 ++-
 arch/x86/include/asm/tlbflush.h    |  5 ++
 arch/x86/mm/tlb.c                  | 96 +++++++++++++++++++++++++-----
 3 files changed, 93 insertions(+), 17 deletions(-)
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 44733a4bfc42..e2ae8fcb3492 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -93,6 +93,7 @@ struct thread_info {
 #define TIF_NOTSC		16	/* TSC is not accessible in userland */
 #define TIF_IA32		17	/* IA32 compatibility process */
 #define TIF_SLD			18	/* Restore split lock detection on context switch */
+#define TIF_X32			19	/* 32-bit native x86-64 binary */
 #define TIF_MEMDIE		20	/* is terminating due to OOM killer */
 #define TIF_POLLING_NRFLAG	21	/* idle is polling for TIF_NEED_RESCHED */
 #define TIF_IO_BITMAP		22	/* uses I/O bitmap */
@@ -101,7 +102,9 @@ struct thread_info {
 #define TIF_LAZY_MMU_UPDATES	27	/* task is updating the mmu lazily */
 #define TIF_SYSCALL_TRACEPOINT	28	/* syscall tracepoint instrumentation */
 #define TIF_ADDR32		29	/* 32-bit address space on 64 bits */
-#define TIF_X32			30	/* 32-bit native x86-64 binary */
+#define TIF_LAM_U57		30	/* LAM for userspace pointers, 6 bits */
+#define TIF_LAM_U48		31	/* LAM for userspace pointers, 15 bits */
+
 
 #define _TIF_SYSCALL_TRACE	(1 << TIF_SYSCALL_TRACE)
 #define _TIF_NOTIFY_RESUME	(1 << TIF_NOTIFY_RESUME)
@@ -122,6 +125,7 @@ struct thread_info {
 #define _TIF_NOTSC		(1 << TIF_NOTSC)
 #define _TIF_IA32		(1 << TIF_IA32)
 #define _TIF_SLD		(1 << TIF_SLD)
+#define _TIF_X32		(1 << TIF_X32)
 #define _TIF_POLLING_NRFLAG	(1 << TIF_POLLING_NRFLAG)
 #define _TIF_IO_BITMAP		(1 << TIF_IO_BITMAP)
 #define _TIF_FORCED_TF		(1 << TIF_FORCED_TF)
@@ -129,7 +133,8 @@ struct thread_info {
 #define _TIF_LAZY_MMU_UPDATES	(1 << TIF_LAZY_MMU_UPDATES)
 #define _TIF_SYSCALL_TRACEPOINT	(1 << TIF_SYSCALL_TRACEPOINT)
 #define _TIF_ADDR32		(1 << TIF_ADDR32)
-#define _TIF_X32		(1 << TIF_X32)
+#define _TIF_LAM_U57		(1 << TIF_LAM_U57)
+#define _TIF_LAM_U48		(1 << TIF_LAM_U48)
 
 /* flags to check in __switch_to() */
 #define _TIF_WORK_CTXSW_BASE					\
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 8c87a2e0b660..7e124fd71a67 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -17,6 +17,10 @@ void __flush_tlb_all(void);
 
 #define TLB_FLUSH_ALL	-1UL
 
+#define LAM_NONE	0
+#define LAM_U57		1
+#define LAM_U48		2
+
 void cr4_update_irqsoff(unsigned long set, unsigned long clear);
 unsigned long cr4_read_shadow(void);
 
@@ -88,6 +92,7 @@ struct tlb_state {
 
 	u16 loaded_mm_asid;
 	u16 next_asid;
+	u8 lam;
 
 	/*
 	 * We can be in one of several states:
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 569ac1d57f55..138d4748aa97 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -145,17 +145,73 @@ static inline u16 user_pcid(u16 asid)
 	return ret;
 }
 
-static inline unsigned long build_cr3(pgd_t *pgd, u16 asid)
+#ifdef CONFIG_X86_64
+static inline unsigned long lam_to_cr3(u8 lam)
+{
+	switch (lam) {
+	case LAM_NONE:
+		return 0;
+	case LAM_U57:
+		return X86_CR3_LAM_U57;
+	case LAM_U48:
+		return X86_CR3_LAM_U48;
+	default:
+		WARN_ON_ONCE(1);
+		return 0;
+	}
+}
+
+static inline u8 cr3_to_lam(unsigned long cr3)
+{
+	if (cr3 & X86_CR3_LAM_U57)
+		return LAM_U57;
+	if (cr3 & X86_CR3_LAM_U48)
+		return LAM_U48;
+	return 0;
+}
+
+static u8 gen_lam(struct task_struct *tsk, struct mm_struct *mm)
+{
+	struct thread_info *ti = task_thread_info(tsk);
+	if (!tsk)
+		return LAM_NONE;
+
+	if (test_ti_thread_flag(ti, TIF_LAM_U57))
+		return LAM_U57;
+	if (test_ti_thread_flag(ti, TIF_LAM_U48))
+		return LAM_U48;
+	return LAM_NONE;
+}
+
+#else
+
+static inline unsigned long lam_to_cr3(u8 lam)
+{
+	return 0;
+}
+
+static inline u8 cr3_to_lam(unsigned long cr3)
+{
+	return LAM_NONE;
+}
+
+static u8 gen_lam(struct task_struct *tsk, struct mm_struct *mm)
+{
+	return LAM_NONE;
+}
+#endif
+
+static inline unsigned long build_cr3(pgd_t *pgd, u16 asid, u8 lam)
 {
 	if (static_cpu_has(X86_FEATURE_PCID)) {
-		return __sme_pa(pgd) | kern_pcid(asid);
+		return __sme_pa(pgd) | kern_pcid(asid) | lam_to_cr3(lam);
 	} else {
 		VM_WARN_ON_ONCE(asid != 0);
-		return __sme_pa(pgd);
+		return __sme_pa(pgd) | lam_to_cr3(lam);
 	}
 }
 
-static inline unsigned long build_cr3_noflush(pgd_t *pgd, u16 asid)
+static inline unsigned long build_cr3_noflush(pgd_t *pgd, u16 asid, u8 lam)
 {
 	VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE);
 	/*
@@ -164,7 +220,7 @@ static inline unsigned long build_cr3_noflush(pgd_t *pgd, u16 asid)
 	 * boot because all CPU's the have same capabilities:
 	 */
 	VM_WARN_ON_ONCE(!boot_cpu_has(X86_FEATURE_PCID));
-	return __sme_pa(pgd) | kern_pcid(asid) | CR3_NOFLUSH;
+	return __sme_pa(pgd) | kern_pcid(asid) | CR3_NOFLUSH | lam_to_cr3(lam);
 }
 
 /*
@@ -265,15 +321,15 @@ static inline void invalidate_user_asid(u16 asid)
 		  (unsigned long *)this_cpu_ptr(&cpu_tlbstate.user_pcid_flush_mask));
 }
 
-static void load_new_mm_cr3(pgd_t *pgdir, u16 new_asid, bool need_flush)
+static void load_new_mm_cr3(pgd_t *pgdir, u16 new_asid, u8 lam, bool need_flush)
 {
 	unsigned long new_mm_cr3;
 
 	if (need_flush) {
 		invalidate_user_asid(new_asid);
-		new_mm_cr3 = build_cr3(pgdir, new_asid);
+		new_mm_cr3 = build_cr3(pgdir, new_asid, lam);
 	} else {
-		new_mm_cr3 = build_cr3_noflush(pgdir, new_asid);
+		new_mm_cr3 = build_cr3_noflush(pgdir, new_asid, lam);
 	}
 
 	/*
@@ -424,6 +480,8 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 {
 	struct mm_struct *real_prev = this_cpu_read(cpu_tlbstate.loaded_mm);
 	u16 prev_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);
+	u8 prev_lam = this_cpu_read(cpu_tlbstate.lam);
+	u8 new_lam = gen_lam(tsk, next);
 	bool was_lazy = this_cpu_read(cpu_tlbstate.is_lazy);
 	unsigned cpu = smp_processor_id();
 	u64 next_tlb_gen;
@@ -437,6 +495,9 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 	 * cpu_tlbstate.loaded_mm) matches next.
 	 *
 	 * NB: leave_mm() calls us with prev == NULL and tsk == NULL.
+	 *
+	 * NB: Initial LAM enabling calls us with prev == next. We must update
+	 * CR3 if prev_lam doesn't match the new one.
 	 */
 
 	/* We don't want flush_tlb_func_* to run concurrently with us. */
@@ -453,7 +514,8 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 	 * isn't free.
 	 */
 #ifdef CONFIG_DEBUG_VM
-	if (WARN_ON_ONCE(__read_cr3() != build_cr3(real_prev->pgd, prev_asid))) {
+	if (WARN_ON_ONCE(__read_cr3() !=
+			 build_cr3(real_prev->pgd, prev_asid, prev_lam))) {
 		/*
 		 * If we were to BUG here, we'd be very likely to kill
 		 * the system so hard that we don't see the call trace.
@@ -483,7 +545,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 	 * provides that full memory barrier and core serializing
 	 * instruction.
 	 */
-	if (real_prev == next) {
+	if (real_prev == next && prev_lam == new_lam) {
 		VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[prev_asid].ctx_id) !=
 			   next->context.ctx_id);
 
@@ -555,15 +617,16 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 		barrier();
 	}
 
+	this_cpu_write(cpu_tlbstate.lam, new_lam);
 	if (need_flush) {
 		this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id);
 		this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen);
-		load_new_mm_cr3(next->pgd, new_asid, true);
+		load_new_mm_cr3(next->pgd, new_asid, new_lam, true);
 
 		trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
 	} else {
 		/* The new ASID is already up to date. */
-		load_new_mm_cr3(next->pgd, new_asid, false);
+		load_new_mm_cr3(next->pgd, new_asid, new_lam, false);
 
 		trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, 0);
 	}
@@ -620,6 +683,7 @@ void initialize_tlbstate_and_flush(void)
 	struct mm_struct *mm = this_cpu_read(cpu_tlbstate.loaded_mm);
 	u64 tlb_gen = atomic64_read(&init_mm.context.tlb_gen);
 	unsigned long cr3 = __read_cr3();
+	u8 lam = cr3_to_lam(cr3);
 
 	/* Assert that CR3 already references the right mm. */
 	WARN_ON((cr3 & CR3_ADDR_MASK) != __pa(mm->pgd));
@@ -633,7 +697,7 @@ void initialize_tlbstate_and_flush(void)
 		!(cr4_read_shadow() & X86_CR4_PCIDE));
 
 	/* Force ASID 0 and force a TLB flush. */
-	write_cr3(build_cr3(mm->pgd, 0));
+	write_cr3(build_cr3(mm->pgd, 0, lam));
 
 	/* Reinitialize tlbstate. */
 	this_cpu_write(cpu_tlbstate.last_user_mm_ibpb, LAST_USER_MM_IBPB);
@@ -970,8 +1034,10 @@ void flush_tlb_kernel_range(unsigned long start, unsigned long end)
  */
 unsigned long __get_current_cr3_fast(void)
 {
-	unsigned long cr3 = build_cr3(this_cpu_read(cpu_tlbstate.loaded_mm)->pgd,
-		this_cpu_read(cpu_tlbstate.loaded_mm_asid));
+	unsigned long cr3 =
+		build_cr3(this_cpu_read(cpu_tlbstate.loaded_mm)->pgd,
+		this_cpu_read(cpu_tlbstate.loaded_mm_asid),
+		this_cpu_read(cpu_tlbstate.lam));
 
 	/* For now, be very restrictive about when this can be called. */
 	VM_WARN_ON(in_nmi() || preemptible());
-- 
2.26.2
^ permalink raw reply related	[flat|nested] 33+ messages in thread
- * [RFC 5/9] x86/mm: Provide untagged_addr() helper
  2021-02-05 15:16 [RFC 0/9] Linear Address Masking enabling Kirill A. Shutemov
                   ` (4 preceding siblings ...)
  2021-02-05 15:16 ` [RFC 4/9] x86/mm: Introduce TIF_LAM_U57 and TIF_LAM_U48 Kirill A. Shutemov
@ 2021-02-05 15:16 ` Kirill A. Shutemov
  2021-02-05 15:16 ` [RFC 6/9] x86/uaccess: Remove tags from the address before checking Kirill A. Shutemov
                   ` (6 subsequent siblings)
  12 siblings, 0 replies; 33+ messages in thread
From: Kirill A. Shutemov @ 2021-02-05 15:16 UTC (permalink / raw)
  To: Dave Hansen, Andy Lutomirski, Peter Zijlstra
  Cc: x86, Andrey Ryabinin, Alexander Potapenko, Dmitry Vyukov,
	Catalin Marinas, Will Deacon, H . J . Lu, Andi Kleen, linux-mm,
	linux-kernel, Kirill A. Shutemov
The helper is used by the core-mm to strip tag bits and get the address into
the canonical shape. It only handles userspace addresses.
For LAM, the address gets sanitized according to the thread flags.
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
---
 arch/x86/include/asm/page_32.h |  3 +++
 arch/x86/include/asm/page_64.h | 19 +++++++++++++++++++
 2 files changed, 22 insertions(+)
diff --git a/arch/x86/include/asm/page_32.h b/arch/x86/include/asm/page_32.h
index 94dbd51df58f..a829c46ab977 100644
--- a/arch/x86/include/asm/page_32.h
+++ b/arch/x86/include/asm/page_32.h
@@ -15,6 +15,9 @@ extern unsigned long __phys_addr(unsigned long);
 #define __phys_addr_symbol(x)	__phys_addr(x)
 #define __phys_reloc_hide(x)	RELOC_HIDE((x), 0)
 
+#define untagged_addr(addr)	(addr)
+#define untagged_ptr(ptr)	(ptr)
+
 #ifdef CONFIG_FLATMEM
 #define pfn_valid(pfn)		((pfn) < max_mapnr)
 #endif /* CONFIG_FLATMEM */
diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h
index 939b1cff4a7b..67cb434efdf6 100644
--- a/arch/x86/include/asm/page_64.h
+++ b/arch/x86/include/asm/page_64.h
@@ -56,6 +56,25 @@ static inline void clear_page(void *page)
 
 void copy_page(void *to, void *from);
 
+#define __untagged_addr(addr, n)	\
+	((__force __typeof__(addr))sign_extend64((__force u64)(addr), n))
+
+#define untagged_addr(addr)	({					\
+	u64 __addr = (__force u64)(addr);				\
+	if (__addr >> 63 == 0) {					\
+		if (test_thread_flag(TIF_LAM_U57))			\
+			__addr &= __untagged_addr(__addr, 56);		\
+		else if (test_thread_flag(TIF_LAM_U48))			\
+			__addr &= __untagged_addr(__addr, 47);		\
+	}								\
+	(__force __typeof__(addr))__addr;				\
+})
+
+#define untagged_ptr(ptr)	({					\
+	u64 __ptrval = (__force u64)(ptr);				\
+	__ptrval = untagged_addr(__ptrval);				\
+	(__force __typeof__(*(ptr)) *)__ptrval;				\
+})
 #endif	/* !__ASSEMBLY__ */
 
 #ifdef CONFIG_X86_VSYSCALL_EMULATION
-- 
2.26.2
^ permalink raw reply related	[flat|nested] 33+ messages in thread
- * [RFC 6/9] x86/uaccess: Remove tags from the address before checking
  2021-02-05 15:16 [RFC 0/9] Linear Address Masking enabling Kirill A. Shutemov
                   ` (5 preceding siblings ...)
  2021-02-05 15:16 ` [RFC 5/9] x86/mm: Provide untagged_addr() helper Kirill A. Shutemov
@ 2021-02-05 15:16 ` Kirill A. Shutemov
  2021-02-05 15:16 ` [RFC 7/9] x86/mm: Handle tagged memory accesses from kernel threads Kirill A. Shutemov
                   ` (5 subsequent siblings)
  12 siblings, 0 replies; 33+ messages in thread
From: Kirill A. Shutemov @ 2021-02-05 15:16 UTC (permalink / raw)
  To: Dave Hansen, Andy Lutomirski, Peter Zijlstra
  Cc: x86, Andrey Ryabinin, Alexander Potapenko, Dmitry Vyukov,
	Catalin Marinas, Will Deacon, H . J . Lu, Andi Kleen, linux-mm,
	linux-kernel, Kirill A. Shutemov
The tags must not be included in the check of whether it's okay to access the
userspace address.
__chk_range_not_ok() is at the core of access_ok() and it's handy to
strip tags there.
get_user() and put_user() don't use access_ok(), but check access
against TASK_SIZE directly in assembly. Strip tags before calling into
the assembly helper.
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
---
 arch/x86/include/asm/uaccess.h | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index c9fa7be3df82..ee0a482b2f1f 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -18,6 +18,9 @@
  */
 static inline bool __chk_range_not_ok(unsigned long addr, unsigned long size, unsigned long limit)
 {
+	/* Remove tags from the address before comparing to the limit */
+	addr = untagged_addr(addr);
+
 	/*
 	 * If we have used "sizeof()" for the size,
 	 * we know it won't overflow the limit (but
@@ -152,7 +155,12 @@ extern int __get_user_bad(void);
  * Return: zero on success, or -EFAULT on error.
  * On error, the variable @x is set to zero.
  */
-#define get_user(x,ptr) ({ might_fault(); do_get_user_call(get_user,x,ptr); })
+#define get_user(x,ptr)							\
+({									\
+	__typeof__(*(ptr)) *__ptr_clean = untagged_ptr(ptr);		\
+	might_fault();							\
+	do_get_user_call(get_user,x,__ptr_clean);			\
+})
 
 /**
  * __get_user - Get a simple variable from user space, with less checking.
@@ -249,7 +257,11 @@ extern void __put_user_nocheck_8(void);
  *
  * Return: zero on success, or -EFAULT on error.
  */
-#define put_user(x, ptr) ({ might_fault(); do_put_user_call(put_user,x,ptr); })
+#define put_user(x, ptr) ({						\
+	__typeof__(*(ptr)) *__ptr_clean = untagged_ptr(ptr);		\
+	might_fault();							\
+	do_put_user_call(put_user,x,__ptr_clean);			\
+})
 
 /**
  * __put_user - Write a simple value into user space, with less checking.
-- 
2.26.2
^ permalink raw reply related	[flat|nested] 33+ messages in thread
- * [RFC 7/9] x86/mm: Handle tagged memory accesses from kernel threads
  2021-02-05 15:16 [RFC 0/9] Linear Address Masking enabling Kirill A. Shutemov
                   ` (6 preceding siblings ...)
  2021-02-05 15:16 ` [RFC 6/9] x86/uaccess: Remove tags from the address before checking Kirill A. Shutemov
@ 2021-02-05 15:16 ` Kirill A. Shutemov
  2021-02-05 15:16 ` [RFC 8/9] x86/mm: Make LAM_U48 and mappings above 47-bits mutually exclusive Kirill A. Shutemov
                   ` (4 subsequent siblings)
  12 siblings, 0 replies; 33+ messages in thread
From: Kirill A. Shutemov @ 2021-02-05 15:16 UTC (permalink / raw)
  To: Dave Hansen, Andy Lutomirski, Peter Zijlstra
  Cc: x86, Andrey Ryabinin, Alexander Potapenko, Dmitry Vyukov,
	Catalin Marinas, Will Deacon, H . J . Lu, Andi Kleen, linux-mm,
	linux-kernel, Kirill A. Shutemov
When a kernel thread performs memory access on behalf of a process (like
in async I/O, io_uring, etc.) it has to respect tagging setup of the
process as user addresses can include tags.
Normally, LAM setup is per-thread and recorded in thread flags, but for
this use case we also track LAM setup per-mm. mm->context.lam would
record LAM that allows the most tag bits among the threads of the mm.
The info is used by switch_mm_irqs_off() to construct CR3 if the task is
a kernel thread. Thread flags of the kernel thread get updated according
to mm->context.lam. It allows untagged_addr() to work correctly.
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
---
 arch/x86/include/asm/mmu.h |  1 +
 arch/x86/mm/tlb.c          | 28 ++++++++++++++++++++++++++++
 2 files changed, 29 insertions(+)
diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h
index 9257667d13c5..fb05d6a11538 100644
--- a/arch/x86/include/asm/mmu.h
+++ b/arch/x86/include/asm/mmu.h
@@ -35,6 +35,7 @@ typedef struct {
 #ifdef CONFIG_X86_64
 	/* True if mm supports a task running in 32 bit compatibility mode. */
 	unsigned short ia32_compat;
+	u8 lam;
 #endif
 
 	struct mutex lock;
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 138d4748aa97..1f9749da12e4 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -176,6 +176,34 @@ static u8 gen_lam(struct task_struct *tsk, struct mm_struct *mm)
 	if (!tsk)
 		return LAM_NONE;
 
+	if (tsk->flags & PF_KTHREAD) {
+		/*
+		 * For kernel thread use the most permissive LAM
+		 * used by the mm. It's required to handle kernel thread
+		 * memory accesses on behalf of a process.
+		 *
+		 * Adjust thread flags accordingly, so untagged_addr() would
+		 * work correctly.
+		 */
+		switch (mm->context.lam) {
+		case LAM_NONE:
+			clear_thread_flag(TIF_LAM_U48);
+			clear_thread_flag(TIF_LAM_U57);
+			return LAM_NONE;
+		case LAM_U57:
+			clear_thread_flag(TIF_LAM_U48);
+			set_thread_flag(TIF_LAM_U57);
+			return LAM_U57;
+		case LAM_U48:
+			set_thread_flag(TIF_LAM_U48);
+			clear_thread_flag(TIF_LAM_U57);
+			return LAM_U48;
+		default:
+			WARN_ON_ONCE(1);
+			return LAM_NONE;
+		}
+	}
+
 	if (test_ti_thread_flag(ti, TIF_LAM_U57))
 		return LAM_U57;
 	if (test_ti_thread_flag(ti, TIF_LAM_U48))
-- 
2.26.2
^ permalink raw reply related	[flat|nested] 33+ messages in thread
- * [RFC 8/9] x86/mm: Make LAM_U48 and mappings above 47-bits mutually exclusive
  2021-02-05 15:16 [RFC 0/9] Linear Address Masking enabling Kirill A. Shutemov
                   ` (7 preceding siblings ...)
  2021-02-05 15:16 ` [RFC 7/9] x86/mm: Handle tagged memory accesses from kernel threads Kirill A. Shutemov
@ 2021-02-05 15:16 ` Kirill A. Shutemov
  2021-02-05 15:16 ` [RFC 9/9] x86/mm: Implement PR_SET/GET_TAGGED_ADDR_CTRL with LAM Kirill A. Shutemov
                   ` (3 subsequent siblings)
  12 siblings, 0 replies; 33+ messages in thread
From: Kirill A. Shutemov @ 2021-02-05 15:16 UTC (permalink / raw)
  To: Dave Hansen, Andy Lutomirski, Peter Zijlstra
  Cc: x86, Andrey Ryabinin, Alexander Potapenko, Dmitry Vyukov,
	Catalin Marinas, Will Deacon, H . J . Lu, Andi Kleen, linux-mm,
	linux-kernel, Kirill A. Shutemov
LAM_U48 steals bits above 47-bit for tags and makes it impossible for
userspace to use the full address space on a 5-level paging machine.
Make these features mutually exclusive: whichever gets enabled first
blocks the other one.
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
---
 arch/x86/include/asm/elf.h         |  3 ++-
 arch/x86/include/asm/mmu_context.h | 13 +++++++++++++
 arch/x86/kernel/sys_x86_64.c       |  5 +++--
 arch/x86/mm/hugetlbpage.c          |  6 ++++--
 arch/x86/mm/mmap.c                 |  9 ++++++++-
 5 files changed, 30 insertions(+), 6 deletions(-)
diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h
index b9a5d488f1a5..8ba405dfb861 100644
--- a/arch/x86/include/asm/elf.h
+++ b/arch/x86/include/asm/elf.h
@@ -326,7 +326,8 @@ static inline int mmap_is_ia32(void)
 extern unsigned long task_size_32bit(void);
 extern unsigned long task_size_64bit(int full_addr_space);
 extern unsigned long get_mmap_base(int is_legacy);
-extern bool mmap_address_hint_valid(unsigned long addr, unsigned long len);
+extern bool mmap_address_hint_valid(struct mm_struct *mm,
+				    unsigned long addr, unsigned long len);
 
 #ifdef CONFIG_X86_32
 
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index d98016b83755..e338beb031b2 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -214,4 +214,17 @@ static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
 
 unsigned long __get_current_cr3_fast(void);
 
+#ifdef CONFIG_X86_5LEVEL
+static inline bool full_va_allowed(struct mm_struct *mm)
+{
+	/* LAM_U48 steals VA bits above 47-bit for tags */
+	return mm->context.lam != LAM_U48;
+}
+#else
+static inline bool full_va_allowed(struct mm_struct *mm)
+{
+	return false;
+}
+#endif
+
 #endif /* _ASM_X86_MMU_CONTEXT_H */
diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c
index 504fa5425bce..b5d0afee9b0f 100644
--- a/arch/x86/kernel/sys_x86_64.c
+++ b/arch/x86/kernel/sys_x86_64.c
@@ -21,6 +21,7 @@
 
 #include <asm/elf.h>
 #include <asm/ia32.h>
+#include <asm/mmu_context.h>
 
 /*
  * Align a virtual address to avoid aliasing in the I$ on AMD F15h.
@@ -189,7 +190,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
 	/* requesting a specific address */
 	if (addr) {
 		addr &= PAGE_MASK;
-		if (!mmap_address_hint_valid(addr, len))
+		if (!mmap_address_hint_valid(mm, addr, len))
 			goto get_unmapped_area;
 
 		vma = find_vma(mm, addr);
@@ -210,7 +211,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
 	 * !in_32bit_syscall() check to avoid high addresses for x32
 	 * (and make it no op on native i386).
 	 */
-	if (addr > DEFAULT_MAP_WINDOW && !in_32bit_syscall())
+	if (addr > DEFAULT_MAP_WINDOW && !in_32bit_syscall() && full_va_allowed(mm))
 		info.high_limit += TASK_SIZE_MAX - DEFAULT_MAP_WINDOW;
 
 	info.align_mask = 0;
diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c
index a0d023cb4292..9fdc8db42365 100644
--- a/arch/x86/mm/hugetlbpage.c
+++ b/arch/x86/mm/hugetlbpage.c
@@ -18,6 +18,7 @@
 #include <asm/tlb.h>
 #include <asm/tlbflush.h>
 #include <asm/elf.h>
+#include <asm/mmu_context.h>
 
 #if 0	/* This is just for testing */
 struct page *
@@ -103,6 +104,7 @@ static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file,
 		unsigned long pgoff, unsigned long flags)
 {
 	struct hstate *h = hstate_file(file);
+	struct mm_struct *mm = current->mm;
 	struct vm_unmapped_area_info info;
 
 	info.flags = VM_UNMAPPED_AREA_TOPDOWN;
@@ -114,7 +116,7 @@ static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file,
 	 * If hint address is above DEFAULT_MAP_WINDOW, look for unmapped area
 	 * in the full address space.
 	 */
-	if (addr > DEFAULT_MAP_WINDOW && !in_32bit_syscall())
+	if (addr > DEFAULT_MAP_WINDOW && !in_32bit_syscall() && full_va_allowed(mm))
 		info.high_limit += TASK_SIZE_MAX - DEFAULT_MAP_WINDOW;
 
 	info.align_mask = PAGE_MASK & ~huge_page_mask(h);
@@ -161,7 +163,7 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
 
 	if (addr) {
 		addr &= huge_page_mask(h);
-		if (!mmap_address_hint_valid(addr, len))
+		if (!mmap_address_hint_valid(mm, addr, len))
 			goto get_unmapped_area;
 
 		vma = find_vma(mm, addr);
diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c
index c90c20904a60..f9ca824729de 100644
--- a/arch/x86/mm/mmap.c
+++ b/arch/x86/mm/mmap.c
@@ -21,6 +21,7 @@
 #include <linux/elf-randomize.h>
 #include <asm/elf.h>
 #include <asm/io.h>
+#include <asm/mmu_context.h>
 
 #include "physaddr.h"
 
@@ -35,6 +36,8 @@ unsigned long task_size_32bit(void)
 
 unsigned long task_size_64bit(int full_addr_space)
 {
+	if (!full_va_allowed(current->mm))
+		return DEFAULT_MAP_WINDOW;
 	return full_addr_space ? TASK_SIZE_MAX : DEFAULT_MAP_WINDOW;
 }
 
@@ -206,11 +209,15 @@ const char *arch_vma_name(struct vm_area_struct *vma)
  * the failure of such a fixed mapping request, so the restriction is not
  * applied.
  */
-bool mmap_address_hint_valid(unsigned long addr, unsigned long len)
+bool mmap_address_hint_valid(struct mm_struct *mm,
+			     unsigned long addr, unsigned long len)
 {
 	if (TASK_SIZE - len < addr)
 		return false;
 
+	if (addr + len > DEFAULT_MAP_WINDOW && !full_va_allowed(mm))
+		return false;
+
 	return (addr > DEFAULT_MAP_WINDOW) == (addr + len > DEFAULT_MAP_WINDOW);
 }
 
-- 
2.26.2
^ permalink raw reply related	[flat|nested] 33+ messages in thread
- * [RFC 9/9] x86/mm: Implement PR_SET/GET_TAGGED_ADDR_CTRL with LAM
  2021-02-05 15:16 [RFC 0/9] Linear Address Masking enabling Kirill A. Shutemov
                   ` (8 preceding siblings ...)
  2021-02-05 15:16 ` [RFC 8/9] x86/mm: Make LAM_U48 and mappings above 47-bits mutually exclusive Kirill A. Shutemov
@ 2021-02-05 15:16 ` Kirill A. Shutemov
  2021-02-05 15:42   ` H.J. Lu
  2021-02-05 15:16 ` [QEMU] x86: Implement Linear Address Masking support Kirill A. Shutemov
                   ` (2 subsequent siblings)
  12 siblings, 1 reply; 33+ messages in thread
From: Kirill A. Shutemov @ 2021-02-05 15:16 UTC (permalink / raw)
  To: Dave Hansen, Andy Lutomirski, Peter Zijlstra
  Cc: x86, Andrey Ryabinin, Alexander Potapenko, Dmitry Vyukov,
	Catalin Marinas, Will Deacon, H . J . Lu, Andi Kleen, linux-mm,
	linux-kernel, Kirill A. Shutemov
Provide a prctl() interface to enable LAM for user addresses. Depending
on how many tag bits are requested, it may result in enabling LAM_U57 or
LAM_U48.
If LAM_U48 is enabled, the process is no longer able to use the full address
space on a 5-level paging machine and gets limited to 47-bit VA.
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
---
 arch/x86/include/asm/processor.h |  10 +++
 arch/x86/kernel/process_64.c     | 145 +++++++++++++++++++++++++++++++
 2 files changed, 155 insertions(+)
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 82a08b585818..49fac2cc4329 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -810,6 +810,16 @@ extern void start_thread(struct pt_regs *regs, unsigned long new_ip,
 extern int get_tsc_mode(unsigned long adr);
 extern int set_tsc_mode(unsigned int val);
 
+#ifdef CONFIG_X86_64
+long set_tagged_addr_ctrl(unsigned long flags,
+			  int __user *nr_bits, int __user *offset);
+long get_tagged_addr_ctrl(int __user *nr_bits, int __user *offset);
+#define SET_TAGGED_ADDR_CTRL(flags, nr_bits, offset)	\
+	set_tagged_addr_ctrl(flags, nr_bits, offset)
+#define GET_TAGGED_ADDR_CTRL(nr_bits, offset)		\
+	get_tagged_addr_ctrl(nr_bits, offset)
+#endif
+
 DECLARE_PER_CPU(u64, msr_misc_features_shadow);
 
 #ifdef CONFIG_CPU_SUP_AMD
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index df342bedea88..99b87f0e1bc7 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -837,3 +837,148 @@ unsigned long KSTK_ESP(struct task_struct *task)
 {
 	return task_pt_regs(task)->sp;
 }
+
+/*
+ * Control the relaxed ABI allowing tagged user addresses into the kernel.
+ */
+static unsigned int tagged_addr_disabled;
+
+static bool lam_u48_allowed(void)
+{
+	struct mm_struct *mm = current->mm;
+
+	if (!full_va_allowed(mm))
+		return true;
+
+	return find_vma(mm, DEFAULT_MAP_WINDOW) == NULL;
+}
+
+#define LAM_U48_BITS 15
+#define LAM_U57_BITS 6
+
+long set_tagged_addr_ctrl(unsigned long flags,
+			  int __user *nr_bits, int __user *offset)
+{
+	int val;
+
+	if (in_32bit_syscall())
+		return -EINVAL;
+	if (flags & ~PR_TAGGED_ADDR_ENABLE)
+		return -EINVAL;
+	if (!boot_cpu_has(X86_FEATURE_LAM))
+		return -ENOTSUPP;
+
+	/* Disable LAM */
+	if (!(flags & PR_TAGGED_ADDR_ENABLE)) {
+		clear_thread_flag(TIF_LAM_U48);
+		clear_thread_flag(TIF_LAM_U57);
+
+		/* Update CR3 */
+		switch_mm(current->mm, current->mm, current);
+
+		return 0;
+	}
+
+	/*
+	 * nr_bits == NULL || offset == NULL assumes ARM TBI (nr_bits == 8,
+	 * offset == 56). LAM cannot provide this.
+	 */
+	if (!nr_bits || !offset)
+		return -EINVAL;
+
+	/*
+	 * Do not allow the enabling of the tagged address ABI if globally
+	 * disabled via sysctl abi.tagged_addr_disabled.
+	 */
+	if (tagged_addr_disabled)
+		return -EINVAL;
+
+	if (get_user(val, nr_bits))
+		return -EFAULT;
+	if (val > LAM_U48_BITS || val < 1)
+		return -EINVAL;
+	if (val > LAM_U57_BITS && !lam_u48_allowed())
+		return -EINVAL;
+
+	val = val > LAM_U57_BITS ? LAM_U48_BITS : LAM_U57_BITS;
+	if (put_user(val, nr_bits) || put_user(63 - val, offset))
+		return -EFAULT;
+
+	if (val == LAM_U57_BITS) {
+		clear_thread_flag(TIF_LAM_U48);
+		set_thread_flag(TIF_LAM_U57);
+		if (current->mm->context.lam == LAM_NONE)
+			current->mm->context.lam = LAM_U57;
+	} else {
+		clear_thread_flag(TIF_LAM_U57);
+		set_thread_flag(TIF_LAM_U48);
+
+		/*
+		 * Do not allow creating mappings above 47 bits.
+		 *
+		 * It's a one-way road: once a thread of the process enables
+		 * LAM_U48, no thread can ever create a mapping above 47 bits,
+		 * even if LAM gets disabled later.
+		 */
+		current->mm->context.lam = LAM_U48;
+	}
+
+	/* Update CR3 */
+	switch_mm(current->mm, current->mm, current);
+
+	return 0;
+}
+
+long get_tagged_addr_ctrl(int __user *nr_bits, int __user *offset)
+{
+	if (in_32bit_syscall())
+		return -EINVAL;
+
+	if (test_thread_flag(TIF_LAM_U57)) {
+		if (nr_bits && put_user(LAM_U57_BITS, nr_bits))
+			return -EFAULT;
+		if (offset && put_user(63 - LAM_U57_BITS, offset))
+			return -EFAULT;
+	} else if (test_thread_flag(TIF_LAM_U48)) {
+		if (nr_bits && put_user(LAM_U48_BITS, nr_bits))
+			return -EFAULT;
+		if (offset && put_user(63 - LAM_U48_BITS, offset))
+			return -EFAULT;
+	} else {
+		int max_bits = lam_u48_allowed() ? LAM_U48_BITS : LAM_U57_BITS;
+
+		/* Report maximum tag size */
+		if (nr_bits && put_user(max_bits, nr_bits))
+		    return -EFAULT;
+		return 0;
+	}
+
+	return PR_TAGGED_ADDR_ENABLE;
+}
+
+/*
+ * Global sysctl to disable the tagged user addresses support. This control
+ * only prevents the tagged address ABI enabling via prctl() and does not
+ * disable it for tasks that already opted in to the relaxed ABI.
+ */
+
+static struct ctl_table tagged_addr_sysctl_table[] = {
+	{
+		.procname	= "tagged_addr_disabled",
+		.mode		= 0644,
+		.data		= &tagged_addr_disabled,
+		.maxlen		= sizeof(int),
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_ONE,
+	},
+	{ }
+};
+
+static int __init tagged_addr_init(void)
+{
+	if (!register_sysctl("abi", tagged_addr_sysctl_table))
+		return -EINVAL;
+	return 0;
+}
+core_initcall(tagged_addr_init);
-- 
2.26.2
^ permalink raw reply related	[flat|nested] 33+ messages in thread
- * Re: [RFC 9/9] x86/mm: Implement PR_SET/GET_TAGGED_ADDR_CTRL with LAM
  2021-02-05 15:16 ` [RFC 9/9] x86/mm: Implement PR_SET/GET_TAGGED_ADDR_CTRL with LAM Kirill A. Shutemov
@ 2021-02-05 15:42   ` H.J. Lu
  2021-02-07  8:07     ` Dmitry Vyukov
  0 siblings, 1 reply; 33+ messages in thread
From: H.J. Lu @ 2021-02-05 15:42 UTC (permalink / raw)
  To: Kirill A. Shutemov
  Cc: Dave Hansen, Andy Lutomirski, Peter Zijlstra,
	the arch/x86 maintainers, Andrey Ryabinin, Alexander Potapenko,
	Dmitry Vyukov, Catalin Marinas, Will Deacon, Andi Kleen, Linux-MM,
	LKML, GNU C Library, GCC Development
On Fri, Feb 5, 2021 at 7:16 AM Kirill A. Shutemov
<kirill.shutemov@linux.intel.com> wrote:
>
> Provide prctl() interface to enabled LAM for user addresses. Depending
> how many tag bits requested it may result in enabling LAM_U57 or
> LAM_U48.
I prefer the alternate kernel interface based on CET arch_prctl interface which
is implemented in glibc on users/intel/lam/master branch:
https://gitlab.com/x86-glibc/glibc/-/tree/users/intel/lam/master
and in GCC on users/intel/lam/master branch:
https://gitlab.com/x86-gcc/gcc/-/tree/users/intel/lam/master
-- 
H.J.
^ permalink raw reply	[flat|nested] 33+ messages in thread 
- * Re: [RFC 9/9] x86/mm: Implement PR_SET/GET_TAGGED_ADDR_CTRL with LAM
  2021-02-05 15:42   ` H.J. Lu
@ 2021-02-07  8:07     ` Dmitry Vyukov
  2021-02-07 14:09       ` Kirill A. Shutemov
  0 siblings, 1 reply; 33+ messages in thread
From: Dmitry Vyukov @ 2021-02-07  8:07 UTC (permalink / raw)
  To: H.J. Lu
  Cc: Kirill A. Shutemov, Dave Hansen, Andy Lutomirski, Peter Zijlstra,
	the arch/x86 maintainers, Andrey Ryabinin, Alexander Potapenko,
	Catalin Marinas, Will Deacon, Andi Kleen, Linux-MM, LKML,
	GNU C Library, GCC Development
On Fri, Feb 5, 2021 at 4:43 PM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> On Fri, Feb 5, 2021 at 7:16 AM Kirill A. Shutemov
> <kirill.shutemov@linux.intel.com> wrote:
> >
> > Provide prctl() interface to enabled LAM for user addresses. Depending
> > how many tag bits requested it may result in enabling LAM_U57 or
> > LAM_U48.
>
> I prefer the alternate kernel interface based on CET arch_prctl interface which
> is implemented in glibc on users/intel/lam/master branch:
>
> https://gitlab.com/x86-glibc/glibc/-/tree/users/intel/lam/master
>
> and in GCC on users/intel/lam/master branch:
>
> https://gitlab.com/x86-gcc/gcc/-/tree/users/intel/lam/master
Hi Kirill, H.J.,
I don't have strong preference for PR_SET/GET_TAGGED_ADDR_CTRL vs
ARCH_X86_FEATURE_1_ENABLE itself, but tying LAM to ELF and
GNU_PROPERTY in the second option looks strange. LAM can be used
outside of ELF/GNU, right?
^ permalink raw reply	[flat|nested] 33+ messages in thread 
- * Re: [RFC 9/9] x86/mm: Implement PR_SET/GET_TAGGED_ADDR_CTRL with LAM
  2021-02-07  8:07     ` Dmitry Vyukov
@ 2021-02-07 14:09       ` Kirill A. Shutemov
  2021-02-07 14:11         ` Dmitry Vyukov
  0 siblings, 1 reply; 33+ messages in thread
From: Kirill A. Shutemov @ 2021-02-07 14:09 UTC (permalink / raw)
  To: Dmitry Vyukov
  Cc: H.J. Lu, Kirill A. Shutemov, Dave Hansen, Andy Lutomirski,
	Peter Zijlstra, the arch/x86 maintainers, Andrey Ryabinin,
	Alexander Potapenko, Catalin Marinas, Will Deacon, Andi Kleen,
	Linux-MM, LKML, GNU C Library, GCC Development
On Sun, Feb 07, 2021 at 09:07:02AM +0100, Dmitry Vyukov wrote:
> On Fri, Feb 5, 2021 at 4:43 PM H.J. Lu <hjl.tools@gmail.com> wrote:
> >
> > On Fri, Feb 5, 2021 at 7:16 AM Kirill A. Shutemov
> > <kirill.shutemov@linux.intel.com> wrote:
> > >
> > > Provide prctl() interface to enabled LAM for user addresses. Depending
> > > how many tag bits requested it may result in enabling LAM_U57 or
> > > LAM_U48.
> >
> > I prefer the alternate kernel interface based on CET arch_prctl interface which
> > is implemented in glibc on users/intel/lam/master branch:
> >
> > https://gitlab.com/x86-glibc/glibc/-/tree/users/intel/lam/master
> >
> > and in GCC on users/intel/lam/master branch:
> >
> > https://gitlab.com/x86-gcc/gcc/-/tree/users/intel/lam/master
> 
> Hi Kirill, H.J.,
> 
> I don't have strong preference for PR_SET/GET_TAGGED_ADDR_CTRL vs
> ARCH_X86_FEATURE_1_ENABLE itself, but tying LAM to ELF and
> GNU_PROPERTY in the second option looks strange. LAM can be used
> outside of ELF/GNU, right?
Sure. In both cases it's still a syscall.
-- 
 Kirill A. Shutemov
^ permalink raw reply	[flat|nested] 33+ messages in thread 
- * Re: [RFC 9/9] x86/mm: Implement PR_SET/GET_TAGGED_ADDR_CTRL with LAM
  2021-02-07 14:09       ` Kirill A. Shutemov
@ 2021-02-07 14:11         ` Dmitry Vyukov
  0 siblings, 0 replies; 33+ messages in thread
From: Dmitry Vyukov @ 2021-02-07 14:11 UTC (permalink / raw)
  To: Kirill A. Shutemov
  Cc: H.J. Lu, Kirill A. Shutemov, Dave Hansen, Andy Lutomirski,
	Peter Zijlstra, the arch/x86 maintainers, Andrey Ryabinin,
	Alexander Potapenko, Catalin Marinas, Will Deacon, Andi Kleen,
	Linux-MM, LKML, GNU C Library, GCC Development
On Sun, Feb 7, 2021 at 3:09 PM Kirill A. Shutemov <kirill@shutemov.name> wrote:
>
> On Sun, Feb 07, 2021 at 09:07:02AM +0100, Dmitry Vyukov wrote:
> > On Fri, Feb 5, 2021 at 4:43 PM H.J. Lu <hjl.tools@gmail.com> wrote:
> > >
> > > On Fri, Feb 5, 2021 at 7:16 AM Kirill A. Shutemov
> > > <kirill.shutemov@linux.intel.com> wrote:
> > > >
> > > > Provide prctl() interface to enabled LAM for user addresses. Depending
> > > > how many tag bits requested it may result in enabling LAM_U57 or
> > > > LAM_U48.
> > >
> > > I prefer the alternate kernel interface based on CET arch_prctl interface which
> > > is implemented in glibc on users/intel/lam/master branch:
> > >
> > > https://gitlab.com/x86-glibc/glibc/-/tree/users/intel/lam/master
> > >
> > > and in GCC on users/intel/lam/master branch:
> > >
> > > https://gitlab.com/x86-gcc/gcc/-/tree/users/intel/lam/master
> >
> > Hi Kirill, H.J.,
> >
> > I don't have strong preference for PR_SET/GET_TAGGED_ADDR_CTRL vs
> > ARCH_X86_FEATURE_1_ENABLE itself, but tying LAM to ELF and
> > GNU_PROPERTY in the second option looks strange. LAM can be used
> > outside of ELF/GNU, right?
>
> Sure. In both cases it's still a syscall.
Oh, I meant just the naming scheme. The consts are declared in elf.h
and are prefixed with GNU_PROPERTY.
^ permalink raw reply	[flat|nested] 33+ messages in thread 
 
 
 
 
- * [QEMU] x86: Implement Linear Address Masking support
  2021-02-05 15:16 [RFC 0/9] Linear Address Masking enabling Kirill A. Shutemov
                   ` (9 preceding siblings ...)
  2021-02-05 15:16 ` [RFC 9/9] x86/mm: Implement PR_SET/GET_TAGGED_ADDR_CTRL with LAM Kirill A. Shutemov
@ 2021-02-05 15:16 ` Kirill A. Shutemov
  2021-02-05 15:49 ` [RFC 0/9] Linear Address Masking enabling Peter Zijlstra
  2021-02-07  8:24 ` Dmitry Vyukov
  12 siblings, 0 replies; 33+ messages in thread
From: Kirill A. Shutemov @ 2021-02-05 15:16 UTC (permalink / raw)
  To: Dave Hansen, Andy Lutomirski, Peter Zijlstra
  Cc: x86, Andrey Ryabinin, Alexander Potapenko, Dmitry Vyukov,
	Catalin Marinas, Will Deacon, H . J . Lu, Andi Kleen, linux-mm,
	linux-kernel, Kirill A. Shutemov
The Linear Address Masking (LAM) feature makes the CPU ignore some bits of
the virtual address. These bits can be used to encode metadata.
The feature is enumerated with CPUID.(EAX=07H, ECX=01H):EAX.LAM[bit 26].
CR3.LAM_U57[bit 62] allows encoding 6 bits of metadata in bits 62:57 of
user pointers.
CR3.LAM_U48[bit 61] allows encoding 15 bits of metadata in bits 62:48
of user pointers.
CR4.LAM_SUP[bit 28] allows encoding metadata in supervisor pointers.
If 5-level paging is in use, 6 bits of metadata can be encoded in 62:57.
For 4-level paging, 15 bits of metadata can be encoded in bits 62:48.
QEMU strips the metadata bits from the address and brings it to canonical
form before handling a memory access. This has to be done very early, before
the TLB lookup.
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
---
 accel/tcg/cputlb.c        | 54 +++++++++++++++++++++++----------------
 include/hw/core/cpu.h     |  1 +
 target/i386/cpu.c         |  5 ++--
 target/i386/cpu.h         |  7 +++++
 target/i386/excp_helper.c | 28 +++++++++++++++++++-
 target/i386/helper.c      |  2 +-
 6 files changed, 71 insertions(+), 26 deletions(-)
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index 42ab79c1a582..f2d27134474f 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -1271,6 +1271,17 @@ static inline ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
     return ram_addr;
 }
 
+static vaddr clean_addr(CPUState *cpu, vaddr addr)
+{
+    CPUClass *cc = CPU_GET_CLASS(cpu);
+
+    if (cc->do_clean_addr) {
+        addr = cc->do_clean_addr(cpu, addr);
+    }
+
+    return addr;
+}
+
 /*
  * Note: tlb_fill() can trigger a resize of the TLB. This means that all of the
  * caller's prior references to the TLB table (e.g. CPUTLBEntry pointers) must
@@ -1702,9 +1713,11 @@ bool tlb_plugin_lookup(CPUState *cpu, target_ulong addr, int mmu_idx,
 
 /* Probe for a read-modify-write atomic operation.  Do not allow unaligned
  * operations, or io operations to proceed.  Return the host address.  */
-static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
+static void *atomic_mmu_lookup(CPUArchState *env, target_ulong address,
                                TCGMemOpIdx oi, uintptr_t retaddr)
 {
+    CPUState *cpu = env_cpu(env);
+    target_ulong addr = clean_addr(cpu, address);
     size_t mmu_idx = get_mmuidx(oi);
     uintptr_t index = tlb_index(env, mmu_idx, addr);
     CPUTLBEntry *tlbe = tlb_entry(env, mmu_idx, addr);
@@ -1720,8 +1733,7 @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
     /* Enforce guest required alignment.  */
     if (unlikely(a_bits > 0 && (addr & ((1 << a_bits) - 1)))) {
         /* ??? Maybe indicate atomic op to cpu_unaligned_access */
-        cpu_unaligned_access(env_cpu(env), addr, MMU_DATA_STORE,
-                             mmu_idx, retaddr);
+        cpu_unaligned_access(cpu, addr, MMU_DATA_STORE, mmu_idx, retaddr);
     }
 
     /* Enforce qemu required alignment.  */
@@ -1736,8 +1748,7 @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
     /* Check TLB entry and enforce page permissions.  */
     if (!tlb_hit(tlb_addr, addr)) {
         if (!VICTIM_TLB_HIT(addr_write, addr)) {
-            tlb_fill(env_cpu(env), addr, 1 << s_bits, MMU_DATA_STORE,
-                     mmu_idx, retaddr);
+            tlb_fill(cpu, addr, 1 << s_bits, MMU_DATA_STORE, mmu_idx, retaddr);
             index = tlb_index(env, mmu_idx, addr);
             tlbe = tlb_entry(env, mmu_idx, addr);
         }
@@ -1753,8 +1764,7 @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
 
     /* Let the guest notice RMW on a write-only page.  */
     if (unlikely(tlbe->addr_read != (tlb_addr & ~TLB_NOTDIRTY))) {
-        tlb_fill(env_cpu(env), addr, 1 << s_bits, MMU_DATA_LOAD,
-                 mmu_idx, retaddr);
+        tlb_fill(cpu, addr, 1 << s_bits, MMU_DATA_LOAD, mmu_idx, retaddr);
         /* Since we don't support reads and writes to different addresses,
            and we do have the proper page loaded for write, this shouldn't
            ever return.  But just in case, handle via stop-the-world.  */
@@ -1764,14 +1774,14 @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
     hostaddr = (void *)((uintptr_t)addr + tlbe->addend);
 
     if (unlikely(tlb_addr & TLB_NOTDIRTY)) {
-        notdirty_write(env_cpu(env), addr, 1 << s_bits,
+        notdirty_write(cpu, addr, 1 << s_bits,
                        &env_tlb(env)->d[mmu_idx].iotlb[index], retaddr);
     }
 
     return hostaddr;
 
  stop_the_world:
-    cpu_loop_exit_atomic(env_cpu(env), retaddr);
+    cpu_loop_exit_atomic(cpu, retaddr);
 }
 
 /*
@@ -1810,10 +1820,12 @@ load_memop(const void *haddr, MemOp op)
 }
 
 static inline uint64_t QEMU_ALWAYS_INLINE
-load_helper(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi,
+load_helper(CPUArchState *env, target_ulong address, TCGMemOpIdx oi,
             uintptr_t retaddr, MemOp op, bool code_read,
             FullLoadHelper *full_load)
 {
+    CPUState *cpu = env_cpu(env);
+    target_ulong addr = clean_addr(cpu, address);
     uintptr_t mmu_idx = get_mmuidx(oi);
     uintptr_t index = tlb_index(env, mmu_idx, addr);
     CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
@@ -1829,16 +1841,14 @@ load_helper(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi,
 
     /* Handle CPU specific unaligned behaviour */
     if (addr & ((1 << a_bits) - 1)) {
-        cpu_unaligned_access(env_cpu(env), addr, access_type,
-                             mmu_idx, retaddr);
+        cpu_unaligned_access(cpu, addr, access_type, mmu_idx, retaddr);
     }
 
     /* If the TLB entry is for a different page, reload and try again.  */
     if (!tlb_hit(tlb_addr, addr)) {
         if (!victim_tlb_hit(env, mmu_idx, index, tlb_off,
                             addr & TARGET_PAGE_MASK)) {
-            tlb_fill(env_cpu(env), addr, size,
-                     access_type, mmu_idx, retaddr);
+            tlb_fill(cpu, addr, size, access_type, mmu_idx, retaddr);
             index = tlb_index(env, mmu_idx, addr);
             entry = tlb_entry(env, mmu_idx, addr);
         }
@@ -1861,7 +1871,7 @@ load_helper(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi,
         /* Handle watchpoints.  */
         if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
             /* On watchpoint hit, this will longjmp out.  */
-            cpu_check_watchpoint(env_cpu(env), addr, size,
+            cpu_check_watchpoint(cpu, addr, size,
                                  iotlbentry->attrs, BP_MEM_READ, retaddr);
         }
 
@@ -2341,9 +2351,11 @@ store_helper_unaligned(CPUArchState *env, target_ulong addr, uint64_t val,
 }
 
 static inline void QEMU_ALWAYS_INLINE
-store_helper(CPUArchState *env, target_ulong addr, uint64_t val,
+store_helper(CPUArchState *env, target_ulong address, uint64_t val,
              TCGMemOpIdx oi, uintptr_t retaddr, MemOp op)
 {
+    CPUState *cpu = env_cpu(env);
+    target_ulong addr = clean_addr(cpu, address);
     uintptr_t mmu_idx = get_mmuidx(oi);
     uintptr_t index = tlb_index(env, mmu_idx, addr);
     CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
@@ -2355,16 +2367,14 @@ store_helper(CPUArchState *env, target_ulong addr, uint64_t val,
 
     /* Handle CPU specific unaligned behaviour */
     if (addr & ((1 << a_bits) - 1)) {
-        cpu_unaligned_access(env_cpu(env), addr, MMU_DATA_STORE,
-                             mmu_idx, retaddr);
+        cpu_unaligned_access(cpu, addr, MMU_DATA_STORE, mmu_idx, retaddr);
     }
 
     /* If the TLB entry is for a different page, reload and try again.  */
     if (!tlb_hit(tlb_addr, addr)) {
         if (!victim_tlb_hit(env, mmu_idx, index, tlb_off,
             addr & TARGET_PAGE_MASK)) {
-            tlb_fill(env_cpu(env), addr, size, MMU_DATA_STORE,
-                     mmu_idx, retaddr);
+            tlb_fill(cpu, addr, size, MMU_DATA_STORE, mmu_idx, retaddr);
             index = tlb_index(env, mmu_idx, addr);
             entry = tlb_entry(env, mmu_idx, addr);
         }
@@ -2386,7 +2396,7 @@ store_helper(CPUArchState *env, target_ulong addr, uint64_t val,
         /* Handle watchpoints.  */
         if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
             /* On watchpoint hit, this will longjmp out.  */
-            cpu_check_watchpoint(env_cpu(env), addr, size,
+            cpu_check_watchpoint(cpu, addr, size,
                                  iotlbentry->attrs, BP_MEM_WRITE, retaddr);
         }
 
@@ -2406,7 +2416,7 @@ store_helper(CPUArchState *env, target_ulong addr, uint64_t val,
 
         /* Handle clean RAM pages.  */
         if (tlb_addr & TLB_NOTDIRTY) {
-            notdirty_write(env_cpu(env), addr, size, iotlbentry, retaddr);
+            notdirty_write(cpu, addr, size, iotlbentry, retaddr);
         }
 
         haddr = (void *)((uintptr_t)addr + entry->addend);
diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
index 3d92c967fffa..64817bc10f1b 100644
--- a/include/hw/core/cpu.h
+++ b/include/hw/core/cpu.h
@@ -171,6 +171,7 @@ struct CPUClass {
     int reset_dump_flags;
     bool (*has_work)(CPUState *cpu);
     void (*do_interrupt)(CPUState *cpu);
+    vaddr (*do_clean_addr)(CPUState *cpu, vaddr addr);
     void (*do_unaligned_access)(CPUState *cpu, vaddr addr,
                                 MMUAccessType access_type,
                                 int mmu_idx, uintptr_t retaddr);
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 5a8c96072e41..f819f0673103 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -666,7 +666,7 @@ static void x86_cpu_vendor_words2str(char *dst, uint32_t vendor1,
           /* CPUID_7_0_ECX_OSPKE is dynamic */ \
           CPUID_7_0_ECX_LA57)
 #define TCG_7_0_EDX_FEATURES 0
-#define TCG_7_1_EAX_FEATURES 0
+#define TCG_7_1_EAX_FEATURES CPUID_7_1_EAX_LAM
 #define TCG_APM_FEATURES 0
 #define TCG_6_EAX_FEATURES CPUID_6_EAX_ARAT
 #define TCG_XSAVE_FEATURES (CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XGETBV1)
@@ -997,7 +997,7 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = {
             NULL, NULL, NULL, NULL,
             NULL, NULL, NULL, NULL,
             NULL, NULL, NULL, NULL,
-            NULL, NULL, NULL, NULL,
+            NULL, NULL, "lam", NULL,
             NULL, NULL, NULL, NULL,
         },
         .cpuid = {
@@ -7290,6 +7290,7 @@ static void x86_cpu_common_class_init(ObjectClass *oc, void *data)
 #ifdef CONFIG_TCG
     cc->tcg_initialize = tcg_x86_init;
     cc->tlb_fill = x86_cpu_tlb_fill;
+    cc->do_clean_addr = x86_cpu_clean_addr;
 #endif
     cc->disas_set_info = x86_disas_set_info;
 
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index 88e8586f8fb4..f8477e16685d 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -229,6 +229,9 @@ typedef enum X86Seg {
 #define CR0_AM_MASK  (1U << 18)
 #define CR0_PG_MASK  (1U << 31)
 
+#define CR3_LAM_U48  (1ULL << 61)
+#define CR3_LAM_U57  (1ULL << 62)
+
 #define CR4_VME_MASK  (1U << 0)
 #define CR4_PVI_MASK  (1U << 1)
 #define CR4_TSD_MASK  (1U << 2)
@@ -250,6 +253,7 @@ typedef enum X86Seg {
 #define CR4_SMEP_MASK   (1U << 20)
 #define CR4_SMAP_MASK   (1U << 21)
 #define CR4_PKE_MASK   (1U << 22)
+#define CR4_LAM_SUP    (1U << 28)
 
 #define DR6_BD          (1 << 13)
 #define DR6_BS          (1 << 14)
@@ -796,6 +800,8 @@ typedef uint64_t FeatureWordArray[FEATURE_WORDS];
 
 /* AVX512 BFloat16 Instruction */
 #define CPUID_7_1_EAX_AVX512_BF16       (1U << 5)
+/* Linear Address Masking */
+#define CPUID_7_1_EAX_LAM               (1U << 26)
 
 /* CLZERO instruction */
 #define CPUID_8000_0008_EBX_CLZERO      (1U << 0)
@@ -1924,6 +1930,7 @@ bool x86_cpu_tlb_fill(CPUState *cs, vaddr address, int size,
                       MMUAccessType access_type, int mmu_idx,
                       bool probe, uintptr_t retaddr);
 void x86_cpu_set_a20(X86CPU *cpu, int a20_state);
+vaddr x86_cpu_clean_addr(CPUState *cpu, vaddr addr);
 
 #ifndef CONFIG_USER_ONLY
 static inline int x86_asidx_from_attrs(CPUState *cs, MemTxAttrs attrs)
diff --git a/target/i386/excp_helper.c b/target/i386/excp_helper.c
index 191471749fbf..edf8194574b2 100644
--- a/target/i386/excp_helper.c
+++ b/target/i386/excp_helper.c
@@ -406,7 +406,7 @@ static int handle_mmu_fault(CPUState *cs, vaddr addr, int size,
             }
 
             if (la57) {
-                pml5e_addr = ((env->cr[3] & ~0xfff) +
+                pml5e_addr = ((env->cr[3] & PG_ADDRESS_MASK) +
                         (((addr >> 48) & 0x1ff) << 3)) & a20_mask;
                 pml5e_addr = get_hphys(cs, pml5e_addr, MMU_DATA_STORE, NULL);
                 pml5e = x86_ldq_phys(cs, pml5e_addr);
@@ -700,3 +700,29 @@ bool x86_cpu_tlb_fill(CPUState *cs, vaddr addr, int size,
     return true;
 #endif
 }
+
+static inline int64_t sign_extend64(uint64_t value, int index)
+{
+    int shift = 63 - index;
+    return (int64_t)(value << shift) >> shift;
+}
+
+vaddr x86_cpu_clean_addr(CPUState *cs, vaddr addr)
+{
+    CPUX86State *env = &X86_CPU(cs)->env;
+    bool la57 = env->cr[4] & CR4_LA57_MASK;
+
+    if (addr >> 63) {
+        if (env->cr[4] & CR4_LAM_SUP) {
+            return sign_extend64(addr, la57 ? 56 : 47);
+        }
+    } else {
+        if (env->cr[3] & CR3_LAM_U57) {
+            return sign_extend64(addr, 56);
+        } else if (env->cr[3] & CR3_LAM_U48) {
+            return sign_extend64(addr, 47);
+        }
+    }
+
+    return addr;
+}
diff --git a/target/i386/helper.c b/target/i386/helper.c
index 034f46bcc210..6c099443ce13 100644
--- a/target/i386/helper.c
+++ b/target/i386/helper.c
@@ -753,7 +753,7 @@ hwaddr x86_cpu_get_phys_page_attrs_debug(CPUState *cs, vaddr addr,
             }
 
             if (la57) {
-                pml5e_addr = ((env->cr[3] & ~0xfff) +
+                pml5e_addr = ((env->cr[3] & PG_ADDRESS_MASK) +
                         (((addr >> 48) & 0x1ff) << 3)) & a20_mask;
                 pml5e = x86_ldq_phys(cs, pml5e_addr);
                 if (!(pml5e & PG_PRESENT_MASK)) {
-- 
2.26.2
^ permalink raw reply related	[flat|nested] 33+ messages in thread
- * Re: [RFC 0/9] Linear Address Masking enabling
  2021-02-05 15:16 [RFC 0/9] Linear Address Masking enabling Kirill A. Shutemov
                   ` (10 preceding siblings ...)
  2021-02-05 15:16 ` [QEMU] x86: Implement Linear Address Masking support Kirill A. Shutemov
@ 2021-02-05 15:49 ` Peter Zijlstra
  2021-02-05 16:01   ` Kirill A. Shutemov
  2021-02-07  8:24 ` Dmitry Vyukov
  12 siblings, 1 reply; 33+ messages in thread
From: Peter Zijlstra @ 2021-02-05 15:49 UTC (permalink / raw)
  To: Kirill A. Shutemov
  Cc: Dave Hansen, Andy Lutomirski, x86, Andrey Ryabinin,
	Alexander Potapenko, Dmitry Vyukov, Catalin Marinas, Will Deacon,
	H . J . Lu, Andi Kleen, linux-mm, linux-kernel
On Fri, Feb 05, 2021 at 06:16:20PM +0300, Kirill A. Shutemov wrote:
> The feature competes for bits with 5-level paging: LAM_U48 makes it
> impossible to map anything about 47-bits. The patchset made these
> capability mutually exclusive: whatever used first wins. LAM_U57 can be
> combined with mappings above 47-bits.
And I suppose we still can't switch between 4 and 5 level at runtime,
using a CR3 bit?
^ permalink raw reply	[flat|nested] 33+ messages in thread
- * Re: [RFC 0/9] Linear Address Masking enabling
  2021-02-05 15:49 ` [RFC 0/9] Linear Address Masking enabling Peter Zijlstra
@ 2021-02-05 16:01   ` Kirill A. Shutemov
  2021-02-05 16:19     ` Peter Zijlstra
  0 siblings, 1 reply; 33+ messages in thread
From: Kirill A. Shutemov @ 2021-02-05 16:01 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Dave Hansen, Andy Lutomirski, x86, Andrey Ryabinin,
	Alexander Potapenko, Dmitry Vyukov, Catalin Marinas, Will Deacon,
	H . J . Lu, Andi Kleen, linux-mm, linux-kernel
On Fri, Feb 05, 2021 at 04:49:05PM +0100, Peter Zijlstra wrote:
> On Fri, Feb 05, 2021 at 06:16:20PM +0300, Kirill A. Shutemov wrote:
> > The feature competes for bits with 5-level paging: LAM_U48 makes it
> > impossible to map anything about 47-bits. The patchset made these
> > capability mutually exclusive: whatever used first wins. LAM_U57 can be
> > combined with mappings above 47-bits.
> 
> And I suppose we still can't switch between 4 and 5 level at runtime,
> using a CR3 bit?
No. And I can't imagine how it would work with 5-level on the kernel side.
-- 
 Kirill A. Shutemov
^ permalink raw reply	[flat|nested] 33+ messages in thread 
- * Re: [RFC 0/9] Linear Address Masking enabling
  2021-02-05 16:01   ` Kirill A. Shutemov
@ 2021-02-05 16:19     ` Peter Zijlstra
  0 siblings, 0 replies; 33+ messages in thread
From: Peter Zijlstra @ 2021-02-05 16:19 UTC (permalink / raw)
  To: Kirill A. Shutemov
  Cc: Dave Hansen, Andy Lutomirski, x86, Andrey Ryabinin,
	Alexander Potapenko, Dmitry Vyukov, Catalin Marinas, Will Deacon,
	H . J . Lu, Andi Kleen, linux-mm, linux-kernel
On Fri, Feb 05, 2021 at 07:01:27PM +0300, Kirill A. Shutemov wrote:
> On Fri, Feb 05, 2021 at 04:49:05PM +0100, Peter Zijlstra wrote:
> > On Fri, Feb 05, 2021 at 06:16:20PM +0300, Kirill A. Shutemov wrote:
> > > The feature competes for bits with 5-level paging: LAM_U48 makes it
> > > impossible to map anything about 47-bits. The patchset made these
> > > capability mutually exclusive: whatever used first wins. LAM_U57 can be
> > > combined with mappings above 47-bits.
> > 
> > And I suppose we still can't switch between 4 and 5 level at runtime,
> > using a CR3 bit?
> 
> No. And I can't imagine how would it work with 5-level on kernel side.
KPTI already switches CR3 on every entry and only maps a very limited
number of kernel pages in the user map. This means a 4 level user
page-table should be possible.
The kernel page-tables would only need to update their p5d[0] on every
4l user change.
Not as nice as actually having separate user and kernel page-tables in
hardware, but it would actually make 5l page-tables useful on machines
with less than stupid amounts of memory I think.
One of the road-blocks to doing per-cpu kernel page-tables is having to
do 2k copies, only having to update a single P5D entry would be ideal.
Of course, once we get 5l user tables we're back to being stupid, but
maybe tasks with that much memory don't actually switch much, who
knows.
^ permalink raw reply	[flat|nested] 33+ messages in thread 
 
 
- * Re: [RFC 0/9] Linear Address Masking enabling
  2021-02-05 15:16 [RFC 0/9] Linear Address Masking enabling Kirill A. Shutemov
                   ` (11 preceding siblings ...)
  2021-02-05 15:49 ` [RFC 0/9] Linear Address Masking enabling Peter Zijlstra
@ 2021-02-07  8:24 ` Dmitry Vyukov
  2021-02-07 14:11   ` Kirill A. Shutemov
  12 siblings, 1 reply; 33+ messages in thread
From: Dmitry Vyukov @ 2021-02-07  8:24 UTC (permalink / raw)
  To: Kirill A. Shutemov
  Cc: Dave Hansen, Andy Lutomirski, Peter Zijlstra,
	the arch/x86 maintainers, Andrey Ryabinin, Alexander Potapenko,
	Catalin Marinas, Will Deacon, H . J . Lu, Andi Kleen, Linux-MM,
	LKML
On Fri, Feb 5, 2021 at 4:16 PM Kirill A. Shutemov
<kirill.shutemov@linux.intel.com> wrote:
>
> Linear Address Masking[1] (LAM) modifies the checking that is applied to
> 64-bit linear addresses, allowing software to use of the untranslated
> address bits for metadata.
>
> The patchset brings support for LAM for userspace addresses.
>
> The most sensitive part of enabling is change in tlb.c, where CR3 flags
> get set. Please take a look that what I'm doing makes sense.
>
> The patchset is RFC quality and the code requires more testing before it
> can be applied.
>
> The userspace API is not finalized yet. The patchset extends API used by
> ARM64: PR_GET/SET_TAGGED_ADDR_CTRL. The API is adjusted to not imply ARM
> TBI: it now allows to request a number of bits of metadata needed and
> report where these bits are located in the address.
>
> There's an alternative proposal[2] for the API based on Intel CET
> interface. Please let us know if you prefer one over another.
>
> The feature competes for bits with 5-level paging: LAM_U48 makes it
> impossible to map anything about 47-bits. The patchset made these
> capability mutually exclusive: whatever used first wins. LAM_U57 can be
> combined with mappings above 47-bits.
>
> I include QEMU patch in case if somebody wants to play with the feature.
Exciting! Do you plan to send the QEMU patch to QEMU?
> The branch:
>
>         git://git.kernel.org/pub/scm/linux/kernel/git/kas/linux.git lam
>
> Any comments are welcome.
>
> [1] ISE, Chapter 14. https://software.intel.com/content/dam/develop/external/us/en/documents-tps/architecture-instruction-set-extensions-programming-reference.pdf
> [2] https://github.com/hjl-tools/linux/commit/e85fa032e5b276ddf17edd056f92f599db9e8369
>
> Kirill A. Shutemov (9):
>   mm, arm64: Update PR_SET/GET_TAGGED_ADDR_CTRL interface
>   x86/mm: Fix CR3_ADDR_MASK
>   x86: CPUID and CR3/CR4 flags for Linear Address Masking
>   x86/mm: Introduce TIF_LAM_U57 and TIF_LAM_U48
>   x86/mm: Provide untagged_addr() helper
>   x86/uaccess: Remove tags from the address before checking
>   x86/mm: Handle tagged memory accesses from kernel threads
>   x86/mm: Make LAM_U48 and mappings above 47-bits mutually exclusive
>   x86/mm: Implement PR_SET/GET_TAGGED_ADDR_CTRL with LAM
>
>  arch/arm64/include/asm/processor.h            |  12 +-
>  arch/arm64/kernel/process.c                   |  45 +++++-
>  arch/arm64/kernel/ptrace.c                    |   4 +-
>  arch/x86/include/asm/cpufeatures.h            |   1 +
>  arch/x86/include/asm/elf.h                    |   3 +-
>  arch/x86/include/asm/mmu.h                    |   1 +
>  arch/x86/include/asm/mmu_context.h            |  13 ++
>  arch/x86/include/asm/page_32.h                |   3 +
>  arch/x86/include/asm/page_64.h                |  19 +++
>  arch/x86/include/asm/processor-flags.h        |   2 +-
>  arch/x86/include/asm/processor.h              |  10 ++
>  arch/x86/include/asm/thread_info.h            |   9 +-
>  arch/x86/include/asm/tlbflush.h               |   5 +
>  arch/x86/include/asm/uaccess.h                |  16 +-
>  arch/x86/include/uapi/asm/processor-flags.h   |   6 +
>  arch/x86/kernel/process_64.c                  | 145 ++++++++++++++++++
>  arch/x86/kernel/sys_x86_64.c                  |   5 +-
>  arch/x86/mm/hugetlbpage.c                     |   6 +-
>  arch/x86/mm/mmap.c                            |   9 +-
>  arch/x86/mm/tlb.c                             | 124 +++++++++++++--
>  kernel/sys.c                                  |  14 +-
>  .../testing/selftests/arm64/tags/tags_test.c  |  31 ----
>  .../selftests/{arm64 => vm}/tags/.gitignore   |   0
>  .../selftests/{arm64 => vm}/tags/Makefile     |   0
>  .../{arm64 => vm}/tags/run_tags_test.sh       |   0
>  tools/testing/selftests/vm/tags/tags_test.c   |  57 +++++++
>  26 files changed, 464 insertions(+), 76 deletions(-)
>  delete mode 100644 tools/testing/selftests/arm64/tags/tags_test.c
>  rename tools/testing/selftests/{arm64 => vm}/tags/.gitignore (100%)
>  rename tools/testing/selftests/{arm64 => vm}/tags/Makefile (100%)
>  rename tools/testing/selftests/{arm64 => vm}/tags/run_tags_test.sh (100%)
>  create mode 100644 tools/testing/selftests/vm/tags/tags_test.c
>
> --
> 2.26.2
>
^ permalink raw reply	[flat|nested] 33+ messages in thread
- * Re: [RFC 0/9] Linear Address Masking enabling
  2021-02-07  8:24 ` Dmitry Vyukov
@ 2021-02-07 14:11   ` Kirill A. Shutemov
  2021-09-21 16:52     ` Dmitry Vyukov
  0 siblings, 1 reply; 33+ messages in thread
From: Kirill A. Shutemov @ 2021-02-07 14:11 UTC (permalink / raw)
  To: Dmitry Vyukov
  Cc: Kirill A. Shutemov, Dave Hansen, Andy Lutomirski, Peter Zijlstra,
	the arch/x86 maintainers, Andrey Ryabinin, Alexander Potapenko,
	Catalin Marinas, Will Deacon, H . J . Lu, Andi Kleen, Linux-MM,
	LKML
On Sun, Feb 07, 2021 at 09:24:23AM +0100, Dmitry Vyukov wrote:
> On Fri, Feb 5, 2021 at 4:16 PM Kirill A. Shutemov
> <kirill.shutemov@linux.intel.com> wrote:
> >
> > Linear Address Masking[1] (LAM) modifies the checking that is applied to
> > 64-bit linear addresses, allowing software to use of the untranslated
> > address bits for metadata.
> >
> > The patchset brings support for LAM for userspace addresses.
> >
> > The most sensitive part of enabling is change in tlb.c, where CR3 flags
> > get set. Please take a look that what I'm doing makes sense.
> >
> > The patchset is RFC quality and the code requires more testing before it
> > can be applied.
> >
> > The userspace API is not finalized yet. The patchset extends API used by
> > ARM64: PR_GET/SET_TAGGED_ADDR_CTRL. The API is adjusted to not imply ARM
> > TBI: it now allows to request a number of bits of metadata needed and
> > report where these bits are located in the address.
> >
> > There's an alternative proposal[2] for the API based on Intel CET
> > interface. Please let us know if you prefer one over another.
> >
> > The feature competes for bits with 5-level paging: LAM_U48 makes it
> > impossible to map anything about 47-bits. The patchset made these
> > capability mutually exclusive: whatever used first wins. LAM_U57 can be
> > combined with mappings above 47-bits.
> >
> > I include QEMU patch in case if somebody wants to play with the feature.
> 
> Exciting! Do you plan to send the QEMU patch to QEMU?
Sure. After more testing, once I'm sure it's conforming to the hardware.
-- 
 Kirill A. Shutemov
^ permalink raw reply	[flat|nested] 33+ messages in thread 
- * Re: [RFC 0/9] Linear Address Masking enabling
  2021-02-07 14:11   ` Kirill A. Shutemov
@ 2021-09-21 16:52     ` Dmitry Vyukov
  2021-09-21 17:15       ` H.J. Lu
  0 siblings, 1 reply; 33+ messages in thread
From: Dmitry Vyukov @ 2021-09-21 16:52 UTC (permalink / raw)
  To: Kirill A. Shutemov
  Cc: Kirill A. Shutemov, Dave Hansen, Andy Lutomirski, Peter Zijlstra,
	the arch/x86 maintainers, Andrey Ryabinin, Alexander Potapenko,
	Catalin Marinas, Will Deacon, H . J . Lu, Andi Kleen, Linux-MM,
	LKML, Carlos O'Donell, Marco Elver, Taras Madan
On Sun, 7 Feb 2021 at 15:11, Kirill A. Shutemov <kirill@shutemov.name> wrote:
>
> On Sun, Feb 07, 2021 at 09:24:23AM +0100, Dmitry Vyukov wrote:
> > On Fri, Feb 5, 2021 at 4:16 PM Kirill A. Shutemov
> > <kirill.shutemov@linux.intel.com> wrote:
> > >
> > > Linear Address Masking[1] (LAM) modifies the checking that is applied to
> > > 64-bit linear addresses, allowing software to use of the untranslated
> > > address bits for metadata.
> > >
> > > The patchset brings support for LAM for userspace addresses.
> > >
> > > The most sensitive part of enabling is change in tlb.c, where CR3 flags
> > > get set. Please take a look that what I'm doing makes sense.
> > >
> > > The patchset is RFC quality and the code requires more testing before it
> > > can be applied.
> > >
> > > The userspace API is not finalized yet. The patchset extends API used by
> > > ARM64: PR_GET/SET_TAGGED_ADDR_CTRL. The API is adjusted to not imply ARM
> > > TBI: it now allows to request a number of bits of metadata needed and
> > > report where these bits are located in the address.
> > >
> > > There's an alternative proposal[2] for the API based on Intel CET
> > > interface. Please let us know if you prefer one over another.
> > >
> > > The feature competes for bits with 5-level paging: LAM_U48 makes it
> > > impossible to map anything about 47-bits. The patchset made these
> > > capability mutually exclusive: whatever used first wins. LAM_U57 can be
> > > combined with mappings above 47-bits.
> > >
> > > I include QEMU patch in case if somebody wants to play with the feature.
> >
> > Exciting! Do you plan to send the QEMU patch to QEMU?
>
> Sure. After more testing, once I'm sure it's conforming to the hardware.
A follow up after H.J.'s LPC talk:
https://linuxplumbersconf.org/event/11/contributions/1010/
(also +Carlos)
As far as I understood, this kernel series depends on the Intel CET patches.
Where are these compiler-rt patches that block gcc support?
^ permalink raw reply	[flat|nested] 33+ messages in thread 
- * Re: [RFC 0/9] Linear Address Masking enabling
  2021-09-21 16:52     ` Dmitry Vyukov
@ 2021-09-21 17:15       ` H.J. Lu
  2021-09-22  1:15         ` Zhang, Xiang1
  0 siblings, 1 reply; 33+ messages in thread
From: H.J. Lu @ 2021-09-21 17:15 UTC (permalink / raw)
  To: Dmitry Vyukov
  Cc: Kirill A. Shutemov, Kirill A. Shutemov, Dave Hansen,
	Andy Lutomirski, Peter Zijlstra, the arch/x86 maintainers,
	Andrey Ryabinin, Alexander Potapenko, Catalin Marinas,
	Will Deacon, Andi Kleen, Linux-MM, LKML, Carlos O'Donell,
	Marco Elver, Taras Madan, xiang1...@intel.com
On Tue, Sep 21, 2021 at 9:52 AM Dmitry Vyukov <dvyukov@google.com> wrote:
>
> On Sun, 7 Feb 2021 at 15:11, Kirill A. Shutemov <kirill@shutemov.name> wrote:
> >
> > On Sun, Feb 07, 2021 at 09:24:23AM +0100, Dmitry Vyukov wrote:
> > > On Fri, Feb 5, 2021 at 4:16 PM Kirill A. Shutemov
> > > <kirill.shutemov@linux.intel.com> wrote:
> > > >
> > > > Linear Address Masking[1] (LAM) modifies the checking that is applied to
> > > > 64-bit linear addresses, allowing software to use of the untranslated
> > > > address bits for metadata.
> > > >
> > > > The patchset brings support for LAM for userspace addresses.
> > > >
> > > > The most sensitive part of enabling is change in tlb.c, where CR3 flags
> > > > get set. Please take a look that what I'm doing makes sense.
> > > >
> > > > The patchset is RFC quality and the code requires more testing before it
> > > > can be applied.
> > > >
> > > > The userspace API is not finalized yet. The patchset extends API used by
> > > > ARM64: PR_GET/SET_TAGGED_ADDR_CTRL. The API is adjusted to not imply ARM
> > > > TBI: it now allows to request a number of bits of metadata needed and
> > > > report where these bits are located in the address.
> > > >
> > > > There's an alternative proposal[2] for the API based on Intel CET
> > > > interface. Please let us know if you prefer one over another.
> > > >
> > > > The feature competes for bits with 5-level paging: LAM_U48 makes it
> > > > impossible to map anything about 47-bits. The patchset made these
> > > > capability mutually exclusive: whatever used first wins. LAM_U57 can be
> > > > combined with mappings above 47-bits.
> > > >
> > > > I include QEMU patch in case if somebody wants to play with the feature.
> > >
> > > Exciting! Do you plan to send the QEMU patch to QEMU?
> >
> > Sure. After more testing, once I'm sure it's conforming to the hardware.
>
> A follow up after H.J.'s LPC talk:
> https://linuxplumbersconf.org/event/11/contributions/1010/
> (also +Carlos)
>
> As far as I understood, this kernel series depends on the Intel CET patches.
>
> Where are these compiler-rt patches that block gcc support?
Hi Xiang,
Please share your compiler-rt changes for LAM.
-- 
H.J.
^ permalink raw reply	[flat|nested] 33+ messages in thread 
- * RE: [RFC 0/9] Linear Address Masking enabling
  2021-09-21 17:15       ` H.J. Lu
@ 2021-09-22  1:15         ` Zhang, Xiang1
  2021-09-22 12:54           ` Dmitry Vyukov
  0 siblings, 1 reply; 33+ messages in thread
From: Zhang, Xiang1 @ 2021-09-22  1:15 UTC (permalink / raw)
  To: H.J. Lu, Dmitry Vyukov
  Cc: Kirill A. Shutemov, Kirill A. Shutemov, Dave Hansen,
	Lutomirski, Andy, Peter Zijlstra, the arch/x86 maintainers,
	Andrey Ryabinin, Alexander Potapenko, Catalin Marinas,
	Will Deacon, Andi Kleen, Linux-MM, LKML, Carlos O'Donell,
	Marco Elver, Taras Madan
They are already in llvm.org.
One of my old patches is https://reviews.llvm.org/D102472, which has been committed by https://reviews.llvm.org/D102901 and https://reviews.llvm.org/D109790
BR
Xiang
-----Original Message-----
From: H.J. Lu <hjl.tools@gmail.com> 
Sent: Wednesday, September 22, 2021 1:16 AM
To: Dmitry Vyukov <dvyukov@google.com>
Cc: Kirill A. Shutemov <kirill@shutemov.name>; Kirill A. Shutemov <kirill.shutemov@linux.intel.com>; Dave Hansen <dave.hansen@linux.intel.com>; Lutomirski, Andy <luto@kernel.org>; Peter Zijlstra <peterz@infradead.org>; the arch/x86 maintainers <x86@kernel.org>; Andrey Ryabinin <aryabinin@virtuozzo.com>; Alexander Potapenko <glider@google.com>; Catalin Marinas <catalin.marinas@arm.com>; Will Deacon <will@kernel.org>; Andi Kleen <ak@linux.intel.com>; Linux-MM <linux-mm@kvack.org>; LKML <linux-kernel@vger.kernel.org>; Carlos O'Donell <carlos@redhat.com>; Marco Elver <elver@google.com>; Taras Madan <tarasmadan@google.com>; Zhang, Xiang1 <xiang1.zhang@intel.com>
Subject: Re: [RFC 0/9] Linear Address Masking enabling
On Tue, Sep 21, 2021 at 9:52 AM Dmitry Vyukov <dvyukov@google.com> wrote:
>
> On Sun, 7 Feb 2021 at 15:11, Kirill A. Shutemov <kirill@shutemov.name> wrote:
> >
> > On Sun, Feb 07, 2021 at 09:24:23AM +0100, Dmitry Vyukov wrote:
> > > On Fri, Feb 5, 2021 at 4:16 PM Kirill A. Shutemov 
> > > <kirill.shutemov@linux.intel.com> wrote:
> > > >
> > > > Linear Address Masking[1] (LAM) modifies the checking that is 
> > > > applied to 64-bit linear addresses, allowing software to use of 
> > > > the untranslated address bits for metadata.
> > > >
> > > > The patchset brings support for LAM for userspace addresses.
> > > >
> > > > The most sensitive part of enabling is change in tlb.c, where 
> > > > CR3 flags get set. Please take a look that what I'm doing makes sense.
> > > >
> > > > The patchset is RFC quality and the code requires more testing 
> > > > before it can be applied.
> > > >
> > > > The userspace API is not finalized yet. The patchset extends API 
> > > > used by
> > > > ARM64: PR_GET/SET_TAGGED_ADDR_CTRL. The API is adjusted to not 
> > > > imply ARM
> > > > TBI: it now allows to request a number of bits of metadata 
> > > > needed and report where these bits are located in the address.
> > > >
> > > > There's an alternative proposal[2] for the API based on Intel 
> > > > CET interface. Please let us know if you prefer one over another.
> > > >
> > > > The feature competes for bits with 5-level paging: LAM_U48 makes 
> > > > it impossible to map anything about 47-bits. The patchset made 
> > > > these capability mutually exclusive: whatever used first wins. 
> > > > LAM_U57 can be combined with mappings above 47-bits.
> > > >
> > > > I include QEMU patch in case if somebody wants to play with the feature.
> > >
> > > Exciting! Do you plan to send the QEMU patch to QEMU?
> >
> > Sure. After more testing, once I'm sure it's conforming to the hardware.
>
> A follow up after H.J.'s LPC talk:
> https://linuxplumbersconf.org/event/11/contributions/1010/
> (also +Carlos)
>
> As far as I understood, this kernel series depends on the Intel CET patches.
>
> Where are these compiler-rt patches that block gcc support?
Hi Xiang,
Please share your compiler-rt changes for LAM.
--
H.J.
^ permalink raw reply	[flat|nested] 33+ messages in thread 
- * Re: [RFC 0/9] Linear Address Masking enabling
  2021-09-22  1:15         ` Zhang, Xiang1
@ 2021-09-22 12:54           ` Dmitry Vyukov
  2021-09-22 20:03             ` Dmitry Vyukov
  2021-09-23  0:07             ` H.J. Lu
  0 siblings, 2 replies; 33+ messages in thread
From: Dmitry Vyukov @ 2021-09-22 12:54 UTC (permalink / raw)
  To: Zhang, Xiang1
  Cc: H.J. Lu, Kirill A. Shutemov, Kirill A. Shutemov, Dave Hansen,
	Lutomirski, Andy, Peter Zijlstra, the arch/x86 maintainers,
	Andrey Ryabinin, Alexander Potapenko, Catalin Marinas,
	Will Deacon, Andi Kleen, Linux-MM, LKML, Carlos O'Donell,
	Marco Elver, Taras Madan
On Wed, 22 Sept 2021 at 03:15, Zhang, Xiang1 <xiang1.zhang@intel.com> wrote:
>
> There are already in llvm.org.
> One of my old patch is https://reviews.llvm.org/D102472 which has been committed by https://reviews.llvm.org/D102901  and https://reviews.llvm.org/D109790
Hi Xiang,
Good, the sanitizer patches are upstream!
Please help me to understand the status of other pieces (H.J. you
probably talked about this yesterday, but I wasn't able to build a
complete picture during the talk, I think it will be useful to have
this in written form).
1. The presentation mentions "GCC: enable memory tagging with LAM in
x86 codegen".
What exactly is needed? Isn't LAM transparent for codegen? What's the
status in gcc? Does a corresponding change need to be done in llvm?
2. "Enable LAM in binutils".
This is already upstream in binutils 2.36, right?
3. The mentioned glibc patch:
http://patchwork.ozlabs.org/project/glibc/patch/20210211173711.71736-1-hjl.tools@gmail.com/
Not upstream yet, targeting glibc 2.34.
4. "Avoid pointer operations incompatible with LAM. memmove: mask out
memory tags before comparing pointers".
Is this upstream? Where is the patch? Are there other similar patches?
As a side note, regarding the memmove change: do we really need it?
Memory regions can overlap only if they come from the same
allocation/base object. If they come from different allocations, they
can't overlap (undefined behavior already).
5. Do we need any additional enabling changes in clang/llvm?
6. The kernel patches (this email thread) depend on the CET patches
(for the interface part only). And the CET patches are these, right?
https://lore.kernel.org/linux-doc/?q=x86%2Fcet%2Fshstk
7. Am I missing anything else?
H.J. please upload your slides here:
https://linuxplumbersconf.org/event/11/contributions/1010/
It would help with links and copy-pasting text.
FTR here is the link to the Plumbers talk:
https://youtu.be/zUw0ZVXCwoM?t=10456
Thank you
> BR
> Xiang
>
> -----Original Message-----
> From: H.J. Lu <hjl.tools@gmail.com>
> Sent: Wednesday, September 22, 2021 1:16 AM
> To: Dmitry Vyukov <dvyukov@google.com>
> Cc: Kirill A. Shutemov <kirill@shutemov.name>; Kirill A. Shutemov <kirill.shutemov@linux.intel.com>; Dave Hansen <dave.hansen@linux.intel.com>; Lutomirski, Andy <luto@kernel.org>; Peter Zijlstra <peterz@infradead.org>; the arch/x86 maintainers <x86@kernel.org>; Andrey Ryabinin <aryabinin@virtuozzo.com>; Alexander Potapenko <glider@google.com>; Catalin Marinas <catalin.marinas@arm.com>; Will Deacon <will@kernel.org>; Andi Kleen <ak@linux.intel.com>; Linux-MM <linux-mm@kvack.org>; LKML <linux-kernel@vger.kernel.org>; Carlos O'Donell <carlos@redhat.com>; Marco Elver <elver@google.com>; Taras Madan <tarasmadan@google.com>; Zhang, Xiang1 <xiang1.zhang@intel.com>
> Subject: Re: [RFC 0/9] Linear Address Masking enabling
>
> On Tue, Sep 21, 2021 at 9:52 AM Dmitry Vyukov <dvyukov@google.com> wrote:
> >
> > On Sun, 7 Feb 2021 at 15:11, Kirill A. Shutemov <kirill@shutemov.name> wrote:
> > >
> > > On Sun, Feb 07, 2021 at 09:24:23AM +0100, Dmitry Vyukov wrote:
> > > > On Fri, Feb 5, 2021 at 4:16 PM Kirill A. Shutemov
> > > > <kirill.shutemov@linux.intel.com> wrote:
> > > > >
> > > > > Linear Address Masking[1] (LAM) modifies the checking that is
> > > > > applied to 64-bit linear addresses, allowing software to use of
> > > > > the untranslated address bits for metadata.
> > > > >
> > > > > The patchset brings support for LAM for userspace addresses.
> > > > >
> > > > > The most sensitive part of enabling is change in tlb.c, where
> > > > > CR3 flags get set. Please take a look that what I'm doing makes sense.
> > > > >
> > > > > The patchset is RFC quality and the code requires more testing
> > > > > before it can be applied.
> > > > >
> > > > > The userspace API is not finalized yet. The patchset extends API
> > > > > used by
> > > > > ARM64: PR_GET/SET_TAGGED_ADDR_CTRL. The API is adjusted to not
> > > > > imply ARM
> > > > > TBI: it now allows to request a number of bits of metadata
> > > > > needed and report where these bits are located in the address.
> > > > >
> > > > > There's an alternative proposal[2] for the API based on Intel
> > > > > CET interface. Please let us know if you prefer one over another.
> > > > >
> > > > > The feature competes for bits with 5-level paging: LAM_U48 makes
> > > > > it impossible to map anything about 47-bits. The patchset made
> > > > > these capability mutually exclusive: whatever used first wins.
> > > > > LAM_U57 can be combined with mappings above 47-bits.
> > > > >
> > > > > I include QEMU patch in case if somebody wants to play with the feature.
> > > >
> > > > Exciting! Do you plan to send the QEMU patch to QEMU?
> > >
> > > Sure. After more testing, once I'm sure it's conforming to the hardware.
> >
> > A follow up after H.J.'s LPC talk:
> > https://linuxplumbersconf.org/event/11/contributions/1010/
> > (also +Carlos)
> >
> > As far as I understood, this kernel series depends on the Intel CET patches.
> >
> > Where are these compiler-rt patches that block gcc support?
>
> Hi Xiang,
>
> Please share your compiler-rt changes for LAM.
>
> --
> H.J.
^ permalink raw reply	[flat|nested] 33+ messages in thread 
- * Re: [RFC 0/9] Linear Address Masking enabling
  2021-09-22 12:54           ` Dmitry Vyukov
@ 2021-09-22 20:03             ` Dmitry Vyukov
  2021-09-22 21:33               ` Kirill A. Shutemov
  2021-09-23  0:15               ` H.J. Lu
  2021-09-23  0:07             ` H.J. Lu
  1 sibling, 2 replies; 33+ messages in thread
From: Dmitry Vyukov @ 2021-09-22 20:03 UTC (permalink / raw)
  To: Zhang, Xiang1
  Cc: H.J. Lu, Kirill A. Shutemov, Kirill A. Shutemov, Dave Hansen,
	Lutomirski, Andy, Peter Zijlstra, the arch/x86 maintainers,
	Andrey Ryabinin, Alexander Potapenko, Catalin Marinas,
	Will Deacon, Andi Kleen, Linux-MM, LKML, Carlos O'Donell,
	Marco Elver, Taras Madan
On Wed, 22 Sept 2021 at 14:54, Dmitry Vyukov <dvyukov@google.com> wrote:
>
> On Wed, 22 Sept 2021 at 03:15, Zhang, Xiang1 <xiang1.zhang@intel.com> wrote:
> >
> > There are already in llvm.org.
> > One of my old patch is https://reviews.llvm.org/D102472 which has been committed by https://reviews.llvm.org/D102901  and https://reviews.llvm.org/D109790
>
> Hi Xiang,
>
> Good sanitizer patches are upstream!
>
> Please help me to understand the status of other pieces (H.J. you
> probably talked about this yesterday, but I wasn't able to build a
> complete picture during the talk, I think it will be useful to have
> this in written form).
>
> 1. The presentation mentions "GCC: enable memory tagging with LAM in
> x86 codegen".
> What exactly is needed? Isn't LAM transparent for codegen? What's the
> status in gcc? Does a corresponding change need to be done in llvm?
>
> 2. "Enable LAM in binutils".
> This is already upstream in binutils 2.36, right?
>
> 3. The mentioned glibc patch:
> http://patchwork.ozlabs.org/project/glibc/patch/20210211173711.71736-1-hjl.tools@gmail.com/
> Not upstream yet, targeting glibc 2.34.
Do we need any support in other libc's, e.g. Android bionic?
> 4. "Avoid pointer operations incompatible with LAM. memmove: mask out
> memory tags before comparing pointers".
> Is this upstream? Where is the patch? Are there other similar patches?
>
> As a side note, regarding the memmove change: do we really need it?
> Memory regions can overlap only if they come from the same
> allocation/base object. If they come from different allocations, they
> can't overlap (undefined behavior already).
>
> 5. Do we need any additional enabling changes in clang/llvm?
>
> 6. The kernel patches (this email thread) depend on the CET patches
> (for the interface part only). And the CET patches is this, right?
> https://lore.kernel.org/linux-doc/?q=x86%2Fcet%2Fshstk
>
> 7. Do I miss anything else?
>
> H.J. please upload your slides here:
> https://linuxplumbersconf.org/event/11/contributions/1010/
> It would help with links and copy-pasting text.
>
> FTR here is the link to the Plumbers talk:
> https://youtu.be/zUw0ZVXCwoM?t=10456
>
> Thank you
>
>
> > BR
> > Xiang
> >
> > -----Original Message-----
> > From: H.J. Lu <hjl.tools@gmail.com>
> > Sent: Wednesday, September 22, 2021 1:16 AM
> > To: Dmitry Vyukov <dvyukov@google.com>
> > Cc: Kirill A. Shutemov <kirill@shutemov.name>; Kirill A. Shutemov <kirill.shutemov@linux.intel.com>; Dave Hansen <dave.hansen@linux.intel.com>; Lutomirski, Andy <luto@kernel.org>; Peter Zijlstra <peterz@infradead.org>; the arch/x86 maintainers <x86@kernel.org>; Andrey Ryabinin <aryabinin@virtuozzo.com>; Alexander Potapenko <glider@google.com>; Catalin Marinas <catalin.marinas@arm.com>; Will Deacon <will@kernel.org>; Andi Kleen <ak@linux.intel.com>; Linux-MM <linux-mm@kvack.org>; LKML <linux-kernel@vger.kernel.org>; Carlos O'Donell <carlos@redhat.com>; Marco Elver <elver@google.com>; Taras Madan <tarasmadan@google.com>; Zhang, Xiang1 <xiang1.zhang@intel.com>
> > Subject: Re: [RFC 0/9] Linear Address Masking enabling
> >
> > On Tue, Sep 21, 2021 at 9:52 AM Dmitry Vyukov <dvyukov@google.com> wrote:
> > >
> > > On Sun, 7 Feb 2021 at 15:11, Kirill A. Shutemov <kirill@shutemov.name> wrote:
> > > >
> > > > On Sun, Feb 07, 2021 at 09:24:23AM +0100, Dmitry Vyukov wrote:
> > > > > On Fri, Feb 5, 2021 at 4:16 PM Kirill A. Shutemov
> > > > > <kirill.shutemov@linux.intel.com> wrote:
> > > > > >
> > > > > > Linear Address Masking[1] (LAM) modifies the checking that is
> > > > > > applied to 64-bit linear addresses, allowing software to use of
> > > > > > the untranslated address bits for metadata.
> > > > > >
> > > > > > The patchset brings support for LAM for userspace addresses.
> > > > > >
> > > > > > The most sensitive part of enabling is change in tlb.c, where
> > > > > > CR3 flags get set. Please take a look that what I'm doing makes sense.
> > > > > >
> > > > > > The patchset is RFC quality and the code requires more testing
> > > > > > before it can be applied.
> > > > > >
> > > > > > The userspace API is not finalized yet. The patchset extends API
> > > > > > used by
> > > > > > ARM64: PR_GET/SET_TAGGED_ADDR_CTRL. The API is adjusted to not
> > > > > > imply ARM
> > > > > > TBI: it now allows to request a number of bits of metadata
> > > > > > needed and report where these bits are located in the address.
> > > > > >
> > > > > > There's an alternative proposal[2] for the API based on Intel
> > > > > > CET interface. Please let us know if you prefer one over another.
> > > > > >
> > > > > > The feature competes for bits with 5-level paging: LAM_U48 makes
> > > > > > it impossible to map anything about 47-bits. The patchset made
> > > > > > these capability mutually exclusive: whatever used first wins.
> > > > > > LAM_U57 can be combined with mappings above 47-bits.
> > > > > >
> > > > > > I include QEMU patch in case if somebody wants to play with the feature.
> > > > >
> > > > > Exciting! Do you plan to send the QEMU patch to QEMU?
> > > >
> > > > Sure. After more testing, once I'm sure it's conforming to the hardware.
> > >
> > > A follow up after H.J.'s LPC talk:
> > > https://linuxplumbersconf.org/event/11/contributions/1010/
> > > (also +Carlos)
> > >
> > > As far as I understood, this kernel series depends on the Intel CET patches.
> > >
> > > Where are these compiler-rt patches that block gcc support?
> >
> > Hi Xiang,
> >
> > Please share your compiler-rt changes for LAM.
> >
> > --
> > H.J.
^ permalink raw reply	[flat|nested] 33+ messages in thread 
- * Re: [RFC 0/9] Linear Address Masking enabling
  2021-09-22 20:03             ` Dmitry Vyukov
@ 2021-09-22 21:33               ` Kirill A. Shutemov
  2021-09-23  0:15               ` H.J. Lu
  1 sibling, 0 replies; 33+ messages in thread
From: Kirill A. Shutemov @ 2021-09-22 21:33 UTC (permalink / raw)
  To: Dmitry Vyukov
  Cc: Zhang, Xiang1, H.J. Lu, Kirill A. Shutemov, Dave Hansen,
	Lutomirski, Andy, Peter Zijlstra, the arch/x86 maintainers,
	Andrey Ryabinin, Alexander Potapenko, Catalin Marinas,
	Will Deacon, Andi Kleen, Linux-MM, LKML, Carlos O'Donell,
	Marco Elver, Taras Madan
On Wed, Sep 22, 2021 at 10:03:32PM +0200, Dmitry Vyukov wrote:
> > 6. The kernel patches (this email thread) depend on the CET patches
> > (for the interface part only). And the CET patches is this, right?
> > https://lore.kernel.org/linux-doc/?q=x86%2Fcet%2Fshstk
CET is a two-part patchset: shstk and ibt. Look for x86/cet/ibt.
At this point we plan to use CET interface, but it's not settled until it
actually lands upstream.
-- 
 Kirill A. Shutemov
^ permalink raw reply	[flat|nested] 33+ messages in thread 
- * Re: [RFC 0/9] Linear Address Masking enabling
  2021-09-22 20:03             ` Dmitry Vyukov
  2021-09-22 21:33               ` Kirill A. Shutemov
@ 2021-09-23  0:15               ` H.J. Lu
  2021-09-23  5:35                 ` Dmitry Vyukov
  1 sibling, 1 reply; 33+ messages in thread
From: H.J. Lu @ 2021-09-23  0:15 UTC (permalink / raw)
  To: Dmitry Vyukov
  Cc: Zhang, Xiang1, Kirill A. Shutemov, Kirill A. Shutemov,
	Dave Hansen, Lutomirski, Andy, Peter Zijlstra,
	the arch/x86 maintainers, Andrey Ryabinin, Alexander Potapenko,
	Catalin Marinas, Will Deacon, Andi Kleen, Linux-MM, LKML,
	Carlos O'Donell, Marco Elver, Taras Madan
On Wed, Sep 22, 2021 at 1:03 PM Dmitry Vyukov <dvyukov@google.com> wrote:
>
> On Wed, 22 Sept 2021 at 14:54, Dmitry Vyukov <dvyukov@google.com> wrote:
> >
> > On Wed, 22 Sept 2021 at 03:15, Zhang, Xiang1 <xiang1.zhang@intel.com> wrote:
> > >
> > > They are already in llvm.org.
> > > One of my old patches is https://reviews.llvm.org/D102472 which has been committed by https://reviews.llvm.org/D102901  and https://reviews.llvm.org/D109790
> >
> > Hi Xiang,
> >
> > Good sanitizer patches are upstream!
> >
> > Please help me to understand the status of other pieces (H.J. you
> > probably talked about this yesterday, but I wasn't able to build a
> > complete picture during the talk, I think it will be useful to have
> > this in written form).
> >
> > 1. The presentation mentions "GCC: enable memory tagging with LAM in
> > x86 codegen".
> > What exactly is needed? Isn't LAM transparent for codegen? What's the
> > status in gcc? Does a corresponding change need to be done in llvm?
> >
> > 2. "Enable LAM in binutils".
> > This is already upstream in binutils 2.36, right?
> >
> > 3. The mentioned glibc patch:
> > http://patchwork.ozlabs.org/project/glibc/patch/20210211173711.71736-1-hjl.tools@gmail.com/
> > Not upstream yet, targeting glibc 2.34.
>
> Do we need any support in other libc's, e.g. Android bionic?
Here is my tagged address API proposal:
https://sourceware.org/pipermail/libc-alpha/2021-August/130382.html
> > 4. "Avoid pointer operations incompatible with LAM. memmove: mask out
> > memory tags before comparing pointers".
> > Is this upstream? Where is the patch? Are there other similar patches?
> >
> > As a side note, regarding the memmove change: do we really need it?
> > Memory regions can overlap only if they come from the same
> > allocation/base object. If they come from different allocations, they
> > can't overlap (undefined behavior already).
> >
> > 5. Do we need any additional enabling changes in clang/llvm?
> >
> > 6. The kernel patches (this email thread) depend on the CET patches
> > (for the interface part only). And the CET patches is this, right?
> > https://lore.kernel.org/linux-doc/?q=x86%2Fcet%2Fshstk
> >
> > 7. Do I miss anything else?
> >
> > H.J. please upload your slides here:
> > https://linuxplumbersconf.org/event/11/contributions/1010/
> > It would help with links and copy-pasting text.
> >
> > FTR here is the link to the Plumbers talk:
> > https://youtu.be/zUw0ZVXCwoM?t=10456
> >
> > Thank you
> >
> >
> > > BR
> > > Xiang
> > >
> > > -----Original Message-----
> > > From: H.J. Lu <hjl.tools@gmail.com>
> > > Sent: Wednesday, September 22, 2021 1:16 AM
> > > To: Dmitry Vyukov <dvyukov@google.com>
> > > Cc: Kirill A. Shutemov <kirill@shutemov.name>; Kirill A. Shutemov <kirill.shutemov@linux.intel.com>; Dave Hansen <dave.hansen@linux.intel.com>; Lutomirski, Andy <luto@kernel.org>; Peter Zijlstra <peterz@infradead.org>; the arch/x86 maintainers <x86@kernel.org>; Andrey Ryabinin <aryabinin@virtuozzo.com>; Alexander Potapenko <glider@google.com>; Catalin Marinas <catalin.marinas@arm.com>; Will Deacon <will@kernel.org>; Andi Kleen <ak@linux.intel.com>; Linux-MM <linux-mm@kvack.org>; LKML <linux-kernel@vger.kernel.org>; Carlos O'Donell <carlos@redhat.com>; Marco Elver <elver@google.com>; Taras Madan <tarasmadan@google.com>; Zhang, Xiang1 <xiang1.zhang@intel.com>
> > > Subject: Re: [RFC 0/9] Linear Address Masking enabling
> > >
> > > On Tue, Sep 21, 2021 at 9:52 AM Dmitry Vyukov <dvyukov@google.com> wrote:
> > > >
> > > > On Sun, 7 Feb 2021 at 15:11, Kirill A. Shutemov <kirill@shutemov.name> wrote:
> > > > >
> > > > > On Sun, Feb 07, 2021 at 09:24:23AM +0100, Dmitry Vyukov wrote:
> > > > > > On Fri, Feb 5, 2021 at 4:16 PM Kirill A. Shutemov
> > > > > > <kirill.shutemov@linux.intel.com> wrote:
> > > > > > >
> > > > > > > > Linear Address Masking[1] (LAM) modifies the checking that is
> > > > > > > > applied to 64-bit linear addresses, allowing software to make use of
> > > > > > > > the untranslated address bits for metadata.
> > > > > > >
> > > > > > > The patchset brings support for LAM for userspace addresses.
> > > > > > >
> > > > > > > The most sensitive part of enabling is change in tlb.c, where
> > > > > > > CR3 flags get set. Please take a look that what I'm doing makes sense.
> > > > > > >
> > > > > > > The patchset is RFC quality and the code requires more testing
> > > > > > > before it can be applied.
> > > > > > >
> > > > > > > The userspace API is not finalized yet. The patchset extends API
> > > > > > > used by
> > > > > > > ARM64: PR_GET/SET_TAGGED_ADDR_CTRL. The API is adjusted to not
> > > > > > > imply ARM
> > > > > > > TBI: it now allows to request a number of bits of metadata
> > > > > > > needed and report where these bits are located in the address.
> > > > > > >
> > > > > > > There's an alternative proposal[2] for the API based on Intel
> > > > > > > CET interface. Please let us know if you prefer one over another.
> > > > > > >
> > > > > > > > The feature competes for bits with 5-level paging: LAM_U48 makes
> > > > > > > > it impossible to map anything above 47-bits. The patchset made
> > > > > > > > these capabilities mutually exclusive: whichever is used first wins.
> > > > > > > LAM_U57 can be combined with mappings above 47-bits.
> > > > > > >
> > > > > > > I include QEMU patch in case if somebody wants to play with the feature.
> > > > > >
> > > > > > Exciting! Do you plan to send the QEMU patch to QEMU?
> > > > >
> > > > > Sure. After more testing, once I'm sure it's conforming to the hardware.
> > > >
> > > > A follow up after H.J.'s LPC talk:
> > > > https://linuxplumbersconf.org/event/11/contributions/1010/
> > > > (also +Carlos)
> > > >
> > > > As far as I understood, this kernel series depends on the Intel CET patches.
> > > >
> > > > Where are these compiler-rt patches that block gcc support?
> > >
> > > Hi Xiang,
> > >
> > > Please share your compiler-rt changes for LAM.
> > >
> > > --
> > > H.J.
-- 
H.J.
^ permalink raw reply	[flat|nested] 33+ messages in thread 
- * Re: [RFC 0/9] Linear Address Masking enabling
  2021-09-23  0:15               ` H.J. Lu
@ 2021-09-23  5:35                 ` Dmitry Vyukov
  0 siblings, 0 replies; 33+ messages in thread
From: Dmitry Vyukov @ 2021-09-23  5:35 UTC (permalink / raw)
  To: H.J. Lu
  Cc: Zhang, Xiang1, Kirill A. Shutemov, Kirill A. Shutemov,
	Dave Hansen, Lutomirski, Andy, Peter Zijlstra,
	the arch/x86 maintainers, Andrey Ryabinin, Alexander Potapenko,
	Catalin Marinas, Will Deacon, Andi Kleen, Linux-MM, LKML,
	Carlos O'Donell, Marco Elver, Taras Madan
On Thu, 23 Sept 2021 at 02:15, H.J. Lu <hjl.tools@gmail.com> wrote:
> > > On Wed, 22 Sept 2021 at 03:15, Zhang, Xiang1 <xiang1.zhang@intel.com> wrote:
> > > >
> > > > They are already in llvm.org.
> > > > One of my old patches is https://reviews.llvm.org/D102472 which has been committed by https://reviews.llvm.org/D102901  and https://reviews.llvm.org/D109790
> > >
> > > Hi Xiang,
> > >
> > > Good sanitizer patches are upstream!
> > >
> > > Please help me to understand the status of other pieces (H.J. you
> > > probably talked about this yesterday, but I wasn't able to build a
> > > complete picture during the talk, I think it will be useful to have
> > > this in written form).
> > >
> > > 1. The presentation mentions "GCC: enable memory tagging with LAM in
> > > x86 codegen".
> > > What exactly is needed? Isn't LAM transparent for codegen? What's the
> > > status in gcc? Does a corresponding change need to be done in llvm?
> > >
> > > 2. "Enable LAM in binutils".
> > > This is already upstream in binutils 2.36, right?
> > >
> > > 3. The mentioned glibc patch:
> > > http://patchwork.ozlabs.org/project/glibc/patch/20210211173711.71736-1-hjl.tools@gmail.com/
> > > Not upstream yet, targeting glibc 2.34.
> >
> > Do we need any support in other libc's, e.g. Android bionic?
>
> Here is my tagged address API proposal:
>
> https://sourceware.org/pipermail/libc-alpha/2021-August/130382.html
Thank you, I get the big picture now.
> > > 4. "Avoid pointer operations incompatible with LAM. memmove: mask out
> > > memory tags before comparing pointers".
> > > Is this upstream? Where is the patch? Are there other similar patches?
> > >
> > > As a side note, regarding the memmove change: do we really need it?
> > > Memory regions can overlap only if they come from the same
> > > allocation/base object. If they come from different allocations, they
> > > can't overlap (undefined behavior already).
> > >
> > > 5. Do we need any additional enabling changes in clang/llvm?
> > >
> > > 6. The kernel patches (this email thread) depend on the CET patches
> > > (for the interface part only). And the CET patches is this, right?
> > > https://lore.kernel.org/linux-doc/?q=x86%2Fcet%2Fshstk
> > >
> > > 7. Do I miss anything else?
> > >
> > > H.J. please upload your slides here:
> > > https://linuxplumbersconf.org/event/11/contributions/1010/
> > > It would help with links and copy-pasting text.
> > >
> > > FTR here is the link to the Plumbers talk:
> > > https://youtu.be/zUw0ZVXCwoM?t=10456
> > >
> > > Thank you
> > >
> > >
> > > > BR
> > > > Xiang
> > > >
> > > > -----Original Message-----
> > > > From: H.J. Lu <hjl.tools@gmail.com>
> > > > Sent: Wednesday, September 22, 2021 1:16 AM
> > > > To: Dmitry Vyukov <dvyukov@google.com>
> > > > Cc: Kirill A. Shutemov <kirill@shutemov.name>; Kirill A. Shutemov <kirill.shutemov@linux.intel.com>; Dave Hansen <dave.hansen@linux.intel.com>; Lutomirski, Andy <luto@kernel.org>; Peter Zijlstra <peterz@infradead.org>; the arch/x86 maintainers <x86@kernel.org>; Andrey Ryabinin <aryabinin@virtuozzo.com>; Alexander Potapenko <glider@google.com>; Catalin Marinas <catalin.marinas@arm.com>; Will Deacon <will@kernel.org>; Andi Kleen <ak@linux.intel.com>; Linux-MM <linux-mm@kvack.org>; LKML <linux-kernel@vger.kernel.org>; Carlos O'Donell <carlos@redhat.com>; Marco Elver <elver@google.com>; Taras Madan <tarasmadan@google.com>; Zhang, Xiang1 <xiang1.zhang@intel.com>
> > > > Subject: Re: [RFC 0/9] Linear Address Masking enabling
> > > >
> > > > On Tue, Sep 21, 2021 at 9:52 AM Dmitry Vyukov <dvyukov@google.com> wrote:
> > > > >
> > > > > On Sun, 7 Feb 2021 at 15:11, Kirill A. Shutemov <kirill@shutemov.name> wrote:
> > > > > >
> > > > > > On Sun, Feb 07, 2021 at 09:24:23AM +0100, Dmitry Vyukov wrote:
> > > > > > > On Fri, Feb 5, 2021 at 4:16 PM Kirill A. Shutemov
> > > > > > > <kirill.shutemov@linux.intel.com> wrote:
> > > > > > > >
> > > > > > > > > Linear Address Masking[1] (LAM) modifies the checking that is
> > > > > > > > > applied to 64-bit linear addresses, allowing software to make use of
> > > > > > > > > the untranslated address bits for metadata.
> > > > > > > >
> > > > > > > > The patchset brings support for LAM for userspace addresses.
> > > > > > > >
> > > > > > > > The most sensitive part of enabling is change in tlb.c, where
> > > > > > > > CR3 flags get set. Please take a look that what I'm doing makes sense.
> > > > > > > >
> > > > > > > > The patchset is RFC quality and the code requires more testing
> > > > > > > > before it can be applied.
> > > > > > > >
> > > > > > > > The userspace API is not finalized yet. The patchset extends API
> > > > > > > > used by
> > > > > > > > ARM64: PR_GET/SET_TAGGED_ADDR_CTRL. The API is adjusted to not
> > > > > > > > imply ARM
> > > > > > > > TBI: it now allows to request a number of bits of metadata
> > > > > > > > needed and report where these bits are located in the address.
> > > > > > > >
> > > > > > > > There's an alternative proposal[2] for the API based on Intel
> > > > > > > > CET interface. Please let us know if you prefer one over another.
> > > > > > > >
> > > > > > > > > The feature competes for bits with 5-level paging: LAM_U48 makes
> > > > > > > > > it impossible to map anything above 47-bits. The patchset made
> > > > > > > > > these capabilities mutually exclusive: whichever is used first wins.
> > > > > > > > LAM_U57 can be combined with mappings above 47-bits.
> > > > > > > >
> > > > > > > > I include QEMU patch in case if somebody wants to play with the feature.
> > > > > > >
> > > > > > > Exciting! Do you plan to send the QEMU patch to QEMU?
> > > > > >
> > > > > > Sure. After more testing, once I'm sure it's conforming to the hardware.
> > > > >
> > > > > A follow up after H.J.'s LPC talk:
> > > > > https://linuxplumbersconf.org/event/11/contributions/1010/
> > > > > (also +Carlos)
> > > > >
> > > > > As far as I understood, this kernel series depends on the Intel CET patches.
> > > > >
> > > > > Where are these compiler-rt patches that block gcc support?
> > > >
> > > > Hi Xiang,
> > > >
> > > > Please share your compiler-rt changes for LAM.
> > > >
> > > > --
> > > > H.J.
>
>
>
> --
> H.J.
^ permalink raw reply	[flat|nested] 33+ messages in thread 
 
 
- * Re: [RFC 0/9] Linear Address Masking enabling
  2021-09-22 12:54           ` Dmitry Vyukov
  2021-09-22 20:03             ` Dmitry Vyukov
@ 2021-09-23  0:07             ` H.J. Lu
  1 sibling, 0 replies; 33+ messages in thread
From: H.J. Lu @ 2021-09-23  0:07 UTC (permalink / raw)
  To: Dmitry Vyukov
  Cc: Zhang, Xiang1, Kirill A. Shutemov, Kirill A. Shutemov,
	Dave Hansen, Lutomirski, Andy, Peter Zijlstra,
	the arch/x86 maintainers, Andrey Ryabinin, Alexander Potapenko,
	Catalin Marinas, Will Deacon, Andi Kleen, Linux-MM, LKML,
	Carlos O'Donell, Marco Elver, Taras Madan, Hongtao Liu
On Wed, Sep 22, 2021 at 5:54 AM Dmitry Vyukov <dvyukov@google.com> wrote:
>
> On Wed, 22 Sept 2021 at 03:15, Zhang, Xiang1 <xiang1.zhang@intel.com> wrote:
> >
> > They are already in llvm.org.
> > One of my old patches is https://reviews.llvm.org/D102472 which has been committed by https://reviews.llvm.org/D102901  and https://reviews.llvm.org/D109790
>
> Hi Xiang,
>
> Good sanitizer patches are upstream!
>
> Please help me to understand the status of other pieces (H.J. you
> probably talked about this yesterday, but I wasn't able to build a
> complete picture during the talk, I think it will be useful to have
> this in written form).
>
> 1. The presentation mentions "GCC: enable memory tagging with LAM in
> x86 codegen".
> What exactly is needed? Isn't LAM transparent for codegen? What's the
> status in gcc? Does a corresponding change need to be done in llvm?
The current LAM-enabled GCC is on the users/intel/lam/master branch at
https://gitlab.com/x86-gcc/gcc/-/tree/users/intel/lam/master
Hongtao, please sync libsanitizer with compiler-rt and check if
compiler-rt is up to date.  The LAM-enabled GCC run-time uses
the proposed tagged address API.
> 2. "Enable LAM in binutils".
> This is already upstream in binutils 2.36, right?
Correct.
> 3. The mentioned glibc patch:
> http://patchwork.ozlabs.org/project/glibc/patch/20210211173711.71736-1-hjl.tools@gmail.com/
> Not upstream yet, targeting glibc 2.34.
It is targeting glibc 2.35 now.
> 4. "Avoid pointer operations incompatible with LAM. memmove: mask out
> memory tags before comparing pointers".
> Is this upstream? Where is the patch? Are there other similar patches?
The LAM-enabled glibc is on the users/intel/lam/master branch at:
https://gitlab.com/x86-glibc/glibc/-/tree/users/intel/lam/master
I am considering moving the tagged address API to libc_nonshared.a
for easier backporting.
> As a side note, regarding the memmove change: do we really need it?
> Memory regions can overlap only if they come from the same
> allocation/base object. If they come from different allocations, they
> can't overlap (undefined behavior already).
The change isn't needed and has been removed.
> 5. Do we need any additional enabling changes in clang/llvm?
I proposed the tagged address API to support LAM.  compiler-rt
should use it.
> 6. The kernel patches (this email thread) depend on the CET patches
> (for the interface part only). And the CET patches is this, right?
> https://lore.kernel.org/linux-doc/?q=x86%2Fcet%2Fshstk
Yes.
> 7. Do I miss anything else?
No.
> H.J. please upload your slides here:
> https://linuxplumbersconf.org/event/11/contributions/1010/
> It would help with links and copy-pasting text.
Done.
H.J.
> FTR here is the link to the Plumbers talk:
> https://youtu.be/zUw0ZVXCwoM?t=10456
>
> Thank you
>
>
> > BR
> > Xiang
> >
> > -----Original Message-----
> > From: H.J. Lu <hjl.tools@gmail.com>
> > Sent: Wednesday, September 22, 2021 1:16 AM
> > To: Dmitry Vyukov <dvyukov@google.com>
> > Cc: Kirill A. Shutemov <kirill@shutemov.name>; Kirill A. Shutemov <kirill.shutemov@linux.intel.com>; Dave Hansen <dave.hansen@linux.intel.com>; Lutomirski, Andy <luto@kernel.org>; Peter Zijlstra <peterz@infradead.org>; the arch/x86 maintainers <x86@kernel.org>; Andrey Ryabinin <aryabinin@virtuozzo.com>; Alexander Potapenko <glider@google.com>; Catalin Marinas <catalin.marinas@arm.com>; Will Deacon <will@kernel.org>; Andi Kleen <ak@linux.intel.com>; Linux-MM <linux-mm@kvack.org>; LKML <linux-kernel@vger.kernel.org>; Carlos O'Donell <carlos@redhat.com>; Marco Elver <elver@google.com>; Taras Madan <tarasmadan@google.com>; Zhang, Xiang1 <xiang1.zhang@intel.com>
> > Subject: Re: [RFC 0/9] Linear Address Masking enabling
> >
> > On Tue, Sep 21, 2021 at 9:52 AM Dmitry Vyukov <dvyukov@google.com> wrote:
> > >
> > > On Sun, 7 Feb 2021 at 15:11, Kirill A. Shutemov <kirill@shutemov.name> wrote:
> > > >
> > > > On Sun, Feb 07, 2021 at 09:24:23AM +0100, Dmitry Vyukov wrote:
> > > > > On Fri, Feb 5, 2021 at 4:16 PM Kirill A. Shutemov
> > > > > <kirill.shutemov@linux.intel.com> wrote:
> > > > > >
> > > > > > Linear Address Masking[1] (LAM) modifies the checking that is
> > > > > > applied to 64-bit linear addresses, allowing software to make use of
> > > > > > the untranslated address bits for metadata.
> > > > > >
> > > > > > The patchset brings support for LAM for userspace addresses.
> > > > > >
> > > > > > The most sensitive part of enabling is change in tlb.c, where
> > > > > > CR3 flags get set. Please take a look that what I'm doing makes sense.
> > > > > >
> > > > > > The patchset is RFC quality and the code requires more testing
> > > > > > before it can be applied.
> > > > > >
> > > > > > The userspace API is not finalized yet. The patchset extends API
> > > > > > used by
> > > > > > ARM64: PR_GET/SET_TAGGED_ADDR_CTRL. The API is adjusted to not
> > > > > > imply ARM
> > > > > > TBI: it now allows to request a number of bits of metadata
> > > > > > needed and report where these bits are located in the address.
> > > > > >
> > > > > > There's an alternative proposal[2] for the API based on Intel
> > > > > > CET interface. Please let us know if you prefer one over another.
> > > > > >
> > > > > > The feature competes for bits with 5-level paging: LAM_U48 makes
> > > > > > it impossible to map anything above 47-bits. The patchset made
> > > > > > these capabilities mutually exclusive: whichever is used first wins.
> > > > > > LAM_U57 can be combined with mappings above 47-bits.
> > > > > >
> > > > > > I include QEMU patch in case if somebody wants to play with the feature.
> > > > >
> > > > > Exciting! Do you plan to send the QEMU patch to QEMU?
> > > >
> > > > Sure. After more testing, once I'm sure it's conforming to the hardware.
> > >
> > > A follow up after H.J.'s LPC talk:
> > > https://linuxplumbersconf.org/event/11/contributions/1010/
> > > (also +Carlos)
> > >
> > > As far as I understood, this kernel series depends on the Intel CET patches.
> > >
> > > Where are these compiler-rt patches that block gcc support?
> >
> > Hi Xiang,
> >
> > Please share your compiler-rt changes for LAM.
> >
> > --
> > H.J.
--
H.J.
^ permalink raw reply	[flat|nested] 33+ messages in thread