All of lore.kernel.org
 help / color / mirror / Atom feed
From: Junaid Shahid <junaids@google.com>
To: linux-kernel@vger.kernel.org
Cc: Ofir Weisse <oweisse@google.com>,
	kvm@vger.kernel.org, pbonzini@redhat.com, jmattson@google.com,
	pjt@google.com, alexandre.chartre@oracle.com, rppt@linux.ibm.com,
	dave.hansen@linux.intel.com, peterz@infradead.org,
	tglx@linutronix.de, luto@kernel.org, linux-mm@kvack.org
Subject: [RFC PATCH 47/47] mm: asi: Properly un/mapping task stack from ASI + tlb flush
Date: Tue, 22 Feb 2022 21:22:23 -0800	[thread overview]
Message-ID: <20220223052223.1202152-48-junaids@google.com> (raw)
In-Reply-To: <20220223052223.1202152-1-junaids@google.com>

From: Ofir Weisse <oweisse@google.com>

The task stack has to be mapped into, and later unmapped from, ASI (with a
TLB flush) at several locations. This matters especially because a
task_struct might be reused, potentially with a different mm.

1. Map in vcpu_run() @ arch/x86/kvm/x86.c
2. Unmap in release_task_stack() @ kernel/fork.c
3. Unmap in do_exit() @ kernel/exit.c
4. Unmap in begin_new_exec() @ fs/exec.c

Signed-off-by: Ofir Weisse <oweisse@google.com>


---
 arch/x86/include/asm/asi.h |  6 ++++
 arch/x86/kvm/x86.c         |  6 ++++
 arch/x86/mm/asi.c          | 59 ++++++++++++++++++++++++++++++++++++++
 fs/exec.c                  |  7 ++++-
 include/asm-generic/asi.h  | 16 +++++++++--
 include/linux/sched.h      |  5 ++++
 kernel/exit.c              |  2 +-
 kernel/fork.c              | 22 +++++++++++++-
 8 files changed, 118 insertions(+), 5 deletions(-)

diff --git a/arch/x86/include/asm/asi.h b/arch/x86/include/asm/asi.h
index 6148e65fb0c2..9d8f43981678 100644
--- a/arch/x86/include/asm/asi.h
+++ b/arch/x86/include/asm/asi.h
@@ -87,6 +87,12 @@ void asi_unmap_user(struct asi *asi, void *va, size_t len);
 int  asi_fill_pgtbl_pool(struct asi_pgtbl_pool *pool, uint count, gfp_t flags);
 void asi_clear_pgtbl_pool(struct asi_pgtbl_pool *pool);
 
+int asi_map_task_stack(struct task_struct *tsk, struct asi *asi);
+void asi_unmap_task_stack(struct task_struct *tsk);
+void asi_mark_pages_local_nonsensitive(struct page *pages, uint order,
+                                       struct mm_struct *mm);
+void asi_clear_pages_local_nonsensitive(struct page *pages, uint order);
+
 static inline void asi_init_pgtbl_pool(struct asi_pgtbl_pool *pool)
 {
 	pool->pgtbl_list = NULL;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 294f73e9e71e..718104eefaed 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -10122,6 +10122,12 @@ static int vcpu_run(struct kvm_vcpu *vcpu)
 	vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
 	vcpu->arch.l1tf_flush_l1d = true;
 
+	/* We must have current->stack mapped into asi. This function can be
+	 * safely called many times, as it will only do the actual mapping once. */
+	r = asi_map_task_stack(current, vcpu->kvm->asi);
+	if (r != 0)
+		return r;
+
 	for (;;) {
 		if (kvm_vcpu_running(vcpu)) {
 			r = vcpu_enter_guest(vcpu);
diff --git a/arch/x86/mm/asi.c b/arch/x86/mm/asi.c
index 7f2aa1823736..a86ac6644a57 100644
--- a/arch/x86/mm/asi.c
+++ b/arch/x86/mm/asi.c
@@ -1029,6 +1029,45 @@ void asi_unmap(struct asi *asi, void *addr, size_t len, bool flush_tlb)
 		asi_flush_tlb_range(asi, addr, len);
 }
 
+/* Map @tsk's stack into ASI once. Returns 0 if mapped (now or earlier),
+ * -EINVAL if @tsk has no mm, otherwise the error from asi_map(). */
+int asi_map_task_stack(struct task_struct *tsk, struct asi *asi)
+{
+	int err;
+
+	if (tsk->asi_stack_mapped)
+		return 0;
+	if (!tsk->mm)
+		return -EINVAL;
+
+	/* tsk->stack is used as-is: for page-allocator stacks this assumes
+	 * the pages carry the non-sensitive page flag, so that the address
+	 * is already the aliased one. */
+	err = asi_map(ASI_LOCAL_NONSENSITIVE, tsk->stack, THREAD_SIZE);
+	if (err)
+		return err;
+	/* Remember the asi; a non-NULL value also means "already mapped". */
+	tsk->asi_stack_mapped = asi;
+	asi_sync_mapping(asi, tsk->stack, THREAD_SIZE);
+	return 0;
+}
+
+/* Undo asi_map_task_stack(): unmap @tsk's stack from ASI, with TLB flush. */
+void asi_unmap_task_stack(struct task_struct *tsk)
+{
+	/* Nothing to undo if the stack was never mapped. A task whose mm is
+	 * already gone is left untouched as well (marker included). */
+	if (!tsk->asi_stack_mapped || !tsk->mm)
+		return;
+
+	asi_unmap(ASI_LOCAL_NONSENSITIVE, tsk->stack, THREAD_SIZE,
+		  /* flush_tlb = */ true);
+
+	/* Reset the marker so that a later map call maps the stack again. */
+	tsk->asi_stack_mapped = NULL;
+}
+
+
 void *asi_va(unsigned long pa)
 {
 	struct page *page = pfn_to_page(PHYS_PFN(pa));
@@ -1336,3 +1375,23 @@ void asi_unmap_user(struct asi *asi, void *addr, size_t len)
 	}
 }
 EXPORT_SYMBOL_GPL(asi_unmap_user);
+
+void asi_mark_pages_local_nonsensitive(struct page *pages, uint order,
+				       struct mm_struct *mm)
+{
+	uint idx, nr_pages = 1 << order;	/* block spans 2^order pages */
+	for (idx = 0; idx < nr_pages; idx++) {
+		__SetPageLocalNonSensitive(&pages[idx]);
+		pages[idx].asi_mm = mm;		/* owning mm (may be NULL) */
+	}
+}
+
+void asi_clear_pages_local_nonsensitive(struct page *pages, uint order)
+{
+	uint idx, nr_pages = 1 << order;	/* block spans 2^order pages */
+	for (idx = 0; idx < nr_pages; idx++) {
+		__ClearPageLocalNonSensitive(&pages[idx]);
+		pages[idx].asi_mm = NULL;	/* forget the owning mm */
+	}
+}
+
diff --git a/fs/exec.c b/fs/exec.c
index 76f3b433e80d..fb9182cf3f33 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -69,6 +69,7 @@
 #include <linux/uaccess.h>
 #include <asm/mmu_context.h>
 #include <asm/tlb.h>
+#include <asm/asi.h>
 
 #include <trace/events/task.h>
 #include "internal.h"
@@ -1238,7 +1239,11 @@ int begin_new_exec(struct linux_binprm * bprm)
 	struct task_struct *me = current;
 	int retval;
 
-        /* TODO: (oweisse) unmap the stack from ASI */
+        /* The old mm is about to be released later on in exec_mmap. We are
+         * reusing the task, including its stack which was mapped to
+         * mm->asi_pgd[0]. We need to asi_unmap the stack, so the destructor of
+         * the mm won't complain about "lingering" asi mappings. */
+        asi_unmap_task_stack(current);
 
 	/* Once we are committed compute the creds */
 	retval = bprm_creds_from_file(bprm);
diff --git a/include/asm-generic/asi.h b/include/asm-generic/asi.h
index 2763cb1a974c..6e9a261a2b9d 100644
--- a/include/asm-generic/asi.h
+++ b/include/asm-generic/asi.h
@@ -66,8 +66,13 @@ static inline struct asi *asi_get_target(void) { return NULL; }
 
 static inline struct asi *asi_get_current(void) { return NULL; }
 
-static inline
-int asi_map_gfp(struct asi *asi, void *addr, size_t len, gfp_t gfp_flags)
+static inline int asi_map_task_stack(struct task_struct *tsk, struct asi *asi)
+{ return 0; }	/* no-op stub: nothing is mapped, always returns 0 */
+
+static inline void asi_unmap_task_stack(struct task_struct *tsk) { } /* no-op */
+
+static inline int asi_map_gfp(struct asi *asi, void *addr, size_t len,
+			      gfp_t gfp_flags)
 {
 	return 0;
 }
@@ -130,6 +135,13 @@ static inline int asi_load_module(struct module* module) {return 0;}
 
 static inline void asi_unload_module(struct module* module) { }
 
+static inline
+void asi_mark_pages_local_nonsensitive(struct page *pages, uint order,
+                                       struct mm_struct *mm) { } /* no-op stub */
+
+static inline	/* no-op stub */
+void asi_clear_pages_local_nonsensitive(struct page *pages, uint order) { }
+
 #endif  /* !_ASSEMBLY_ */
 
 #endif /* !CONFIG_ADDRESS_SPACE_ISOLATION */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 78c351e35fec..87ad45e52b19 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -67,6 +67,7 @@ struct sighand_struct;
 struct signal_struct;
 struct task_delay_info;
 struct task_group;
+struct asi;
 
 /*
  * Task state bitmask. NOTE! These bits are also
@@ -1470,6 +1471,10 @@ struct task_struct {
 	int				mce_count;
 #endif
 
+#ifdef CONFIG_ADDRESS_SPACE_ISOLATION
+        struct asi *asi_stack_mapped;
+#endif
+
 #ifdef CONFIG_KRETPROBES
 	struct llist_head               kretprobe_instances;
 #endif
diff --git a/kernel/exit.c b/kernel/exit.c
index ab2749cf6887..f21cc21814d1 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -768,7 +768,7 @@ void __noreturn do_exit(long code)
 	profile_task_exit(tsk);
 	kcov_task_exit(tsk);
 
-        /* TODO: (oweisse) unmap the stack from ASI */
+	asi_unmap_task_stack(tsk);
 
 	coredump_task_exit(tsk);
 	ptrace_event(PTRACE_EVENT_EXIT, code);
diff --git a/kernel/fork.c b/kernel/fork.c
index cb147a72372d..876fefc477cb 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -216,7 +216,6 @@ static int free_vm_stack_cache(unsigned int cpu)
 
 static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node)
 {
-  /* TODO: (oweisse) Add annotation to map the stack into ASI */
 #ifdef CONFIG_VMAP_STACK
 	void *stack;
 	int i;
@@ -269,7 +268,16 @@ static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node)
 	struct page *page = alloc_pages_node(node, THREADINFO_GFP,
 					     THREAD_SIZE_ORDER);
 
+        /* When marking pages as PageLocalNonSensitive we set page->asi_mm to
+         * NULL. We must make sure the flag is cleared from the stack pages
+         * before free_pages is called. Otherwise page->asi_mm will be
+         * accessed, resulting in a NULL dereference. page_address() below
+         * yields an aliased address after ASI_LOCAL_MAP, thanks to the
+         * PageLocalNonSensitive flag. */
 	if (likely(page)) {
+                asi_mark_pages_local_nonsensitive(page,
+                                                  THREAD_SIZE_ORDER,
+                                                  NULL);
 		tsk->stack = kasan_reset_tag(page_address(page));
 		return tsk->stack;
 	}
@@ -301,6 +309,14 @@ static inline void free_thread_stack(struct task_struct *tsk)
 	}
 #endif
 
+        /* We must clear the PageLocalNonSensitive flag before calling
+         * free_pages(). Otherwise page->asi_mm (which is NULL) will be
+         * accessed in order to unmap the pages from ASI. Specifically for the
+         * stack, we assume the pages were already unmapped from ASI before we
+         * got here, via asi_unmap_task_stack(). */
+        asi_clear_pages_local_nonsensitive(virt_to_page(tsk->stack),
+                                                        THREAD_SIZE_ORDER);
+
 	__free_pages(virt_to_page(tsk->stack), THREAD_SIZE_ORDER);
 }
 # else
@@ -436,6 +452,7 @@ static void release_task_stack(struct task_struct *tsk)
 	if (WARN_ON(READ_ONCE(tsk->__state) != TASK_DEAD))
 		return;  /* Better to leak the stack than to free prematurely */
 
+        asi_unmap_task_stack(tsk);
 	account_kernel_stack(tsk, -1);
 	free_thread_stack(tsk);
 	tsk->stack = NULL;
@@ -916,6 +933,9 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
 	 * functions again.
 	 */
 	tsk->stack = stack;
+#ifdef CONFIG_ADDRESS_SPACE_ISOLATION
+	tsk->asi_stack_mapped = NULL;
+#endif
 #ifdef CONFIG_VMAP_STACK
 	tsk->stack_vm_area = stack_vm_area;
 #endif
-- 
2.35.1.473.g83b2b277ed-goog


  parent reply	other threads:[~2022-02-23  5:29 UTC|newest]

Thread overview: 64+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-02-23  5:21 [RFC PATCH 00/47] Address Space Isolation for KVM Junaid Shahid
2022-02-23  5:21 ` [RFC PATCH 01/47] mm: asi: Introduce ASI core API Junaid Shahid
2022-02-23  5:21 ` [RFC PATCH 02/47] mm: asi: Add command-line parameter to enable/disable ASI Junaid Shahid
2022-02-23  5:21 ` [RFC PATCH 03/47] mm: asi: Switch to unrestricted address space when entering scheduler Junaid Shahid
2022-02-23  5:21 ` [RFC PATCH 04/47] mm: asi: ASI support in interrupts/exceptions Junaid Shahid
2022-03-14 15:50   ` Thomas Gleixner
2022-03-15  2:01     ` Junaid Shahid
2022-03-15 12:55       ` Thomas Gleixner
2022-03-15 22:41         ` Junaid Shahid
2022-02-23  5:21 ` [RFC PATCH 05/47] mm: asi: Make __get_current_cr3_fast() ASI-aware Junaid Shahid
2022-02-23  5:21 ` [RFC PATCH 06/47] mm: asi: ASI page table allocation and free functions Junaid Shahid
2022-02-23  5:21 ` [RFC PATCH 07/47] mm: asi: Functions to map/unmap a memory range into ASI page tables Junaid Shahid
2022-02-23  5:21 ` [RFC PATCH 08/47] mm: asi: Add basic infrastructure for global non-sensitive mappings Junaid Shahid
2022-02-23  5:21 ` [RFC PATCH 09/47] mm: Add __PAGEFLAG_FALSE Junaid Shahid
2022-02-23  5:21 ` [RFC PATCH 10/47] mm: asi: Support for global non-sensitive direct map allocations Junaid Shahid
2022-03-23 21:06   ` Matthew Wilcox
2022-03-23 23:48     ` Junaid Shahid
2022-03-24  1:54       ` Junaid Shahid
2022-02-23  5:21 ` [RFC PATCH 11/47] mm: asi: Global non-sensitive vmalloc/vmap support Junaid Shahid
2022-02-23  5:21 ` [RFC PATCH 12/47] mm: asi: Support for global non-sensitive slab caches Junaid Shahid
2022-02-23  5:21 ` [RFC PATCH 13/47] asi: Added ASI memory cgroup flag Junaid Shahid
2022-02-23  5:21 ` [RFC PATCH 14/47] mm: asi: Disable ASI API when ASI is not enabled for a process Junaid Shahid
2022-02-23  5:21 ` [RFC PATCH 15/47] kvm: asi: Restricted address space for VM execution Junaid Shahid
2022-02-23  5:21 ` [RFC PATCH 16/47] mm: asi: Support for mapping non-sensitive pcpu chunks Junaid Shahid
2022-02-23  5:21 ` [RFC PATCH 17/47] mm: asi: Aliased direct map for local non-sensitive allocations Junaid Shahid
2022-02-23  5:21 ` [RFC PATCH 18/47] mm: asi: Support for pre-ASI-init " Junaid Shahid
2022-02-23  5:21 ` [RFC PATCH 19/47] mm: asi: Support for locally nonsensitive page allocations Junaid Shahid
2022-02-23  5:21 ` [RFC PATCH 20/47] mm: asi: Support for locally non-sensitive vmalloc allocations Junaid Shahid
2022-02-23  5:21 ` [RFC PATCH 21/47] mm: asi: Add support for locally non-sensitive VM_USERMAP pages Junaid Shahid
2022-02-23  5:21 ` [RFC PATCH 22/47] mm: asi: Added refcounting when initilizing an asi Junaid Shahid
2022-02-23  5:21 ` [RFC PATCH 23/47] mm: asi: Add support for mapping all userspace memory into ASI Junaid Shahid
2022-02-23  5:22 ` [RFC PATCH 24/47] mm: asi: Support for local non-sensitive slab caches Junaid Shahid
2022-02-23  5:22 ` [RFC PATCH 25/47] mm: asi: Avoid warning from NMI userspace accesses in ASI context Junaid Shahid
2022-02-23  5:22 ` [RFC PATCH 26/47] mm: asi: Use separate PCIDs for restricted address spaces Junaid Shahid
2022-02-23  5:22 ` [RFC PATCH 27/47] mm: asi: Avoid TLB flushes during ASI CR3 switches when possible Junaid Shahid
2022-02-23  5:22 ` [RFC PATCH 28/47] mm: asi: Avoid TLB flush IPIs to CPUs not in ASI context Junaid Shahid
2022-02-23  5:22 ` [RFC PATCH 29/47] mm: asi: Reduce TLB flushes when freeing pages asynchronously Junaid Shahid
2022-02-23  5:22 ` [RFC PATCH 30/47] mm: asi: Add API for mapping userspace address ranges Junaid Shahid
2022-02-23  5:22 ` [RFC PATCH 31/47] mm: asi: Support for non-sensitive SLUB caches Junaid Shahid
2022-02-23  5:22 ` [RFC PATCH 32/47] x86: asi: Allocate FPU state separately when ASI is enabled Junaid Shahid
2022-02-23  5:22 ` [RFC PATCH 33/47] kvm: asi: Map guest memory into restricted ASI address space Junaid Shahid
2022-02-23  5:22 ` [RFC PATCH 34/47] kvm: asi: Unmap guest memory from ASI address space when using nested virt Junaid Shahid
2022-02-23  5:22 ` [RFC PATCH 35/47] mm: asi: asi_exit() on PF, skip handling if address is accessible Junaid Shahid
2022-02-23  5:22 ` [RFC PATCH 36/47] mm: asi: Adding support for dynamic percpu ASI allocations Junaid Shahid
2022-02-23  5:22 ` [RFC PATCH 37/47] mm: asi: ASI annotation support for static variables Junaid Shahid
2022-02-23  5:22 ` [RFC PATCH 38/47] mm: asi: ASI annotation support for dynamic modules Junaid Shahid
2022-02-23  5:22 ` [RFC PATCH 39/47] mm: asi: Skip conventional L1TF/MDS mitigations Junaid Shahid
2022-02-23  5:22 ` [RFC PATCH 40/47] mm: asi: support for static percpu DEFINE_PER_CPU*_ASI Junaid Shahid
2022-02-23  5:22 ` [RFC PATCH 41/47] mm: asi: Annotation of static variables to be nonsensitive Junaid Shahid
2022-02-23  5:22 ` [RFC PATCH 42/47] mm: asi: Annotation of PERCPU " Junaid Shahid
2022-02-23  5:22 ` [RFC PATCH 43/47] mm: asi: Annotation of dynamic " Junaid Shahid
2022-02-23  5:22 ` [RFC PATCH 44/47] kvm: asi: Splitting kvm_vcpu_arch into non/sensitive parts Junaid Shahid
2022-02-23  5:22 ` [RFC PATCH 45/47] mm: asi: Mapping global nonsensitive areas in asi_global_init Junaid Shahid
2022-02-23  5:22 ` [RFC PATCH 46/47] kvm: asi: Do asi_exit() in vcpu_run loop before returning to userspace Junaid Shahid
2022-02-23  5:22 ` Junaid Shahid [this message]
2022-03-05  3:39 ` [RFC PATCH 00/47] Address Space Isolation for KVM Hyeonggon Yoo
2022-03-16 21:34 ` Alexandre Chartre
2022-03-17 23:25   ` Junaid Shahid
2022-03-22  9:46     ` Alexandre Chartre
2022-03-23 19:35       ` Junaid Shahid
2022-04-08  8:52         ` Alexandre Chartre
2022-04-11  3:26           ` junaid_shahid
2022-03-16 22:49 ` Thomas Gleixner
2022-03-17 21:24   ` Junaid Shahid

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220223052223.1202152-48-junaids@google.com \
    --to=junaids@google.com \
    --cc=alexandre.chartre@oracle.com \
    --cc=dave.hansen@linux.intel.com \
    --cc=jmattson@google.com \
    --cc=kvm@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=luto@kernel.org \
    --cc=oweisse@google.com \
    --cc=pbonzini@redhat.com \
    --cc=peterz@infradead.org \
    --cc=pjt@google.com \
    --cc=rppt@linux.ibm.com \
    --cc=tglx@linutronix.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.