From: Junaid Shahid <junaids@google.com>
To: linux-kernel@vger.kernel.org
Cc: kvm@vger.kernel.org, pbonzini@redhat.com, jmattson@google.com,
pjt@google.com, oweisse@google.com, alexandre.chartre@oracle.com,
rppt@linux.ibm.com, dave.hansen@linux.intel.com,
peterz@infradead.org, tglx@linutronix.de, luto@kernel.org,
linux-mm@kvack.org
Subject: [RFC PATCH 29/47] mm: asi: Reduce TLB flushes when freeing pages asynchronously
Date: Tue, 22 Feb 2022 21:22:05 -0800 [thread overview]
Message-ID: <20220223052223.1202152-30-junaids@google.com> (raw)
In-Reply-To: <20220223052223.1202152-1-junaids@google.com>
When we are freeing pages asynchronously (because the original free
was issued with IRQs disabled), issue only one TLB flush per execution
of the async work function. If there is only one page to free, we do a
targeted flush for that page only. Otherwise, we just do a full flush.
Signed-off-by: Junaid Shahid <junaids@google.com>
---
arch/x86/include/asm/tlbflush.h | 8 +++++
arch/x86/mm/tlb.c | 52 ++++++++++++++++++++-------------
include/linux/mm_types.h | 30 +++++++++++++------
mm/page_alloc.c | 40 ++++++++++++++++++++-----
4 files changed, 93 insertions(+), 37 deletions(-)
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 85315d1d2d70..7d04aa2a5f86 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -296,6 +296,14 @@ unsigned long build_cr3_pcid(pgd_t *pgd, u16 pcid, bool noflush);
u16 kern_pcid(u16 asid);
u16 asi_pcid(struct asi *asi, u16 asid);
+#ifdef CONFIG_ADDRESS_SPACE_ISOLATION
+
+void __asi_prepare_tlb_flush(struct asi *asi, u64 *new_tlb_gen);
+void __asi_flush_tlb_range(u64 mm_context_id, u16 pcid_index, u64 new_tlb_gen,
+ size_t start, size_t end, const cpumask_t *cpu_mask);
+
+#endif /* CONFIG_ADDRESS_SPACE_ISOLATION */
+
#endif /* !MODULE */
#endif /* _ASM_X86_TLBFLUSH_H */
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 2a442335501f..fcd2c8e92f83 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -1302,21 +1302,10 @@ static bool is_asi_active_on_cpu(int cpu, void *info)
return per_cpu(asi_cpu_state.curr_asi, cpu);
}
-void asi_flush_tlb_range(struct asi *asi, void *addr, size_t len)
+void __asi_prepare_tlb_flush(struct asi *asi, u64 *new_tlb_gen)
{
- size_t start = (size_t)addr;
- size_t end = start + len;
- struct flush_tlb_info *info;
- u64 mm_context_id;
- const cpumask_t *cpu_mask;
- u64 new_tlb_gen = 0;
-
- if (!static_cpu_has(X86_FEATURE_ASI))
- return;
-
if (static_cpu_has(X86_FEATURE_PCID)) {
- new_tlb_gen = atomic64_inc_return(asi->tlb_gen);
-
+ *new_tlb_gen = atomic64_inc_return(asi->tlb_gen);
/*
* The increment of tlb_gen must happen before the curr_asi
* reads in is_asi_active_on_cpu(). That ensures that if another
@@ -1326,8 +1315,35 @@ void asi_flush_tlb_range(struct asi *asi, void *addr, size_t len)
*/
smp_mb__after_atomic();
}
+}
+
+void __asi_flush_tlb_range(u64 mm_context_id, u16 pcid_index, u64 new_tlb_gen,
+ size_t start, size_t end, const cpumask_t *cpu_mask)
+{
+ struct flush_tlb_info *info;
preempt_disable();
+ info = get_flush_tlb_info(NULL, start, end, 0, false, new_tlb_gen,
+ mm_context_id, pcid_index);
+
+ on_each_cpu_cond_mask(is_asi_active_on_cpu, do_asi_tlb_flush, info,
+ true, cpu_mask);
+ put_flush_tlb_info();
+ preempt_enable();
+}
+
+void asi_flush_tlb_range(struct asi *asi, void *addr, size_t len)
+{
+ size_t start = (size_t)addr;
+ size_t end = start + len;
+ u64 mm_context_id;
+ u64 new_tlb_gen = 0;
+ const cpumask_t *cpu_mask;
+
+ if (!static_cpu_has(X86_FEATURE_ASI))
+ return;
+
+ __asi_prepare_tlb_flush(asi, &new_tlb_gen);
if (asi == ASI_GLOBAL_NONSENSITIVE) {
mm_context_id = U64_MAX;
@@ -1337,14 +1353,8 @@ void asi_flush_tlb_range(struct asi *asi, void *addr, size_t len)
cpu_mask = mm_cpumask(asi->mm);
}
- info = get_flush_tlb_info(NULL, start, end, 0, false, new_tlb_gen,
- mm_context_id, asi->pcid_index);
-
- on_each_cpu_cond_mask(is_asi_active_on_cpu, do_asi_tlb_flush, info,
- true, cpu_mask);
-
- put_flush_tlb_info();
- preempt_enable();
+ __asi_flush_tlb_range(mm_context_id, asi->pcid_index, new_tlb_gen,
+ start, end, cpu_mask);
}
#endif
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 56511adc263e..7d38229ca85c 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -193,21 +193,33 @@ struct page {
/** @rcu_head: You can use this to free a page by RCU. */
struct rcu_head rcu_head;
-#ifdef CONFIG_ADDRESS_SPACE_ISOLATION
+#if defined(CONFIG_ADDRESS_SPACE_ISOLATION) && !defined(BUILD_VDSO32)
struct {
/* Links the pages_to_free_async list */
struct llist_node async_free_node;
unsigned long _asi_pad_1;
- unsigned long _asi_pad_2;
+ u64 asi_tlb_gen;
- /*
- * Upon allocation of a locally non-sensitive page, set
- * to the allocating mm. Must be set to the same mm when
- * the page is freed. May potentially be overwritten in
- * the meantime, as long as it is restored before free.
- */
- struct mm_struct *asi_mm;
+ union {
+ /*
+ * Upon allocation of a locally non-sensitive
+ * page, set to the allocating mm. Must be set
+ * to the same mm when the page is freed. May
+ * potentially be overwritten in the meantime,
+ * as long as it is restored before free.
+ */
+ struct mm_struct *asi_mm;
+
+ /*
+ * Set to the above mm's context ID if the page
+ * is being freed asynchronously. Can't directly
+ * use the mm_struct, unless we take additional
+ * steps to avoid it from being freed while the
+ * async work is pending.
+ */
+ u64 asi_mm_ctx_id;
+ };
};
#endif
};
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 01784bff2a80..998ff6a56732 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5182,20 +5182,41 @@ static void async_free_work_fn(struct work_struct *work)
{
struct page *page, *tmp;
struct llist_node *pages_to_free;
- void *va;
- size_t len;
+ size_t addr;
uint order;
pages_to_free = llist_del_all(this_cpu_ptr(&pages_to_free_async));
- /* A later patch will do a more optimized TLB flush. */
+ if (!pages_to_free)
+ return;
+
+ /* If we only have one page to free, then do a targeted TLB flush. */
+ if (!llist_next(pages_to_free)) {
+ page = llist_entry(pages_to_free, struct page, async_free_node);
+ addr = (size_t)page_to_virt(page);
+ order = page->private;
+
+ __asi_flush_tlb_range(page->asi_mm_ctx_id, 0, page->asi_tlb_gen,
+ addr, addr + PAGE_SIZE * (1 << order),
+ cpu_online_mask);
+ /* Need to clear, since it shares space with page->mapping. */
+ page->asi_tlb_gen = 0;
+
+ __free_the_page(page, order);
+ return;
+ }
+
+ /*
+ * Otherwise, do a full flush. We could potentially try to optimize it
+ * via taking a union of what needs to be flushed, but it may not be
+ * worth the additional complexity.
+ */
+ asi_flush_tlb_range(ASI_GLOBAL_NONSENSITIVE, 0, TLB_FLUSH_ALL);
llist_for_each_entry_safe(page, tmp, pages_to_free, async_free_node) {
- va = page_to_virt(page);
order = page->private;
- len = PAGE_SIZE * (1 << order);
-
- asi_flush_tlb_range(ASI_GLOBAL_NONSENSITIVE, va, len);
+ /* Need to clear, since it shares space with page->mapping. */
+ page->asi_tlb_gen = 0;
__free_the_page(page, order);
}
}
@@ -5291,6 +5312,11 @@ static bool asi_unmap_freed_pages(struct page *page, unsigned int order)
if (!async_flush_needed)
return true;
+ page->asi_mm_ctx_id = PageGlobalNonSensitive(page)
+ ? U64_MAX : asi->mm->context.ctx_id;
+
+ __asi_prepare_tlb_flush(asi, &page->asi_tlb_gen);
+
page->private = order;
llist_add(&page->async_free_node, this_cpu_ptr(&pages_to_free_async));
--
2.35.1.473.g83b2b277ed-goog
next prev parent reply other threads:[~2022-02-23 5:26 UTC|newest]
Thread overview: 64+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-02-23 5:21 [RFC PATCH 00/47] Address Space Isolation for KVM Junaid Shahid
2022-02-23 5:21 ` [RFC PATCH 01/47] mm: asi: Introduce ASI core API Junaid Shahid
2022-02-23 5:21 ` [RFC PATCH 02/47] mm: asi: Add command-line parameter to enable/disable ASI Junaid Shahid
2022-02-23 5:21 ` [RFC PATCH 03/47] mm: asi: Switch to unrestricted address space when entering scheduler Junaid Shahid
2022-02-23 5:21 ` [RFC PATCH 04/47] mm: asi: ASI support in interrupts/exceptions Junaid Shahid
2022-03-14 15:50 ` Thomas Gleixner
2022-03-15 2:01 ` Junaid Shahid
2022-03-15 12:55 ` Thomas Gleixner
2022-03-15 22:41 ` Junaid Shahid
2022-02-23 5:21 ` [RFC PATCH 05/47] mm: asi: Make __get_current_cr3_fast() ASI-aware Junaid Shahid
2022-02-23 5:21 ` [RFC PATCH 06/47] mm: asi: ASI page table allocation and free functions Junaid Shahid
2022-02-23 5:21 ` [RFC PATCH 07/47] mm: asi: Functions to map/unmap a memory range into ASI page tables Junaid Shahid
2022-02-23 5:21 ` [RFC PATCH 08/47] mm: asi: Add basic infrastructure for global non-sensitive mappings Junaid Shahid
2022-02-23 5:21 ` [RFC PATCH 09/47] mm: Add __PAGEFLAG_FALSE Junaid Shahid
2022-02-23 5:21 ` [RFC PATCH 10/47] mm: asi: Support for global non-sensitive direct map allocations Junaid Shahid
2022-03-23 21:06 ` Matthew Wilcox
2022-03-23 23:48 ` Junaid Shahid
2022-03-24 1:54 ` Junaid Shahid
2022-02-23 5:21 ` [RFC PATCH 11/47] mm: asi: Global non-sensitive vmalloc/vmap support Junaid Shahid
2022-02-23 5:21 ` [RFC PATCH 12/47] mm: asi: Support for global non-sensitive slab caches Junaid Shahid
2022-02-23 5:21 ` [RFC PATCH 13/47] asi: Added ASI memory cgroup flag Junaid Shahid
2022-02-23 5:21 ` [RFC PATCH 14/47] mm: asi: Disable ASI API when ASI is not enabled for a process Junaid Shahid
2022-02-23 5:21 ` [RFC PATCH 15/47] kvm: asi: Restricted address space for VM execution Junaid Shahid
2022-02-23 5:21 ` [RFC PATCH 16/47] mm: asi: Support for mapping non-sensitive pcpu chunks Junaid Shahid
2022-02-23 5:21 ` [RFC PATCH 17/47] mm: asi: Aliased direct map for local non-sensitive allocations Junaid Shahid
2022-02-23 5:21 ` [RFC PATCH 18/47] mm: asi: Support for pre-ASI-init " Junaid Shahid
2022-02-23 5:21 ` [RFC PATCH 19/47] mm: asi: Support for locally nonsensitive page allocations Junaid Shahid
2022-02-23 5:21 ` [RFC PATCH 20/47] mm: asi: Support for locally non-sensitive vmalloc allocations Junaid Shahid
2022-02-23 5:21 ` [RFC PATCH 21/47] mm: asi: Add support for locally non-sensitive VM_USERMAP pages Junaid Shahid
2022-02-23  5:21 ` [RFC PATCH 22/47] mm: asi: Added refcounting when initializing an asi Junaid Shahid
2022-02-23 5:21 ` [RFC PATCH 23/47] mm: asi: Add support for mapping all userspace memory into ASI Junaid Shahid
2022-02-23 5:22 ` [RFC PATCH 24/47] mm: asi: Support for local non-sensitive slab caches Junaid Shahid
2022-02-23 5:22 ` [RFC PATCH 25/47] mm: asi: Avoid warning from NMI userspace accesses in ASI context Junaid Shahid
2022-02-23 5:22 ` [RFC PATCH 26/47] mm: asi: Use separate PCIDs for restricted address spaces Junaid Shahid
2022-02-23 5:22 ` [RFC PATCH 27/47] mm: asi: Avoid TLB flushes during ASI CR3 switches when possible Junaid Shahid
2022-02-23 5:22 ` [RFC PATCH 28/47] mm: asi: Avoid TLB flush IPIs to CPUs not in ASI context Junaid Shahid
2022-02-23 5:22 ` Junaid Shahid [this message]
2022-02-23 5:22 ` [RFC PATCH 30/47] mm: asi: Add API for mapping userspace address ranges Junaid Shahid
2022-02-23 5:22 ` [RFC PATCH 31/47] mm: asi: Support for non-sensitive SLUB caches Junaid Shahid
2022-02-23 5:22 ` [RFC PATCH 32/47] x86: asi: Allocate FPU state separately when ASI is enabled Junaid Shahid
2022-02-23 5:22 ` [RFC PATCH 33/47] kvm: asi: Map guest memory into restricted ASI address space Junaid Shahid
2022-02-23 5:22 ` [RFC PATCH 34/47] kvm: asi: Unmap guest memory from ASI address space when using nested virt Junaid Shahid
2022-02-23 5:22 ` [RFC PATCH 35/47] mm: asi: asi_exit() on PF, skip handling if address is accessible Junaid Shahid
2022-02-23 5:22 ` [RFC PATCH 36/47] mm: asi: Adding support for dynamic percpu ASI allocations Junaid Shahid
2022-02-23 5:22 ` [RFC PATCH 37/47] mm: asi: ASI annotation support for static variables Junaid Shahid
2022-02-23 5:22 ` [RFC PATCH 38/47] mm: asi: ASI annotation support for dynamic modules Junaid Shahid
2022-02-23 5:22 ` [RFC PATCH 39/47] mm: asi: Skip conventional L1TF/MDS mitigations Junaid Shahid
2022-02-23 5:22 ` [RFC PATCH 40/47] mm: asi: support for static percpu DEFINE_PER_CPU*_ASI Junaid Shahid
2022-02-23 5:22 ` [RFC PATCH 41/47] mm: asi: Annotation of static variables to be nonsensitive Junaid Shahid
2022-02-23 5:22 ` [RFC PATCH 42/47] mm: asi: Annotation of PERCPU " Junaid Shahid
2022-02-23 5:22 ` [RFC PATCH 43/47] mm: asi: Annotation of dynamic " Junaid Shahid
2022-02-23 5:22 ` [RFC PATCH 44/47] kvm: asi: Splitting kvm_vcpu_arch into non/sensitive parts Junaid Shahid
2022-02-23 5:22 ` [RFC PATCH 45/47] mm: asi: Mapping global nonsensitive areas in asi_global_init Junaid Shahid
2022-02-23 5:22 ` [RFC PATCH 46/47] kvm: asi: Do asi_exit() in vcpu_run loop before returning to userspace Junaid Shahid
2022-02-23 5:22 ` [RFC PATCH 47/47] mm: asi: Properly un/mapping task stack from ASI + tlb flush Junaid Shahid
2022-03-05 3:39 ` [RFC PATCH 00/47] Address Space Isolation for KVM Hyeonggon Yoo
2022-03-16 21:34 ` Alexandre Chartre
2022-03-17 23:25 ` Junaid Shahid
2022-03-22 9:46 ` Alexandre Chartre
2022-03-23 19:35 ` Junaid Shahid
2022-04-08 8:52 ` Alexandre Chartre
2022-04-11 3:26 ` junaid_shahid
2022-03-16 22:49 ` Thomas Gleixner
2022-03-17 21:24 ` Junaid Shahid
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20220223052223.1202152-30-junaids@google.com \
--to=junaids@google.com \
--cc=alexandre.chartre@oracle.com \
--cc=dave.hansen@linux.intel.com \
--cc=jmattson@google.com \
--cc=kvm@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=luto@kernel.org \
--cc=oweisse@google.com \
--cc=pbonzini@redhat.com \
--cc=peterz@infradead.org \
--cc=pjt@google.com \
--cc=rppt@linux.ibm.com \
--cc=tglx@linutronix.de \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox