linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Dave Hansen <dave.hansen@intel.com>
To: Jason Gunthorpe <jgg@nvidia.com>, Baolu Lu <baolu.lu@linux.intel.com>
Cc: Andrew Morton <akpm@linux-foundation.org>,
	Joerg Roedel <joro@8bytes.org>, Will Deacon <will@kernel.org>,
	Robin Murphy <robin.murphy@arm.com>,
	Kevin Tian <kevin.tian@intel.com>, Jann Horn <jannh@google.com>,
	Vasant Hegde <vasant.hegde@amd.com>,
	Thomas Gleixner <tglx@linutronix.de>,
	Ingo Molnar <mingo@redhat.com>, Borislav Petkov <bp@alien8.de>,
	Alistair Popple <apopple@nvidia.com>,
	Peter Zijlstra <peterz@infradead.org>,
	Uladzislau Rezki <urezki@gmail.com>,
	Jean-Philippe Brucker <jean-philippe@linaro.org>,
	Andy Lutomirski <luto@kernel.org>, Yi Lai <yi1.lai@intel.com>,
	David Hildenbrand <david@redhat.com>,
	Lorenzo Stoakes <lorenzo.stoakes@oracle.com>,
	"Liam R . Howlett" <Liam.Howlett@oracle.com>,
	Vlastimil Babka <vbabka@suse.cz>, Mike Rapoport <rppt@kernel.org>,
	Michal Hocko <mhocko@kernel.org>,
	Matthew Wilcox <willy@infradead.org>,
	iommu@lists.linux.dev, security@kernel.org, x86@kernel.org,
	linux-mm@kvack.org, linux-kernel@vger.kernel.org,
	"Jiang, Dave" <dave.jiang@intel.com>,
	Vinicius Costa Gomes <vinicius.gomes@intel.com>
Subject: Re: [PATCH v6 0/7] Fix stale IOTLB entries for kernel address space
Date: Fri, 17 Oct 2025 10:28:50 -0700	[thread overview]
Message-ID: <c3eee56a-7fe3-454c-878f-cff37467fb7e@intel.com> (raw)
In-Reply-To: <20251017140101.GM3901471@nvidia.com>

[-- Attachment #1: Type: text/plain, Size: 745 bytes --]

On 10/17/25 07:01, Jason Gunthorpe wrote:
>>> The other alternative is to have arch_vmap_pmd_supported() return false
>>> when SVA is active, or maybe when it's supported on the platform.
>>>
>>> Either of those are 10-ish lines of code and easy to backport.
>> Hi iommu folks, any insights on this?
> IDK, the only SVA user on x86 I know is IDXD, so if you do the above
> plan you break IDXD in all stable kernels. Doesn't sound OK?

Vinicius, any thoughts on this?

I'm thinking that even messing with arch_vmap_pmd_supported() would be
suboptimal. The easiest thing is to just stick the attached patch in
stable kernels and disable SVA at compile time.

There just aren't enough SVA users out in the wild to justify more
complexity than this.

[-- Attachment #2: svm.patch --]
[-- Type: text/x-patch, Size: 2868 bytes --]

diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c
index c9103a6fa06e..0b0e0283994f 100644
--- a/arch/x86/entry/vsyscall/vsyscall_64.c
+++ b/arch/x86/entry/vsyscall/vsyscall_64.c
@@ -124,7 +124,8 @@ bool emulate_vsyscall(unsigned long error_code,
 	if ((error_code & (X86_PF_WRITE | X86_PF_USER)) != X86_PF_USER)
 		return false;
 
-	if (!(error_code & X86_PF_INSTR)) {
+	/* Avoid emulation unless userspace was executing from vsyscall page: */
+	if (address != regs->ip) {
 		/* Failed vsyscall read */
 		if (vsyscall_mode == EMULATE)
 			return false;
@@ -136,13 +137,16 @@ bool emulate_vsyscall(unsigned long error_code,
 		return false;
 	}
 
+
+	/* X86_PF_INSTR is only set when NX is supported: */
+	if (cpu_feature_enabled(X86_FEATURE_NX))
+		WARN_ON_ONCE(!(error_code & X86_PF_INSTR));
+
 	/*
 	 * No point in checking CS -- the only way to get here is a user mode
 	 * trap to a high address, which means that we're in 64-bit user code.
 	 */
 
-	WARN_ON_ONCE(address != regs->ip);
-
 	if (vsyscall_mode == NONE) {
 		warn_bad_vsyscall(KERN_INFO, regs,
 				  "vsyscall attempted with vsyscall=none");
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 39f80111e6f1..e3ce9b0b2447 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -665,6 +665,7 @@ static unsigned long mm_mangle_tif_spec_bits(struct task_struct *next)
 static void cond_mitigation(struct task_struct *next)
 {
 	unsigned long prev_mm, next_mm;
+	bool userspace_needs_ibpb = false;
 
 	if (!next || !next->mm)
 		return;
@@ -722,7 +723,7 @@ static void cond_mitigation(struct task_struct *next)
 		 */
 		if (next_mm != prev_mm &&
 		    (next_mm | prev_mm) & LAST_USER_MM_IBPB)
-			indirect_branch_prediction_barrier();
+			userspace_needs_ibpb = true;
 	}
 
 	if (static_branch_unlikely(&switch_mm_always_ibpb)) {
@@ -732,9 +733,11 @@ static void cond_mitigation(struct task_struct *next)
 		 * last on this CPU.
 		 */
 		if ((prev_mm & ~LAST_USER_MM_SPEC_MASK) != (unsigned long)next->mm)
-			indirect_branch_prediction_barrier();
+			userspace_needs_ibpb = true;
 	}
 
+	this_cpu_write(x86_ibpb_exit_to_user, userspace_needs_ibpb);
+
 	if (static_branch_unlikely(&switch_mm_cond_l1d_flush)) {
 		/*
 		 * Flush L1D when the outgoing task requested it and/or
diff --git a/drivers/iommu/intel/Kconfig b/drivers/iommu/intel/Kconfig
index f2f538c70650..a5d66bfd9e50 100644
--- a/drivers/iommu/intel/Kconfig
+++ b/drivers/iommu/intel/Kconfig
@@ -48,7 +48,10 @@ config INTEL_IOMMU_DEBUGFS
 
 config INTEL_IOMMU_SVM
 	bool "Support for Shared Virtual Memory with Intel IOMMU"
-	depends on X86_64
+	# The kernel does not invalidate IOTLB entries when freeing
+	# kernel page tables. This can lead to IOMMUs walking (and
+	# writing to) CPU page tables after they are freed.
+	depends on BROKEN
 	select MMU_NOTIFIER
 	select IOMMU_SVA
 	help

  reply	other threads:[~2025-10-17 17:28 UTC|newest]

Thread overview: 31+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-10-14 13:04 [PATCH v6 0/7] Fix stale IOTLB entries for kernel address space Lu Baolu
2025-10-14 13:04 ` [PATCH v6 1/7] mm: Add a ptdesc flag to mark kernel page tables Lu Baolu
2025-10-16 19:26   ` David Hildenbrand
2025-10-14 13:04 ` [PATCH v6 2/7] mm: Actually mark kernel page table pages Lu Baolu
2025-10-14 13:04 ` [PATCH v6 3/7] x86/mm: Use 'ptdesc' when freeing PMD pages Lu Baolu
2025-10-14 23:19   ` Dave Hansen
2025-10-15  5:19     ` Baolu Lu
2025-10-16 19:33   ` David Hildenbrand
2025-10-14 13:04 ` [PATCH v6 4/7] mm: Introduce pure page table freeing function Lu Baolu
2025-10-14 13:04 ` [PATCH v6 5/7] x86/mm: Use pagetable_free() Lu Baolu
2025-10-14 13:04 ` [PATCH v6 6/7] mm: Introduce deferred freeing for kernel page tables Lu Baolu
2025-10-16 19:35   ` David Hildenbrand
2025-10-17  1:29     ` Baolu Lu
2025-10-14 13:04 ` [PATCH v6 7/7] iommu/sva: Invalidate stale IOTLB entries for kernel address space Lu Baolu
2025-10-14 20:59 ` [syzbot ci] Re: Fix " syzbot ci
2025-10-15 16:25   ` Dave Hansen
2025-10-16  8:00     ` Baolu Lu
2025-10-17 17:05       ` Dave Hansen
2025-10-17 17:10       ` David Hildenbrand
2025-10-20  5:34         ` Baolu Lu
2025-10-20 14:26           ` David Hildenbrand
2025-10-15  0:43 ` [PATCH v6 0/7] " Andrew Morton
2025-10-15  5:38   ` Baolu Lu
2025-10-15 15:55     ` Dave Hansen
2025-10-17  1:42       ` Baolu Lu
2025-10-17 14:01         ` Jason Gunthorpe
2025-10-17 17:28           ` Dave Hansen [this message]
2025-10-17 17:31             ` Dave Hansen
2025-10-17 17:54               ` Jason Gunthorpe
2025-10-17 18:26             ` Vinicius Costa Gomes
2025-10-22  5:06               ` Baolu Lu

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=c3eee56a-7fe3-454c-878f-cff37467fb7e@intel.com \
    --to=dave.hansen@intel.com \
    --cc=Liam.Howlett@oracle.com \
    --cc=akpm@linux-foundation.org \
    --cc=apopple@nvidia.com \
    --cc=baolu.lu@linux.intel.com \
    --cc=bp@alien8.de \
    --cc=dave.jiang@intel.com \
    --cc=david@redhat.com \
    --cc=iommu@lists.linux.dev \
    --cc=jannh@google.com \
    --cc=jean-philippe@linaro.org \
    --cc=jgg@nvidia.com \
    --cc=joro@8bytes.org \
    --cc=kevin.tian@intel.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=lorenzo.stoakes@oracle.com \
    --cc=luto@kernel.org \
    --cc=mhocko@kernel.org \
    --cc=mingo@redhat.com \
    --cc=peterz@infradead.org \
    --cc=robin.murphy@arm.com \
    --cc=rppt@kernel.org \
    --cc=security@kernel.org \
    --cc=tglx@linutronix.de \
    --cc=urezki@gmail.com \
    --cc=vasant.hegde@amd.com \
    --cc=vbabka@suse.cz \
    --cc=vinicius.gomes@intel.com \
    --cc=will@kernel.org \
    --cc=willy@infradead.org \
    --cc=x86@kernel.org \
    --cc=yi1.lai@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).