From: Joel Fernandes <joelagnelf@nvidia.com>
To: Danilo Krummrich <dakr@kernel.org>
Cc: linux-kernel@vger.kernel.org, Miguel Ojeda <ojeda@kernel.org>,
Boqun Feng <boqun@kernel.org>, Gary Guo <gary@garyguo.net>,
Bjorn Roy Baron <bjorn3_gh@protonmail.com>,
Benno Lossin <lossin@kernel.org>,
Andreas Hindborg <a.hindborg@kernel.org>,
Alice Ryhl <aliceryhl@google.com>,
Trevor Gross <tmgross@umich.edu>,
Dave Airlie <airlied@redhat.com>,
Daniel Almeida <daniel.almeida@collabora.com>,
Koen Koning <koen.koning@linux.intel.com>,
dri-devel@lists.freedesktop.org, rust-for-linux@vger.kernel.org,
Nikola Djukic <ndjukic@nvidia.com>,
Maarten Lankhorst <maarten.lankhorst@linux.intel.com>,
Maxime Ripard <mripard@kernel.org>,
Thomas Zimmermann <tzimmermann@suse.de>,
David Airlie <airlied@gmail.com>, Simona Vetter <simona@ffwll.ch>,
Jonathan Corbet <corbet@lwn.net>,
Alex Deucher <alexander.deucher@amd.com>,
Christian Koenig <christian.koenig@amd.com>,
Jani Nikula <jani.nikula@linux.intel.com>,
Joonas Lahtinen <joonas.lahtinen@linux.intel.com>,
Rodrigo Vivi <rodrigo.vivi@intel.com>,
Tvrtko Ursulin <tursulin@ursulin.net>,
Huang Rui <ray.huang@amd.com>,
Matthew Auld <matthew.auld@intel.com>,
Lucas De Marchi <lucas.demarchi@intel.com>,
Thomas Hellstrom <thomas.hellstrom@linux.intel.com>,
Helge Deller <deller@gmx.de>, Alex Gaynor <alex.gaynor@gmail.com>,
Boqun Feng <boqun.feng@gmail.com>,
John Hubbard <jhubbard@nvidia.com>,
Alistair Popple <apopple@nvidia.com>,
Timur Tabi <ttabi@nvidia.com>, Edwin Peer <epeer@nvidia.com>,
Alexandre Courbot <acourbot@nvidia.com>,
Andrea Righi <arighi@nvidia.com>,
Andy Ritger <aritger@nvidia.com>, Zhi Wang <zhiw@nvidia.com>,
Balbir Singh <balbirs@nvidia.com>,
Philipp Stanner <phasta@kernel.org>,
Elle Rhumsaa <elle@weathered-steel.dev>,
alexeyi@nvidia.com, Eliot Courtney <ecourtney@nvidia.com>,
joel@joelfernandes.org, linux-doc@vger.kernel.org,
amd-gfx@lists.freedesktop.org, intel-gfx@lists.freedesktop.org,
intel-xe@lists.freedesktop.org, linux-fbdev@vger.kernel.org
Subject: Re: [PATCH v11 07/20] gpu: nova-core: mm: Add TLB flush support
Date: Tue, 21 Apr 2026 13:23:00 -0400 [thread overview]
Message-ID: <20260421172300.GA2038908@joelbox2> (raw)
In-Reply-To: <9f30b572-04be-4adc-b5f0-a286ea601996@nvidia.com>
On Tue, Apr 21, 2026 at 09:47:39AM -0400, Joel Fernandes wrote:
>
>
> On 4/16/2026 6:53 PM, Danilo Krummrich wrote:
> > On Fri Apr 17, 2026 at 12:18 AM CEST, Joel Fernandes wrote:
> >> On 4/16/2026 5:45 PM, Danilo Krummrich wrote:
> >>> Why do we need the try_access() dance in the first place? I assume this ends up
> >>> being called from the BarAccess destructor?
> >>
> >> BarAccess is different. The try_access() calls here are in tlb.rs and
> >> pramin.rs for Bar0.
> >
> > Yes, and we shouldn't need them in the first place; we should have a
> > &Device<Bound> in all call paths this is called from.
So this requires threading the device reference through a few more call paths, but agreed it is an improvement.
Here is a preview, let me know if this is not what you had in mind, thanks!
---8<-----------------------
diff --git a/drivers/gpu/nova-core/gpu.rs b/drivers/gpu/nova-core/gpu.rs
index 6ea9ab7647ced..c2756525dffad 100644
--- a/drivers/gpu/nova-core/gpu.rs
+++ b/drivers/gpu/nova-core/gpu.rs
@@ -322,7 +322,7 @@ pub(crate) fn new<'a>(
// PRAMIN covers all physical VRAM (including GSP-reserved areas
// above the usable region, e.g. the BAR1 page directory).
let pramin_vram_region = 0..gsp_static_info.total_fb_end;
- GpuMm::new(devres_bar.clone(), spec.chipset, GpuBuddyParams {
+ GpuMm::new(devres_bar.clone(), pdev.as_ref(), spec.chipset, GpuBuddyParams {
base_offset: usable_vram.start,
size: usable_vram.end - usable_vram.start,
chunk_size: Alignment::new::<SZ_4K>(),
diff --git a/drivers/gpu/nova-core/mm.rs b/drivers/gpu/nova-core/mm.rs
index 2583e32fb5dc1..1c0d076a785d5 100644
--- a/drivers/gpu/nova-core/mm.rs
+++ b/drivers/gpu/nova-core/mm.rs
@@ -32,6 +32,7 @@ fn from(pfn: Pfn) -> Self {
use kernel::{
bitfield,
+ device,
devres::Devres,
gpu::buddy::{
GpuBuddy,
@@ -75,13 +76,14 @@ impl GpuMm {
/// areas). PRAMIN window accesses are validated against this range.
pub(crate) fn new(
bar: Arc<Devres<Bar0>>,
+ dev: &device::Device<device::Bound>,
chipset: Chipset,
buddy_params: GpuBuddyParams,
pramin_vram_region: core::ops::Range<u64>,
) -> Result<impl PinInit<Self>> {
let buddy = GpuBuddy::new(buddy_params)?;
let tlb_init = Tlb::new(bar.clone());
- let pramin_init = pramin::Pramin::new(bar, chipset, pramin_vram_region)?;
+ let pramin_init = pramin::Pramin::new(bar, dev, chipset, pramin_vram_region)?;
Ok(pin_init!(Self {
buddy,
diff --git a/drivers/gpu/nova-core/mm/bar_user.rs b/drivers/gpu/nova-core/mm/bar_user.rs
index 086d33776c48a..172f9c0f5b4d8 100644
--- a/drivers/gpu/nova-core/mm/bar_user.rs
+++ b/drivers/gpu/nova-core/mm/bar_user.rs
@@ -4,6 +4,7 @@
//! for GPU work submission, and applications to access GPU buffers via mmap().
use kernel::{
+ device,
io::Io,
prelude::*, //
};
@@ -45,6 +46,7 @@ pub(crate) fn new(pdb_addr: VramAddress, chipset: Chipset, va_size: u64) -> Resu
/// Map physical pages to a contiguous BAR1 virtual range.
pub(crate) fn map<'a>(
&'a mut self,
+ dev: &'a device::Device<device::Bound>,
mm: &'a GpuMm,
bar: &'a Bar1,
pfns: &[Pfn],
@@ -54,10 +56,11 @@ pub(crate) fn map<'a>(
return Err(EINVAL);
}
- let mapped = self.vmm.map_pages(mm, pfns, None, writable)?;
+ let mapped = self.vmm.map_pages(dev, mm, pfns, None, writable)?;
Ok(BarUserAccess {
vmm: &mut self.vmm,
+ dev,
mm,
bar,
mapped: Some(mapped),
@@ -72,6 +75,7 @@ pub(crate) fn map<'a>(
/// [`Vmm::unmap_pages()`], which consumes it).
pub(crate) struct BarUserAccess<'a> {
vmm: &'a mut Vmm,
+ dev: &'a device::Device<device::Bound>,
mm: &'a GpuMm,
bar: &'a Bar1,
/// Needs to be an `Option` so that we can `take()` it and call `Drop`
@@ -144,7 +148,7 @@ pub(crate) fn try_write64(&self, value: u64, offset: usize) -> Result {
impl Drop for BarUserAccess<'_> {
fn drop(&mut self) {
if let Some(mapped) = self.mapped.take() {
- if self.vmm.unmap_pages(self.mm, mapped).is_err() {
+ if self.vmm.unmap_pages(self.dev, self.mm, mapped).is_err() {
kernel::pr_warn_once!("BarUserAccess: unmap_pages failed.\n");
}
}
@@ -158,7 +162,7 @@ fn drop(&mut self) {
/// and test pages as needed.
#[cfg(CONFIG_NOVA_MM_SELFTESTS)]
pub(crate) fn run_self_test(
- dev: &kernel::device::Device,
+ pdev: &device::Device<device::Bound>,
mm: &GpuMm,
bar1: &Bar1,
bar1_pdb: u64,
@@ -180,12 +184,13 @@ pub(crate) fn run_self_test(
const PATTERN_PRAMIN: u32 = 0xDEAD_BEEF;
const PATTERN_BAR1: u32 = 0xCAFE_BABE;
+ let dev = pdev.as_ref();
dev_info!(dev, "MM: Starting self-test...\n");
let pdb_addr = VramAddress::new(bar1_pdb);
// Check if initial page tables are in VRAM.
- if crate::mm::pagetable::check_pdb_valid(mm.pramin(), pdb_addr, chipset).is_err() {
+ if crate::mm::pagetable::check_pdb_valid(pdev, mm.pramin(), pdb_addr, chipset).is_err() {
dev_info!(dev, "MM: Self-test SKIPPED - no valid VRAM page tables\n");
return Ok(());
}
@@ -208,7 +213,7 @@ pub(crate) fn run_self_test(
let mut vmm = Vmm::new(pdb_addr, chipset.mmu_version(), SZ_64K.into_safe_cast())?;
// Create a test mapping.
- let mapped = vmm.map_pages(mm, &[test_pfn], None, true)?;
+ let mapped = vmm.map_pages(pdev, mm, &[test_pfn], None, true)?;
let test_vfn = mapped.vfn_start;
// Pre-compute test addresses for the PRAMIN to BAR1 read test.
@@ -219,7 +224,7 @@ pub(crate) fn run_self_test(
// Test 1: Write via PRAMIN, read via BAR1.
{
- let mut window = mm.pramin().get_window()?;
+ let mut window = mm.pramin().get_window(pdev)?;
window.try_write32(vram_read_addr, PATTERN_PRAMIN)?;
}
@@ -239,19 +244,19 @@ pub(crate) fn run_self_test(
};
// Cleanup - invalidate PTE.
- vmm.unmap_pages(mm, mapped)?;
+ vmm.unmap_pages(pdev, mm, mapped)?;
// Test 2: Two-phase prepare/execute API.
- let prepared = vmm.prepare_map(mm, 1, None)?;
- let mapped2 = vmm.execute_map(mm, prepared, &[test_pfn], true)?;
- let readback = vmm.read_mapping(mm, mapped2.vfn_start)?;
+ let prepared = vmm.prepare_map(pdev, mm, 1, None)?;
+ let mapped2 = vmm.execute_map(pdev, mm, prepared, &[test_pfn], true)?;
+ let readback = vmm.read_mapping(pdev, mm, mapped2.vfn_start)?;
let test2_passed = if readback == Some(test_pfn) {
true
} else {
dev_err!(dev, "MM: Test 2 FAILED - Two-phase map readback mismatch\n");
false
};
- vmm.unmap_pages(mm, mapped2)?;
+ vmm.unmap_pages(pdev, mm, mapped2)?;
// Test 3: Range-constrained allocation with a hole — exercises block.size()-driven
// BAR1 mapping. A 4K hole is punched at base+16K, then a single 32K allocation
@@ -311,7 +316,7 @@ pub(crate) fn run_self_test(
)?;
}
- let mapped = vmm.map_pages(mm, &pfns, None, true)?;
+ let mapped = vmm.map_pages(pdev, mm, &pfns, None, true)?;
let bar1_base_vfn: usize = mapped.vfn_start.raw().into_safe_cast();
let bar1_base = bar1_base_vfn.checked_mul(PAGE_SIZE).ok_or(EOVERFLOW)?;
@@ -326,7 +331,7 @@ pub(crate) fn run_self_test(
bar1.try_write32(PATTERN_BAR1, page_bar1_off)?;
let pramin_val = {
- let mut window = mm.pramin().get_window()?;
+ let mut window = mm.pramin().get_window(pdev)?;
window.try_read32(page_phys.into_safe_cast())?
};
@@ -342,7 +347,7 @@ pub(crate) fn run_self_test(
}
}
- vmm.unmap_pages(mm, mapped)?;
+ vmm.unmap_pages(pdev, mm, mapped)?;
}
// Verify aggregate: all returned block sizes must sum to allocation size.
@@ -363,11 +368,11 @@ pub(crate) fn run_self_test(
// Test 4: Exercise `BarUser::map()` end-to-end.
let mut bar_user = BarUser::new(pdb_addr, chipset, SZ_64K.into_safe_cast())?;
let test4_passed = {
- let access = bar_user.map(mm, bar1, &[test_pfn], true)?;
+ let access = bar_user.map(pdev, mm, bar1, &[test_pfn], true)?;
// Write pattern via PRAMIN, read via BarUserAccess.
{
- let mut window = mm.pramin().get_window()?;
+ let mut window = mm.pramin().get_window(pdev)?;
window.try_write32(test_vram.raw(), PATTERN_BAR1)?;
}
diff --git a/drivers/gpu/nova-core/mm/pagetable.rs b/drivers/gpu/nova-core/mm/pagetable.rs
index 922ff8bd4f0fd..b267dcf4dd8ba 100644
--- a/drivers/gpu/nova-core/mm/pagetable.rs
+++ b/drivers/gpu/nova-core/mm/pagetable.rs
@@ -22,7 +22,10 @@
VirtualAddress,
VramAddress, //
};
-use kernel::prelude::*;
+use kernel::{
+ device,
+ prelude::*, //
+};
/// Extracts the page table index at a given level from a virtual address.
pub(super) trait VaLevelIndex {
@@ -386,10 +389,11 @@ fn from(val: AperturePde) -> Self {
/// Check if the PDB has valid, VRAM-backed page tables.
#[cfg(CONFIG_NOVA_MM_SELFTESTS)]
fn check_pdb_inner<M: MmuConfig>(
+ dev: &device::Device<device::Bound>,
pramin: &pramin::Pramin,
pdb_addr: VramAddress,
) -> Result {
- let mut window = pramin.get_window()?;
+ let mut window = pramin.get_window(dev)?;
let raw = window.try_read64(pdb_addr.raw())?;
if !M::Pde::new(raw).is_valid_vram() {
@@ -401,12 +405,13 @@ fn check_pdb_inner<M: MmuConfig>(
/// Check if the PDB has valid, VRAM-backed page tables, dispatching by MMU version.
#[cfg(CONFIG_NOVA_MM_SELFTESTS)]
pub(super) fn check_pdb_valid(
+ dev: &device::Device<device::Bound>,
pramin: &pramin::Pramin,
pdb_addr: VramAddress,
chipset: crate::gpu::Chipset,
) -> Result {
match MmuVersion::from(chipset.arch()) {
- MmuVersion::V2 => check_pdb_inner::<MmuV2>(pramin, pdb_addr),
- MmuVersion::V3 => check_pdb_inner::<MmuV3>(pramin, pdb_addr),
+ MmuVersion::V2 => check_pdb_inner::<MmuV2>(dev, pramin, pdb_addr),
+ MmuVersion::V3 => check_pdb_inner::<MmuV3>(dev, pramin, pdb_addr),
}
}
diff --git a/drivers/gpu/nova-core/mm/pagetable/map.rs b/drivers/gpu/nova-core/mm/pagetable/map.rs
index a9719580143e1..16af491472dbc 100644
--- a/drivers/gpu/nova-core/mm/pagetable/map.rs
+++ b/drivers/gpu/nova-core/mm/pagetable/map.rs
@@ -5,6 +5,7 @@
use core::marker::PhantomData;
use kernel::{
+ device,
gpu::buddy::{
AllocatedBlocks,
GpuBuddyAllocFlags,
@@ -73,7 +74,11 @@ pub(super) fn new(pdb_addr: VramAddress) -> Self {
}
/// Allocate and zero a physical page table page.
- fn alloc_and_zero_page(mm: &GpuMm, level: PageTableLevel) -> Result<PreparedPtPage> {
+ fn alloc_and_zero_page(
+ dev: &device::Device<device::Bound>,
+ mm: &GpuMm,
+ level: PageTableLevel,
+ ) -> Result<PreparedPtPage> {
let blocks = KBox::pin_init(
mm.buddy().alloc_blocks(
GpuBuddyAllocMode::Simple,
@@ -87,7 +92,7 @@ fn alloc_and_zero_page(mm: &GpuMm, level: PageTableLevel) -> Result<PreparedPtPa
let page_vram = VramAddress::new(blocks.iter().next().ok_or(ENOMEM)?.offset());
// Zero via PRAMIN.
- let mut window = mm.pramin().get_window()?;
+ let mut window = mm.pramin().get_window(dev)?;
let base = page_vram.raw();
for off in (0..PAGE_SIZE).step_by(8) {
window.try_write64(base + off, 0)?;
@@ -106,6 +111,7 @@ fn alloc_and_zero_page(mm: &GpuMm, level: PageTableLevel) -> Result<PreparedPtPa
/// the fence signalling critical path.
fn ensure_single_pte_path(
&self,
+ dev: &device::Device<device::Bound>,
mm: &GpuMm,
vfn: Vfn,
pt_pages: &mut RBTree<VramAddress, PreparedPtPage>,
@@ -113,7 +119,7 @@ fn ensure_single_pte_path(
let max_iter = 2 * M::PDE_LEVELS.len();
for _ in 0..max_iter {
- let mut window = mm.pramin().get_window()?;
+ let mut window = mm.pramin().get_window(dev)?;
let result = self
.walker
@@ -133,7 +139,7 @@ fn ensure_single_pte_path(
} => {
// Drop PRAMIN before allocation.
drop(window);
- let page = Self::alloc_and_zero_page(mm, level)?;
+ let page = Self::alloc_and_zero_page(dev, mm, level)?;
let node = RBTreeNode::new(install_addr, page, GFP_KERNEL)?;
let old = pt_pages.insert(node);
if old.is_some() {
@@ -160,6 +166,7 @@ fn ensure_single_pte_path(
/// per-VFN to prepare pages for all missing PDEs.
pub(super) fn prepare_map(
&self,
+ dev: &device::Device<device::Bound>,
mm: &GpuMm,
vfn_start: Vfn,
num_pages: usize,
@@ -175,7 +182,7 @@ pub(super) fn prepare_map(
for i in 0..num_pages {
let i_u64: u64 = i.into_safe_cast();
let vfn = Vfn::new(vfn_start.raw() + i_u64);
- self.ensure_single_pte_path(mm, vfn, pt_pages)?;
+ self.ensure_single_pte_path(dev, mm, vfn, pt_pages)?;
}
Ok(())
}
@@ -185,6 +192,7 @@ pub(super) fn prepare_map(
/// Drains `pt_pages` and moves allocations into `page_table_allocs`.
pub(super) fn install_mappings(
&self,
+ dev: &device::Device<device::Bound>,
mm: &GpuMm,
pt_pages: &mut RBTree<VramAddress, PreparedPtPage>,
page_table_allocs: &mut KVec<Pin<KBox<AllocatedBlocks>>>,
@@ -192,7 +200,7 @@ pub(super) fn install_mappings(
pfns: &[Pfn],
writable: bool,
) -> Result {
- let mut window = mm.pramin().get_window()?;
+ let mut window = mm.pramin().get_window(dev)?;
// Drain prepared PT pages, install all pending PDEs.
let mut cursor = pt_pages.cursor_front_mut();
@@ -239,14 +247,20 @@ pub(super) fn install_mappings(
drop(window);
// Flush TLB.
- mm.tlb().flush(self.pdb_addr)
+ mm.tlb().flush(dev, self.pdb_addr)
}
/// Invalidate PTEs for a range and flush TLB.
- pub(super) fn invalidate_ptes(&self, mm: &GpuMm, vfn_start: Vfn, num_pages: usize) -> Result {
+ pub(super) fn invalidate_ptes(
+ &self,
+ dev: &device::Device<device::Bound>,
+ mm: &GpuMm,
+ vfn_start: Vfn,
+ num_pages: usize,
+ ) -> Result {
let invalid_pte = M::Pte::invalid();
- let mut window = mm.pramin().get_window()?;
+ let mut window = mm.pramin().get_window(dev)?;
for i in 0..num_pages {
let i_u64: u64 = i.into_safe_cast();
let vfn = Vfn::new(vfn_start.raw() + i_u64);
@@ -265,7 +279,7 @@ pub(super) fn invalidate_ptes(&self, mm: &GpuMm, vfn_start: Vfn, num_pages: usiz
}
drop(window);
- mm.tlb().flush(self.pdb_addr)
+ mm.tlb().flush(dev, self.pdb_addr)
}
}
@@ -298,6 +312,7 @@ pub(in crate::mm) fn new(pdb_addr: VramAddress, version: MmuVersion) -> Self {
/// Prepare page table resources for a mapping.
pub(in crate::mm) fn prepare_map(
&self,
+ dev: &device::Device<device::Bound>,
mm: &GpuMm,
vfn_start: Vfn,
num_pages: usize,
@@ -306,13 +321,14 @@ pub(in crate::mm) fn prepare_map(
) -> Result {
pt_map_dispatch!(
self,
- prepare_map(mm, vfn_start, num_pages, page_table_allocs, pt_pages)
+ prepare_map(dev, mm, vfn_start, num_pages, page_table_allocs, pt_pages)
)
}
/// Install prepared PDEs and write PTEs, then flush TLB.
pub(in crate::mm) fn install_mappings(
&self,
+ dev: &device::Device<device::Bound>,
mm: &GpuMm,
pt_pages: &mut RBTree<VramAddress, PreparedPtPage>,
page_table_allocs: &mut KVec<Pin<KBox<AllocatedBlocks>>>,
@@ -322,17 +338,18 @@ pub(in crate::mm) fn install_mappings(
) -> Result {
pt_map_dispatch!(
self,
- install_mappings(mm, pt_pages, page_table_allocs, vfn_start, pfns, writable)
+ install_mappings(dev, mm, pt_pages, page_table_allocs, vfn_start, pfns, writable)
)
}
/// Invalidate PTEs for a range and flush TLB.
pub(in crate::mm) fn invalidate_ptes(
&self,
+ dev: &device::Device<device::Bound>,
mm: &GpuMm,
vfn_start: Vfn,
num_pages: usize,
) -> Result {
- pt_map_dispatch!(self, invalidate_ptes(mm, vfn_start, num_pages))
+ pt_map_dispatch!(self, invalidate_ptes(dev, mm, vfn_start, num_pages))
}
}
diff --git a/drivers/gpu/nova-core/mm/pagetable/walk.rs b/drivers/gpu/nova-core/mm/pagetable/walk.rs
index 89d4426bcf144..fedb8b4f33e58 100644
--- a/drivers/gpu/nova-core/mm/pagetable/walk.rs
+++ b/drivers/gpu/nova-core/mm/pagetable/walk.rs
@@ -36,7 +36,10 @@
use core::marker::PhantomData;
-use kernel::prelude::*;
+use kernel::{
+ device,
+ prelude::*, //
+};
use super::{
DualPdeOps,
@@ -168,8 +171,13 @@ pub(super) fn walk_pde_levels(
/// Walk to PTE for lookup only (no allocation).
///
/// Returns [`WalkResult::PageTableMissing`] if intermediate tables don't exist.
- pub(super) fn walk_to_pte_lookup(&self, mm: &GpuMm, vfn: Vfn) -> Result<WalkResult> {
- let mut window = mm.pramin().get_window()?;
+ pub(super) fn walk_to_pte_lookup(
+ &self,
+ dev: &device::Device<device::Bound>,
+ mm: &GpuMm,
+ vfn: Vfn,
+ ) -> Result<WalkResult> {
+ let mut window = mm.pramin().get_window(dev)?;
self.walk_to_pte_lookup_with_window(&mut window, vfn)
}
@@ -236,7 +244,12 @@ pub(in crate::mm) fn new(pdb_addr: VramAddress, version: MmuVersion) -> Self {
}
/// Walk to PTE for lookup.
- pub(in crate::mm) fn walk_to_pte(&self, mm: &GpuMm, vfn: Vfn) -> Result<WalkResult> {
- pt_walk_dispatch!(self, walk_to_pte_lookup(mm, vfn))
+ pub(in crate::mm) fn walk_to_pte(
+ &self,
+ dev: &device::Device<device::Bound>,
+ mm: &GpuMm,
+ vfn: Vfn,
+ ) -> Result<WalkResult> {
+ pt_walk_dispatch!(self, walk_to_pte_lookup(dev, mm, vfn))
}
}
diff --git a/drivers/gpu/nova-core/mm/pramin.rs b/drivers/gpu/nova-core/mm/pramin.rs
index f56d6c3d4e255..c16717a73ecba 100644
--- a/drivers/gpu/nova-core/mm/pramin.rs
+++ b/drivers/gpu/nova-core/mm/pramin.rs
@@ -75,11 +75,11 @@
};
use kernel::{
+ device,
devres::Devres,
io::Io,
new_mutex,
prelude::*,
- revocable::RevocableGuard,
sizes::{
SZ_1M,
SZ_64K, //
@@ -117,7 +117,7 @@ pub(crate) fn $name(&mut self, vram_offset: usize) -> Result<$ty> {
self.compute_window(vram_offset, ::core::mem::size_of::<$ty>())?;
if let Some(base) = new_base {
- regs::pramin_window_write_base(self.chipset.arch(), &self.bar, base)?;
+ regs::pramin_window_write_base(self.chipset.arch(), self.bar, base)?;
*self.state = base;
}
self.bar.$name(bar_offset)
@@ -134,7 +134,7 @@ pub(crate) fn $name(&mut self, vram_offset: usize, value: $ty) -> Result {
self.compute_window(vram_offset, ::core::mem::size_of::<$ty>())?;
if let Some(base) = new_base {
- regs::pramin_window_write_base(self.chipset.arch(), &self.bar, base)?;
+ regs::pramin_window_write_base(self.chipset.arch(), self.bar, base)?;
*self.state = base;
}
self.bar.$name(value, bar_offset)
@@ -169,11 +169,12 @@ impl Pramin {
/// `vram_region` specifies the valid VRAM address range.
pub(crate) fn new(
bar: Arc<Devres<Bar0>>,
+ dev: &device::Device<device::Bound>,
chipset: Chipset,
vram_region: Range<u64>,
) -> Result<impl PinInit<Self>> {
- let bar_access = bar.try_access().ok_or(ENODEV)?;
- let current_base = regs::pramin_window_read_base(chipset.arch(), &bar_access);
+ let bar_access = bar.access(dev)?;
+ let current_base = regs::pramin_window_read_base(chipset.arch(), bar_access);
Ok(pin_init!(Self {
bar,
@@ -192,8 +193,11 @@ fn vram_region(&self) -> &Range<u64> {
///
/// Returns a [`PraminWindow`] guard that provides VRAM read/write accessors.
/// The [`PraminWindow`] is exclusive and only one can exist at a time.
- pub(crate) fn get_window(&self) -> Result<PraminWindow<'_>> {
- let bar = self.bar.try_access().ok_or(ENODEV)?;
+ pub(crate) fn get_window<'a>(
+ &'a self,
+ dev: &'a device::Device<device::Bound>,
+ ) -> Result<PraminWindow<'a>> {
+ let bar = self.bar.access(dev)?;
let state = self.state.lock();
Ok(PraminWindow {
bar,
@@ -212,7 +216,7 @@ pub(crate) fn get_window(&self) -> Result<PraminWindow<'_>> {
/// Only one [`PraminWindow`] can exist at a time per [`Pramin`] instance (enforced by the
/// internal `MutexGuard`).
pub(crate) struct PraminWindow<'a> {
- bar: RevocableGuard<'a, Bar0>,
+ bar: &'a Bar0,
chipset: Chipset,
vram_region: Range<u64>,
state: MutexGuard<'a, u64>,
@@ -433,14 +437,15 @@ fn test_misaligned_access(
/// Run PRAMIN self-tests during boot if self-tests are enabled.
#[cfg(CONFIG_NOVA_MM_SELFTESTS)]
-pub(crate) fn run_self_test(dev: &kernel::device::Device, pramin: &Pramin) -> Result {
+pub(crate) fn run_self_test(pdev: &device::Device<device::Bound>, pramin: &Pramin) -> Result {
+ let dev = pdev.as_ref();
dev_info!(dev, "PRAMIN: Starting self-test...\n");
let vram_region = pramin.vram_region();
let base: usize = vram_region.start.into_safe_cast();
let base = base + SELFTEST_REGION_OFFSET;
let vram_end = vram_region.end;
- let mut win = pramin.get_window()?;
+ let mut win = pramin.get_window(pdev)?;
test_byte_readwrite(dev, &mut win, base)?;
test_u32_as_bytes(dev, &mut win, base)?;
diff --git a/drivers/gpu/nova-core/mm/tlb.rs b/drivers/gpu/nova-core/mm/tlb.rs
index 8d36e1552792d..53c6fe6084b81 100644
--- a/drivers/gpu/nova-core/mm/tlb.rs
+++ b/drivers/gpu/nova-core/mm/tlb.rs
@@ -11,17 +11,22 @@
//! ```ignore
//! use crate::mm::tlb::Tlb;
//!
-//! fn page_table_update(tlb: &Tlb, pdb_addr: VramAddress) -> Result<()> {
+//! fn page_table_update(
+//! dev: &device::Device<device::Bound>,
+//! tlb: &Tlb,
+//! pdb_addr: VramAddress,
+//! ) -> Result<()> {
//! // ... modify page tables ...
//!
//! // Flush TLB to make changes visible (polls for completion).
-//! tlb.flush(pdb_addr)?;
+//! tlb.flush(dev, pdb_addr)?;
//!
//! Ok(())
//! }
//! ```
use kernel::{
+ device,
devres::Devres,
io::poll::read_poll_timeout,
io::Io,
@@ -92,39 +97,29 @@ pub(super) fn new(bar: Arc<Devres<Bar0>>) -> impl PinInit<Self> {
/// This invalidates all TLB entries associated with the given PDB address.
/// Must be called after modifying page table entries to ensure the GPU sees
/// the updated mappings.
- pub(super) fn flush(&self, pdb_addr: VramAddress) -> Result {
+ pub(super) fn flush(
+ &self,
+ dev: &device::Device<device::Bound>,
+ pdb_addr: VramAddress,
+ ) -> Result {
let _guard = self.lock.lock();
+ let bar = self.bar.access(dev)?;
- // Broken into 2 phases with scopes (Write and Poll) to avoid holding
- // RevecablableGuard (and hence RCU read-side critical section) across
- // the read_poll_timeout() call that can sleep.
+ // Write PDB address.
+ bar.write_reg(regs::NV_TLB_FLUSH_PDB_LO::from_pdb_addr(pdb_addr.raw_u64()));
+ bar.write_reg(regs::NV_TLB_FLUSH_PDB_HI::from_pdb_addr(pdb_addr.raw_u64()));
- // Write phase — hold bar access briefly for register writes only.
- {
- let bar = self.bar.try_access().ok_or(ENODEV)?;
+ // Trigger flush.
+ bar.write_reg(
+ regs::NV_TLB_FLUSH_CTRL::zeroed()
+ .with_all_va(true)
+ .with_ack(TlbAckMode::None)
+ .with_trigger(true),
+ );
- // Write PDB address.
- bar.write_reg(regs::NV_TLB_FLUSH_PDB_LO::from_pdb_addr(pdb_addr.raw_u64()));
- bar.write_reg(regs::NV_TLB_FLUSH_PDB_HI::from_pdb_addr(pdb_addr.raw_u64()));
-
- // Trigger flush: invalidate all virtual addresses, require global
- // acknowledgment from all engines before completion. See
- // [`TlbAckMode::Globally`] for why this scope is used unconditionally.
- bar.write_reg(
- regs::NV_TLB_FLUSH_CTRL::zeroed()
- .with_all_va(true)
- .with_ack(TlbAckMode::None)
- .with_trigger(true),
- );
- }
-
- // Poll for completion — re-acquire bar access each iteration to avoid
- // holding the RCU read-side lock (via RevocableGuard) across sleep.
+ // Poll for completion.
read_poll_timeout(
- || {
- let bar = self.bar.try_access().ok_or(ENODEV)?;
- Ok(bar.read(regs::NV_TLB_FLUSH_CTRL))
- },
+ || Ok(bar.read(regs::NV_TLB_FLUSH_CTRL)),
|ctrl: ®s::NV_TLB_FLUSH_CTRL| !ctrl.trigger(),
Delta::ZERO,
Delta::from_secs(2),
diff --git a/drivers/gpu/nova-core/mm/vmm.rs b/drivers/gpu/nova-core/mm/vmm.rs
index 45da443211583..35caaed56007e 100644
--- a/drivers/gpu/nova-core/mm/vmm.rs
+++ b/drivers/gpu/nova-core/mm/vmm.rs
@@ -6,6 +6,7 @@
//! virtual address spaces (Channels, BAR1, BAR2).
use kernel::{
+ device,
gpu::buddy::AllocatedBlocks,
maple_tree::MapleTreeAlloc,
prelude::*,
@@ -207,8 +208,13 @@ fn free_vfn(&self, vfn: Vfn) {
}
/// Read the [`Pfn`] for a mapped [`Vfn`] if one is mapped.
- pub(super) fn read_mapping(&self, mm: &GpuMm, vfn: Vfn) -> Result<Option<Pfn>> {
- match self.pt_walk.walk_to_pte(mm, vfn)? {
+ pub(super) fn read_mapping(
+ &self,
+ dev: &device::Device<device::Bound>,
+ mm: &GpuMm,
+ vfn: Vfn,
+ ) -> Result<Option<Pfn>> {
+ match self.pt_walk.walk_to_pte(dev, mm, vfn)? {
WalkResult::Mapped { pfn, .. } => Ok(Some(pfn)),
WalkResult::Unmapped { .. } | WalkResult::PageTableMissing => Ok(None),
}
@@ -223,6 +229,7 @@ pub(super) fn read_mapping(&self, mm: &GpuMm, vfn: Vfn) -> Result<Option<Pfn>> {
/// to call outside the fence signalling critical path.
pub(crate) fn prepare_map(
&mut self,
+ dev: &device::Device<device::Bound>,
mm: &GpuMm,
num_pages: usize,
va_range: Option<Range<u64>>,
@@ -235,6 +242,7 @@ pub(crate) fn prepare_map(
let vfn_start = self.alloc_vfn_range(num_pages, va_range)?;
if let Err(e) = self.pt_map.prepare_map(
+ dev,
mm,
vfn_start,
num_pages,
@@ -257,6 +265,7 @@ pub(crate) fn prepare_map(
/// Installs all prepared PDEs and writes PTEs into the page table, then flushes TLB.
pub(crate) fn execute_map(
&mut self,
+ dev: &device::Device<device::Bound>,
mm: &GpuMm,
prepared: PreparedMapping,
pfns: &[Pfn],
@@ -275,6 +284,7 @@ pub(crate) fn execute_map(
_drop_guard.disarm();
if let Err(e) = self.pt_map.install_mappings(
+ dev,
mm,
&mut self.pt_pages,
&mut self.page_table_allocs,
@@ -300,6 +310,7 @@ pub(crate) fn execute_map(
/// [`Vmm::execute_map()`] will be called separately.
pub(crate) fn map_pages(
&mut self,
+ dev: &device::Device<device::Bound>,
mm: &GpuMm,
pfns: &[Pfn],
va_range: Option<Range<u64>>,
@@ -322,15 +333,20 @@ pub(crate) fn map_pages(
}
}
- let prepared = self.prepare_map(mm, pfns.len(), va_range)?;
- self.execute_map(mm, prepared, pfns, writable)
+ let prepared = self.prepare_map(dev, mm, pfns.len(), va_range)?;
+ self.execute_map(dev, mm, prepared, pfns, writable)
}
/// Unmap all pages in a [`MappedRange`] with a single TLB flush.
- pub(crate) fn unmap_pages(&mut self, mm: &GpuMm, range: MappedRange) -> Result {
+ pub(crate) fn unmap_pages(
+ &mut self,
+ dev: &device::Device<device::Bound>,
+ mm: &GpuMm,
+ range: MappedRange,
+ ) -> Result {
let result = self
.pt_map
- .invalidate_ptes(mm, range.vfn_start, range.num_pages);
+ .invalidate_ptes(dev, mm, range.vfn_start, range.num_pages);
// TODO: Internal page table pages (PDE, PTE pages) are still kept around.
// This is by design as repeated maps/unmaps will be fast. As a future TODO,
--
2.34.1
next prev parent reply other threads:[~2026-04-21 17:23 UTC|newest]
Thread overview: 34+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-04-15 21:05 [PATCH v11 01/20] gpu: nova-core: gsp: Return GspStaticInfo from boot() Joel Fernandes
2026-04-15 21:05 ` [PATCH v11 02/20] gpu: nova-core: gsp: Extract usable FB region from GSP Joel Fernandes
2026-04-16 23:04 ` John Hubbard
2026-04-16 23:26 ` John Hubbard
2026-04-21 14:55 ` Joel Fernandes
2026-04-21 20:05 ` David Airlie
2026-04-21 21:41 ` John Hubbard
2026-04-15 21:05 ` [PATCH v11 03/20] gpu: nova-core: gsp: Expose total physical VRAM end from FB region info Joel Fernandes
2026-04-15 21:05 ` [PATCH v11 04/20] gpu: nova-core: mm: Add support to use PRAMIN windows to write to VRAM Joel Fernandes
2026-04-15 21:05 ` [PATCH v11 05/20] docs: gpu: nova-core: Document the PRAMIN aperture mechanism Joel Fernandes
2026-04-15 21:05 ` [PATCH v11 06/20] gpu: nova-core: mm: Add common memory management types Joel Fernandes
2026-04-15 21:05 ` [PATCH v11 07/20] gpu: nova-core: mm: Add TLB flush support Joel Fernandes
2026-04-16 21:23 ` Joel Fernandes
2026-04-16 21:45 ` Danilo Krummrich
2026-04-16 22:18 ` Joel Fernandes
2026-04-16 22:53 ` Danilo Krummrich
2026-04-21 13:47 ` Joel Fernandes
2026-04-21 17:23 ` Joel Fernandes [this message]
2026-04-21 14:28 ` Joel Fernandes
2026-04-15 21:05 ` [PATCH v11 08/20] gpu: nova-core: mm: Add GpuMm centralized memory manager Joel Fernandes
2026-04-15 21:05 ` [PATCH v11 09/20] gpu: nova-core: mm: Add common types for all page table formats Joel Fernandes
2026-04-15 21:05 ` [PATCH v11 10/20] gpu: nova-core: mm: Add MMU v2 page table types Joel Fernandes
2026-04-15 21:05 ` [PATCH v11 11/20] gpu: nova-core: mm: Add MMU v3 " Joel Fernandes
2026-04-15 21:05 ` [PATCH v11 12/20] gpu: nova-core: mm: Add unified page table entry wrapper enums Joel Fernandes
2026-04-15 21:05 ` [PATCH v11 13/20] gpu: nova-core: mm: Add page table walker for MMU v2/v3 Joel Fernandes
2026-04-15 21:05 ` [PATCH v11 14/20] gpu: nova-core: mm: Add Virtual Memory Manager Joel Fernandes
2026-04-15 21:05 ` [PATCH v11 15/20] gpu: nova-core: mm: Add virtual address range tracking to VMM Joel Fernandes
2026-04-15 21:05 ` [PATCH v11 16/20] gpu: nova-core: mm: Add multi-page mapping API " Joel Fernandes
2026-04-15 21:05 ` [PATCH v11 17/20] gpu: nova-core: Add BAR1 aperture type and size constant Joel Fernandes
2026-04-15 21:05 ` [PATCH v11 18/20] gpu: nova-core: mm: Add BAR1 user interface Joel Fernandes
2026-04-15 21:05 ` [PATCH v11 19/20] gpu: nova-core: mm: Add BAR1 memory management self-tests Joel Fernandes
2026-04-15 21:05 ` [PATCH v11 20/20] gpu: nova-core: mm: Add PRAMIN aperture self-tests Joel Fernandes
2026-04-15 21:05 ` [PATCH v11 00/21] gpu: nova-core: Add memory management support Joel Fernandes
2026-04-16 22:57 ` [PATCH v11 01/20] gpu: nova-core: gsp: Return GspStaticInfo from boot() John Hubbard
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260421172300.GA2038908@joelbox2 \
--to=joelagnelf@nvidia.com \
--cc=a.hindborg@kernel.org \
--cc=acourbot@nvidia.com \
--cc=airlied@gmail.com \
--cc=airlied@redhat.com \
--cc=alex.gaynor@gmail.com \
--cc=alexander.deucher@amd.com \
--cc=alexeyi@nvidia.com \
--cc=aliceryhl@google.com \
--cc=amd-gfx@lists.freedesktop.org \
--cc=apopple@nvidia.com \
--cc=arighi@nvidia.com \
--cc=aritger@nvidia.com \
--cc=balbirs@nvidia.com \
--cc=bjorn3_gh@protonmail.com \
--cc=boqun.feng@gmail.com \
--cc=boqun@kernel.org \
--cc=christian.koenig@amd.com \
--cc=corbet@lwn.net \
--cc=dakr@kernel.org \
--cc=daniel.almeida@collabora.com \
--cc=deller@gmx.de \
--cc=dri-devel@lists.freedesktop.org \
--cc=ecourtney@nvidia.com \
--cc=elle@weathered-steel.dev \
--cc=epeer@nvidia.com \
--cc=gary@garyguo.net \
--cc=intel-gfx@lists.freedesktop.org \
--cc=intel-xe@lists.freedesktop.org \
--cc=jani.nikula@linux.intel.com \
--cc=jhubbard@nvidia.com \
--cc=joel@joelfernandes.org \
--cc=joonas.lahtinen@linux.intel.com \
--cc=koen.koning@linux.intel.com \
--cc=linux-doc@vger.kernel.org \
--cc=linux-fbdev@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=lossin@kernel.org \
--cc=lucas.demarchi@intel.com \
--cc=maarten.lankhorst@linux.intel.com \
--cc=matthew.auld@intel.com \
--cc=mripard@kernel.org \
--cc=ndjukic@nvidia.com \
--cc=ojeda@kernel.org \
--cc=phasta@kernel.org \
--cc=ray.huang@amd.com \
--cc=rodrigo.vivi@intel.com \
--cc=rust-for-linux@vger.kernel.org \
--cc=simona@ffwll.ch \
--cc=thomas.hellstrom@linux.intel.com \
--cc=tmgross@umich.edu \
--cc=ttabi@nvidia.com \
--cc=tursulin@ursulin.net \
--cc=tzimmermann@suse.de \
--cc=zhiw@nvidia.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox