Re: [PATCH v11 06/22] gpu: nova-core: Blackwell: use correct sysmem flush registers

Rust for Linux List
 help / color / mirror / Atom feed

From: "Eliot Courtney" <ecourtney@nvidia.com>
To: "John Hubbard" <jhubbard@nvidia.com>,
	"Danilo Krummrich" <dakr@kernel.org>,
	"Alexandre Courbot" <acourbot@nvidia.com>
Cc: "Joel Fernandes" <joelagnelf@nvidia.com>,
	"Timur Tabi" <ttabi@nvidia.com>,
	"Alistair Popple" <apopple@nvidia.com>,
	"Eliot Courtney" <ecourtney@nvidia.com>,
	"Shashank Sharma" <shashanks@nvidia.com>,
	"Zhi Wang" <zhiw@nvidia.com>, "David Airlie" <airlied@gmail.com>,
	"Simona Vetter" <simona@ffwll.ch>,
	"Bjorn Helgaas" <bhelgaas@google.com>,
	"Miguel Ojeda" <ojeda@kernel.org>,
	"Alex Gaynor" <alex.gaynor@gmail.com>,
	"Boqun Feng" <boqun.feng@gmail.com>,
	"Gary Guo" <gary@garyguo.net>,
	"Björn Roy Baron" <bjorn3_gh@protonmail.com>,
	"Benno Lossin" <lossin@kernel.org>,
	"Andreas Hindborg" <a.hindborg@kernel.org>,
	"Alice Ryhl" <aliceryhl@google.com>,
	"Trevor Gross" <tmgross@umich.edu>,
	rust-for-linux@vger.kernel.org,
	LKML <linux-kernel@vger.kernel.org>
Subject: Re: [PATCH v11 06/22] gpu: nova-core: Blackwell: use correct sysmem flush registers
Date: Mon, 01 Jun 2026 16:33:50 +0900	[thread overview]
Message-ID: <DIXJEO0ILZVH.2BOQ4HKC3396B@nvidia.com> (raw)
In-Reply-To: <20260530030953.740561-7-jhubbard@nvidia.com>

On Sat May 30, 2026 at 12:09 PM JST, John Hubbard wrote:
> Blackwell GPUs moved the sysmem flush page registers away from the
> Ampere/Ada location. GB10x routes the flush through a pair of HSHUB0
> register sets (primary and egress) that must both be programmed to
> the same address. GB20x routes it through FBHUB0.
>
> Implement these paths in the GB10x and GB20x framebuffer HALs.
>
> Signed-off-by: John Hubbard <jhubbard@nvidia.com>
> ---
>  drivers/gpu/nova-core/fb/hal/gb100.rs | 46 +++++++++++++++++++++++++--
>  drivers/gpu/nova-core/fb/hal/gb202.rs | 40 +++++++++++++++++++++--
>  drivers/gpu/nova-core/regs.rs         | 37 +++++++++++++++++++++
>  3 files changed, 117 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/gpu/nova-core/fb/hal/gb100.rs b/drivers/gpu/nova-core/fb/hal/gb100.rs
> index 8d63350abf8a..70f4c11b1e77 100644
> --- a/drivers/gpu/nova-core/fb/hal/gb100.rs
> +++ b/drivers/gpu/nova-core/fb/hal/gb100.rs
> @@ -4,6 +4,8 @@
>  //! Blackwell GB10x framebuffer HAL.
>  
>  use kernel::{
> +    io::Io,
> +    num::Bounded,
>      prelude::*,
>      ptr::{
>          const_align_up,
> @@ -15,11 +17,45 @@
>  use crate::{
>      driver::Bar0,
>      fb::hal::FbHal,
> -    num::usize_into_u32, //
> +    num::usize_into_u32,
> +    regs, //
>  };
>  
>  struct Gb100;
>  
> +fn read_sysmem_flush_page_gb100(bar: &Bar0) -> u64 {
> +    let lo = u64::from(
> +        bar.read(regs::NV_PFB_HSHUB0_PCIE_FLUSH_SYSMEM_ADDR_LO)
> +            .adr(),
> +    );
> +    let hi = u64::from(
> +        bar.read(regs::NV_PFB_HSHUB0_PCIE_FLUSH_SYSMEM_ADDR_HI)
> +            .adr(),
> +    );
> +
> +    lo | (hi << 32)
> +}
> +
> +/// Write the sysmem flush page address through the GB10x HSHUB0 registers.
> +///
> +/// Both the primary and EG (egress) register pairs must be programmed to the same address,
> +/// as required by hardware.
> +fn write_sysmem_flush_page_gb100(bar: &Bar0, addr: Bounded<u64, 52>) {
> +    // CAST: lower 32 bits. Hardware ignores bits 7:0.
> +    let addr_lo = *addr as u32;
> +    let addr_hi = addr.shr::<32, 20>().cast::<u32>();
> +
> +    // Write HI first. The hardware will trigger the flush on the LO write.
> +
> +    // Primary HSHUB pair.
> +    bar.write_reg(regs::NV_PFB_HSHUB0_PCIE_FLUSH_SYSMEM_ADDR_HI::zeroed().with_adr(addr_hi));
> +    bar.write_reg(regs::NV_PFB_HSHUB0_PCIE_FLUSH_SYSMEM_ADDR_LO::zeroed().with_adr(addr_lo));
> +
> +    // EG (egress) pair -- must match the primary pair.
> +    bar.write_reg(regs::NV_PFB_HSHUB0_EG_PCIE_FLUSH_SYSMEM_ADDR_HI::zeroed().with_adr(addr_hi));
> +    bar.write_reg(regs::NV_PFB_HSHUB0_EG_PCIE_FLUSH_SYSMEM_ADDR_LO::zeroed().with_adr(addr_lo));
> +}
> +
>  pub(super) const fn pmu_reserved_size_gb100() -> u32 {
>      usize_into_u32::<{ const_align_up(SZ_8M + SZ_16M + SZ_4K, Alignment::new::<SZ_128K>()).unwrap() }>(
>      )
> @@ -27,11 +63,15 @@ pub(super) const fn pmu_reserved_size_gb100() -> u32 {
>  
>  impl FbHal for Gb100 {
>      fn read_sysmem_flush_page(&self, bar: &Bar0) -> u64 {
> -        super::ga100::read_sysmem_flush_page_ga100(bar)
> +        read_sysmem_flush_page_gb100(bar)
>      }
>  
>      fn write_sysmem_flush_page(&self, bar: &Bar0, addr: u64) -> Result {
> -        super::ga100::write_sysmem_flush_page_ga100(bar, addr);
> +        let addr: Bounded<u64, 52> = Bounded::<u64, 64>::from(addr)
> +            .try_shrink::<52>()
> +            .ok_or(EINVAL)?;

This could be written more simply as:
`let addr = Bounded::<u64, 52>::try_new(addr).ok_or(EINVAL)?;`

> +
> +        write_sysmem_flush_page_gb100(bar, addr);
>  
>          Ok(())
>      }
> diff --git a/drivers/gpu/nova-core/fb/hal/gb202.rs b/drivers/gpu/nova-core/fb/hal/gb202.rs
> index 542c1d7429e9..5a6b815eec3d 100644
> --- a/drivers/gpu/nova-core/fb/hal/gb202.rs
> +++ b/drivers/gpu/nova-core/fb/hal/gb202.rs
> @@ -4,24 +4,58 @@
>  //! Blackwell GB20x framebuffer HAL.
>  
>  use kernel::{
> +    io::Io,
> +    num::Bounded,
>      prelude::*,
>      sizes::SizeConstants, //
>  };
>  
>  use crate::{
>      driver::Bar0,
> -    fb::hal::FbHal, //
> +    fb::hal::FbHal,
> +    regs, //
>  };
>  
>  struct Gb202;
>  
> +fn read_sysmem_flush_page_gb202(bar: &Bar0) -> u64 {
> +    let lo = u64::from(
> +        bar.read(regs::NV_PFB_FBHUB0_PCIE_FLUSH_SYSMEM_ADDR_LO)
> +            .adr(),
> +    );
> +    let hi = u64::from(
> +        bar.read(regs::NV_PFB_FBHUB0_PCIE_FLUSH_SYSMEM_ADDR_HI)
> +            .adr(),
> +    );
> +
> +    lo | (hi << 32)
> +}
> +
> +/// Write the sysmem flush page address through the GB20x FBHUB0 registers.
> +fn write_sysmem_flush_page_gb202(bar: &Bar0, addr: Bounded<u64, 52>) {
> +    // Write HI first. The hardware will trigger the flush on the LO write.
> +    bar.write_reg(
> +        regs::NV_PFB_FBHUB0_PCIE_FLUSH_SYSMEM_ADDR_HI::zeroed()
> +            .with_adr(addr.shr::<32, 20>().cast::<u32>()),
> +    );
> +    bar.write_reg(
> +        regs::NV_PFB_FBHUB0_PCIE_FLUSH_SYSMEM_ADDR_LO::zeroed()
> +            // CAST: lower 32 bits. Hardware ignores bits 7:0.
> +            .with_adr(*addr as u32),
> +    );
> +}
> +
>  impl FbHal for Gb202 {
>      fn read_sysmem_flush_page(&self, bar: &Bar0) -> u64 {
> -        super::ga100::read_sysmem_flush_page_ga100(bar)
> +        read_sysmem_flush_page_gb202(bar)
>      }
>  
>      fn write_sysmem_flush_page(&self, bar: &Bar0, addr: u64) -> Result {
> -        super::ga100::write_sysmem_flush_page_ga100(bar, addr);
> +        let addr: Bounded<u64, 52> = Bounded::<u64, 64>::from(addr)
> +            .try_shrink::<52>()
> +            .ok_or(EINVAL)?;

Same here: this could be simplified with `Bounded::<u64, 52>::try_new(addr)`.

> +
> +        write_sysmem_flush_page_gb202(bar, addr);
>  
>          Ok(())
>      }
> diff --git a/drivers/gpu/nova-core/regs.rs b/drivers/gpu/nova-core/regs.rs
> index 356fbf364ea5..65be6ec71ed4 100644
> --- a/drivers/gpu/nova-core/regs.rs
> +++ b/drivers/gpu/nova-core/regs.rs
> @@ -1,4 +1,5 @@
>  // SPDX-License-Identifier: GPL-2.0
> +// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
>  
>  use kernel::{
>      io::{
> @@ -145,6 +146,42 @@ fn fmt(&self, f: &mut kernel::fmt::Formatter<'_>) -> kernel::fmt::Result {
>          /// Bits 12..40 of the higher (exclusive) bound of the WPR2 region.
>          31:4    hi_val;
>      }
> +
> +    // Blackwell GB10x sysmem flush registers (HSHUB0).
> +    //
> +    // GB10x GPUs use two pairs of HSHUB registers for sysmembar: a primary pair and an EG
> +    // (egress) pair. Both must be programmed to the same address. Hardware ignores bits 7:0
> +    // of each LO register. HSHUB0 base is 0x00891000.
> +
> +    pub(crate) NV_PFB_HSHUB0_PCIE_FLUSH_SYSMEM_ADDR_LO(u32) @ 0x00891e50 {
> +        31:0    adr => u32;
> +    }
> +
> +    pub(crate) NV_PFB_HSHUB0_PCIE_FLUSH_SYSMEM_ADDR_HI(u32) @ 0x00891e54 {
> +        19:0    adr;
> +    }
> +
> +    pub(crate) NV_PFB_HSHUB0_EG_PCIE_FLUSH_SYSMEM_ADDR_LO(u32) @ 0x008916c0 {
> +        31:0    adr => u32;
> +    }
> +
> +    pub(crate) NV_PFB_HSHUB0_EG_PCIE_FLUSH_SYSMEM_ADDR_HI(u32) @ 0x008916c4 {
> +        19:0    adr;
> +    }
> +
> +    // Blackwell GB20x sysmem flush registers (FBHUB0).
> +    //
> +    // Unlike the older NV_PFB_NISO_FLUSH_SYSMEM_ADDR registers which encode the address with an
> +    // 8-bit right-shift, these registers take the raw address split into lower/upper 32-bit halves.
> +    // The hardware ignores bits 7:0 of the LO register.
> +
> +    pub(crate) NV_PFB_FBHUB0_PCIE_FLUSH_SYSMEM_ADDR_LO(u32) @ 0x008a1d58 {
> +        31:0    adr => u32;
> +    }
> +
> +    pub(crate) NV_PFB_FBHUB0_PCIE_FLUSH_SYSMEM_ADDR_HI(u32) @ 0x008a1d5c {
> +        19:0    adr;
> +    }
>  }

It might be nice to move these register definitions into the HAL modules
where they are used, if they aren't used anywhere else, and to reduce their
visibility accordingly. I am also curious where the HSHUB0 base address
of 0x00891000 comes from.

next prev parent reply	other threads:[~2026-06-01  7:34 UTC|newest]

Thread overview: 56+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-05-30  3:09 [PATCH v11 00/22] gpu: nova-core: firmware: Hopper/Blackwell support John Hubbard
2026-05-30  3:09 ` [PATCH v11 01/22] gpu: nova-core: set DMA mask width based on GPU architecture John Hubbard
2026-06-01  4:01   ` Eliot Courtney
2026-05-30  3:09 ` [PATCH v11 02/22] gpu: nova-core: Hopper/Blackwell: new location for PCI config mirror John Hubbard
2026-06-01  4:04   ` Eliot Courtney
2026-05-30  3:09 ` [PATCH v11 03/22] gpu: nova-core: Blackwell: compute PMU-reserved framebuffer size John Hubbard
2026-06-01  2:07   ` Alexandre Courbot
2026-06-01  5:34     ` Alexandre Courbot
2026-06-01 18:01       ` John Hubbard
2026-06-01  4:41   ` Eliot Courtney
2026-05-30  3:09 ` [PATCH v11 04/22] gpu: nova-core: Hopper/Blackwell: larger non-WPR heap John Hubbard
2026-06-01  2:24   ` Alexandre Courbot
2026-06-01 18:03     ` John Hubbard
2026-06-01  5:01   ` Eliot Courtney
2026-05-30  3:09 ` [PATCH v11 05/22] gpu: nova-core: Hopper/Blackwell: larger WPR2 (GSP) heap John Hubbard
2026-06-01  5:21   ` Eliot Courtney
2026-05-30  3:09 ` [PATCH v11 06/22] gpu: nova-core: Blackwell: use correct sysmem flush registers John Hubbard
2026-06-01  7:01   ` Alexandre Courbot
2026-06-01 18:16     ` John Hubbard
2026-06-01  7:33   ` Eliot Courtney [this message]
2026-06-01 13:13     ` Alexandre Courbot
2026-06-01 18:09       ` John Hubbard
2026-05-30  3:09 ` [PATCH v11 07/22] gpu: nova-core: don't assume 64-bit firmware images John Hubbard
2026-06-01  6:36   ` Eliot Courtney
2026-05-30  3:09 ` [PATCH v11 08/22] gpu: nova-core: add support for 32-bit " John Hubbard
2026-06-01  6:37   ` Eliot Courtney
2026-05-30  3:09 ` [PATCH v11 09/22] gpu: nova-core: add auto-detection of 32-bit, 64-bit " John Hubbard
2026-06-01  6:49   ` Eliot Courtney
2026-05-30  3:09 ` [PATCH v11 10/22] gpu: nova-core: Hopper/Blackwell: add FSP falcon engine stub John Hubbard
2026-06-01  7:47   ` Eliot Courtney
2026-06-01 16:10   ` Timur Tabi
2026-06-01 18:17     ` John Hubbard
2026-05-30  3:09 ` [PATCH v11 11/22] gpu: nova-core: Hopper/Blackwell: add FMC firmware image John Hubbard
2026-06-01  8:38   ` Eliot Courtney
2026-05-30  3:09 ` [PATCH v11 12/22] gpu: nova-core: Hopper/Blackwell: add FSP secure boot completion waiting John Hubbard
2026-06-01  7:48   ` Alexandre Courbot
2026-06-01  8:32     ` Eliot Courtney
2026-06-01 13:07       ` Alexandre Courbot
2026-06-01 18:18         ` John Hubbard
2026-05-30  3:09 ` [PATCH v11 13/22] gpu: nova-core: Hopper/Blackwell: add FMC signature extraction John Hubbard
2026-06-01  8:55   ` Eliot Courtney
2026-06-01 14:45   ` Alexandre Courbot
2026-06-01 14:49     ` Alexandre Courbot
2026-06-01 18:21       ` John Hubbard
2026-05-30  3:09 ` [PATCH v11 14/22] gpu: nova-core: Hopper/Blackwell: add FSP falcon EMEM operations John Hubbard
2026-05-30  3:09 ` [PATCH v11 15/22] gpu: nova-core: Hopper/Blackwell: add FSP message infrastructure John Hubbard
2026-05-30  3:09 ` [PATCH v11 16/22] gpu: nova-core: add MCTP/NVDM protocol types for firmware communication John Hubbard
2026-05-30  3:09 ` [PATCH v11 17/22] gpu: nova-core: Hopper/Blackwell: add FSP send/receive messaging John Hubbard
2026-05-30  3:09 ` [PATCH v11 18/22] gpu: nova-core: Hopper/Blackwell: add FspCotVersion type John Hubbard
2026-06-01 14:07   ` Alexandre Courbot
2026-06-01 18:23     ` John Hubbard
2026-05-30  3:09 ` [PATCH v11 19/22] gpu: nova-core: Hopper/Blackwell: add FSP Chain of Trust boot John Hubbard
2026-05-30  3:09 ` [PATCH v11 20/22] gpu: nova-core: Hopper/Blackwell: add GSP lockdown release polling John Hubbard
2026-05-30  3:09 ` [PATCH v11 21/22] gpu: nova-core: add non-sec2 unload path John Hubbard
2026-05-30  3:09 ` [PATCH v11 22/22] gpu: nova-core: gsp: enable FSP boot path John Hubbard
2026-05-30  3:21 ` [PATCH v11 00/22] gpu: nova-core: firmware: Hopper/Blackwell support John Hubbard

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=DIXJEO0ILZVH.2BOQ4HKC3396B@nvidia.com \
    --to=ecourtney@nvidia.com \
    --cc=a.hindborg@kernel.org \
    --cc=acourbot@nvidia.com \
    --cc=airlied@gmail.com \
    --cc=alex.gaynor@gmail.com \
    --cc=aliceryhl@google.com \
    --cc=apopple@nvidia.com \
    --cc=bhelgaas@google.com \
    --cc=bjorn3_gh@protonmail.com \
    --cc=boqun.feng@gmail.com \
    --cc=dakr@kernel.org \
    --cc=gary@garyguo.net \
    --cc=jhubbard@nvidia.com \
    --cc=joelagnelf@nvidia.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=lossin@kernel.org \
    --cc=ojeda@kernel.org \
    --cc=rust-for-linux@vger.kernel.org \
    --cc=shashanks@nvidia.com \
    --cc=simona@ffwll.ch \
    --cc=tmgross@umich.edu \
    --cc=ttabi@nvidia.com \
    --cc=zhiw@nvidia.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox