From: "Eliot Courtney" <ecourtney@nvidia.com>
To: "John Hubbard" <jhubbard@nvidia.com>,
"Danilo Krummrich" <dakr@kernel.org>,
"Alexandre Courbot" <acourbot@nvidia.com>
Cc: "Joel Fernandes" <joelagnelf@nvidia.com>,
"Timur Tabi" <ttabi@nvidia.com>,
"Alistair Popple" <apopple@nvidia.com>,
"Eliot Courtney" <ecourtney@nvidia.com>,
"Shashank Sharma" <shashanks@nvidia.com>,
"Zhi Wang" <zhiw@nvidia.com>, "David Airlie" <airlied@gmail.com>,
"Simona Vetter" <simona@ffwll.ch>,
"Bjorn Helgaas" <bhelgaas@google.com>,
"Miguel Ojeda" <ojeda@kernel.org>,
"Alex Gaynor" <alex.gaynor@gmail.com>,
"Boqun Feng" <boqun.feng@gmail.com>,
"Gary Guo" <gary@garyguo.net>,
"Björn Roy Baron" <bjorn3_gh@protonmail.com>,
"Benno Lossin" <lossin@kernel.org>,
"Andreas Hindborg" <a.hindborg@kernel.org>,
"Alice Ryhl" <aliceryhl@google.com>,
"Trevor Gross" <tmgross@umich.edu>,
rust-for-linux@vger.kernel.org,
LKML <linux-kernel@vger.kernel.org>
Subject: Re: [PATCH v11 06/22] gpu: nova-core: Blackwell: use correct sysmem flush registers
Date: Mon, 01 Jun 2026 16:33:50 +0900 [thread overview]
Message-ID: <DIXJEO0ILZVH.2BOQ4HKC3396B@nvidia.com> (raw)
In-Reply-To: <20260530030953.740561-7-jhubbard@nvidia.com>
On Sat May 30, 2026 at 12:09 PM JST, John Hubbard wrote:
> Blackwell GPUs moved the sysmem flush page registers away from the
> Ampere/Ada location. GB10x routes the flush through a pair of HSHUB0
> register sets (primary and egress) that must both be programmed to
> the same address. GB20x routes it through FBHUB0.
>
> Implement these paths in the GB10x and GB20x framebuffer HALs.
>
> Signed-off-by: John Hubbard <jhubbard@nvidia.com>
> ---
> drivers/gpu/nova-core/fb/hal/gb100.rs | 46 +++++++++++++++++++++++++--
> drivers/gpu/nova-core/fb/hal/gb202.rs | 40 +++++++++++++++++++++--
> drivers/gpu/nova-core/regs.rs | 37 +++++++++++++++++++++
> 3 files changed, 117 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/gpu/nova-core/fb/hal/gb100.rs b/drivers/gpu/nova-core/fb/hal/gb100.rs
> index 8d63350abf8a..70f4c11b1e77 100644
> --- a/drivers/gpu/nova-core/fb/hal/gb100.rs
> +++ b/drivers/gpu/nova-core/fb/hal/gb100.rs
> @@ -4,6 +4,8 @@
> //! Blackwell GB10x framebuffer HAL.
>
> use kernel::{
> + io::Io,
> + num::Bounded,
> prelude::*,
> ptr::{
> const_align_up,
> @@ -15,11 +17,45 @@
> use crate::{
> driver::Bar0,
> fb::hal::FbHal,
> - num::usize_into_u32, //
> + num::usize_into_u32,
> + regs, //
> };
>
> struct Gb100;
>
> +fn read_sysmem_flush_page_gb100(bar: &Bar0) -> u64 {
> + let lo = u64::from(
> + bar.read(regs::NV_PFB_HSHUB0_PCIE_FLUSH_SYSMEM_ADDR_LO)
> + .adr(),
> + );
> + let hi = u64::from(
> + bar.read(regs::NV_PFB_HSHUB0_PCIE_FLUSH_SYSMEM_ADDR_HI)
> + .adr(),
> + );
> +
> + lo | (hi << 32)
> +}
> +
> +/// Write the sysmem flush page address through the GB10x HSHUB0 registers.
> +///
> +/// Both the primary and EG (egress) register pairs must be programmed to the same address,
> +/// as required by hardware.
> +fn write_sysmem_flush_page_gb100(bar: &Bar0, addr: Bounded<u64, 52>) {
> + // CAST: lower 32 bits. Hardware ignores bits 7:0.
> + let addr_lo = *addr as u32;
> + let addr_hi = addr.shr::<32, 20>().cast::<u32>();
> +
> + // Write HI first. The hardware will trigger the flush on the LO write.
> +
> + // Primary HSHUB pair.
> + bar.write_reg(regs::NV_PFB_HSHUB0_PCIE_FLUSH_SYSMEM_ADDR_HI::zeroed().with_adr(addr_hi));
> + bar.write_reg(regs::NV_PFB_HSHUB0_PCIE_FLUSH_SYSMEM_ADDR_LO::zeroed().with_adr(addr_lo));
> +
> + // EG (egress) pair -- must match the primary pair.
> + bar.write_reg(regs::NV_PFB_HSHUB0_EG_PCIE_FLUSH_SYSMEM_ADDR_HI::zeroed().with_adr(addr_hi));
> + bar.write_reg(regs::NV_PFB_HSHUB0_EG_PCIE_FLUSH_SYSMEM_ADDR_LO::zeroed().with_adr(addr_lo));
> +}
> +
> pub(super) const fn pmu_reserved_size_gb100() -> u32 {
> usize_into_u32::<{ const_align_up(SZ_8M + SZ_16M + SZ_4K, Alignment::new::<SZ_128K>()).unwrap() }>(
> )
> @@ -27,11 +63,15 @@ pub(super) const fn pmu_reserved_size_gb100() -> u32 {
>
> impl FbHal for Gb100 {
> fn read_sysmem_flush_page(&self, bar: &Bar0) -> u64 {
> - super::ga100::read_sysmem_flush_page_ga100(bar)
> + read_sysmem_flush_page_gb100(bar)
> }
>
> fn write_sysmem_flush_page(&self, bar: &Bar0, addr: u64) -> Result {
> - super::ga100::write_sysmem_flush_page_ga100(bar, addr);
> + let addr: Bounded<u64, 52> = Bounded::<u64, 64>::from(addr)
> + .try_shrink::<52>()
> + .ok_or(EINVAL)?;
This could be written more simply as:
`let addr = Bounded::<u64, 52>::try_new(addr).ok_or(EINVAL)?;`
> +
> + write_sysmem_flush_page_gb100(bar, addr);
>
> Ok(())
> }
> diff --git a/drivers/gpu/nova-core/fb/hal/gb202.rs b/drivers/gpu/nova-core/fb/hal/gb202.rs
> index 542c1d7429e9..5a6b815eec3d 100644
> --- a/drivers/gpu/nova-core/fb/hal/gb202.rs
> +++ b/drivers/gpu/nova-core/fb/hal/gb202.rs
> @@ -4,24 +4,58 @@
> //! Blackwell GB20x framebuffer HAL.
>
> use kernel::{
> + io::Io,
> + num::Bounded,
> prelude::*,
> sizes::SizeConstants, //
> };
>
> use crate::{
> driver::Bar0,
> - fb::hal::FbHal, //
> + fb::hal::FbHal,
> + regs, //
> };
>
> struct Gb202;
>
> +fn read_sysmem_flush_page_gb202(bar: &Bar0) -> u64 {
> + let lo = u64::from(
> + bar.read(regs::NV_PFB_FBHUB0_PCIE_FLUSH_SYSMEM_ADDR_LO)
> + .adr(),
> + );
> + let hi = u64::from(
> + bar.read(regs::NV_PFB_FBHUB0_PCIE_FLUSH_SYSMEM_ADDR_HI)
> + .adr(),
> + );
> +
> + lo | (hi << 32)
> +}
> +
> +/// Write the sysmem flush page address through the GB20x FBHUB0 registers.
> +fn write_sysmem_flush_page_gb202(bar: &Bar0, addr: Bounded<u64, 52>) {
> + // Write HI first. The hardware will trigger the flush on the LO write.
> + bar.write_reg(
> + regs::NV_PFB_FBHUB0_PCIE_FLUSH_SYSMEM_ADDR_HI::zeroed()
> + .with_adr(addr.shr::<32, 20>().cast::<u32>()),
> + );
> + bar.write_reg(
> + regs::NV_PFB_FBHUB0_PCIE_FLUSH_SYSMEM_ADDR_LO::zeroed()
> + // CAST: lower 32 bits. Hardware ignores bits 7:0.
> + .with_adr(*addr as u32),
> + );
> +}
> +
> impl FbHal for Gb202 {
> fn read_sysmem_flush_page(&self, bar: &Bar0) -> u64 {
> - super::ga100::read_sysmem_flush_page_ga100(bar)
> + read_sysmem_flush_page_gb202(bar)
> }
>
> fn write_sysmem_flush_page(&self, bar: &Bar0, addr: u64) -> Result {
> - super::ga100::write_sysmem_flush_page_ga100(bar, addr);
> + let addr: Bounded<u64, 52> = Bounded::<u64, 64>::from(addr)
> + .try_shrink::<52>()
> + .ok_or(EINVAL)?;
Same here.
> +
> + write_sysmem_flush_page_gb202(bar, addr);
>
> Ok(())
> }
> diff --git a/drivers/gpu/nova-core/regs.rs b/drivers/gpu/nova-core/regs.rs
> index 356fbf364ea5..65be6ec71ed4 100644
> --- a/drivers/gpu/nova-core/regs.rs
> +++ b/drivers/gpu/nova-core/regs.rs
> @@ -1,4 +1,5 @@
> // SPDX-License-Identifier: GPL-2.0
> +// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
>
> use kernel::{
> io::{
> @@ -145,6 +146,42 @@ fn fmt(&self, f: &mut kernel::fmt::Formatter<'_>) -> kernel::fmt::Result {
> /// Bits 12..40 of the higher (exclusive) bound of the WPR2 region.
> 31:4 hi_val;
> }
> +
> + // Blackwell GB10x sysmem flush registers (HSHUB0).
> + //
> + // GB10x GPUs use two pairs of HSHUB registers for sysmembar: a primary pair and an EG
> + // (egress) pair. Both must be programmed to the same address. Hardware ignores bits 7:0
> + // of each LO register. HSHUB0 base is 0x00891000.
> +
> + pub(crate) NV_PFB_HSHUB0_PCIE_FLUSH_SYSMEM_ADDR_LO(u32) @ 0x00891e50 {
> + 31:0 adr => u32;
> + }
> +
> + pub(crate) NV_PFB_HSHUB0_PCIE_FLUSH_SYSMEM_ADDR_HI(u32) @ 0x00891e54 {
> + 19:0 adr;
> + }
> +
> + pub(crate) NV_PFB_HSHUB0_EG_PCIE_FLUSH_SYSMEM_ADDR_LO(u32) @ 0x008916c0 {
> + 31:0 adr => u32;
> + }
> +
> + pub(crate) NV_PFB_HSHUB0_EG_PCIE_FLUSH_SYSMEM_ADDR_HI(u32) @ 0x008916c4 {
> + 19:0 adr;
> + }
> +
> + // Blackwell GB20x sysmem flush registers (FBHUB0).
> + //
> + // Unlike the older NV_PFB_NISO_FLUSH_SYSMEM_ADDR registers which encode the address with an
> + // 8-bit right-shift, these registers take the raw address split into lower/upper 32-bit halves.
> + // The hardware ignores bits 7:0 of the LO register.
> +
> + pub(crate) NV_PFB_FBHUB0_PCIE_FLUSH_SYSMEM_ADDR_LO(u32) @ 0x008a1d58 {
> + 31:0 adr => u32;
> + }
> +
> + pub(crate) NV_PFB_FBHUB0_PCIE_FLUSH_SYSMEM_ADDR_HI(u32) @ 0x008a1d5c {
> + 19:0 adr;
> + }
> }
It might be nice to move these register definitions into the HAL
modules where they are used, if they aren't used anywhere else (and
reduce their visibility accordingly). I am also curious about where
the HSHUB0 base address 0x00891000 comes from.
next prev parent reply other threads:[~2026-06-01 7:34 UTC|newest]
Thread overview: 56+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-05-30 3:09 [PATCH v11 00/22] gpu: nova-core: firmware: Hopper/Blackwell support John Hubbard
2026-05-30 3:09 ` [PATCH v11 01/22] gpu: nova-core: set DMA mask width based on GPU architecture John Hubbard
2026-06-01 4:01 ` Eliot Courtney
2026-05-30 3:09 ` [PATCH v11 02/22] gpu: nova-core: Hopper/Blackwell: new location for PCI config mirror John Hubbard
2026-06-01 4:04 ` Eliot Courtney
2026-05-30 3:09 ` [PATCH v11 03/22] gpu: nova-core: Blackwell: compute PMU-reserved framebuffer size John Hubbard
2026-06-01 2:07 ` Alexandre Courbot
2026-06-01 5:34 ` Alexandre Courbot
2026-06-01 18:01 ` John Hubbard
2026-06-01 4:41 ` Eliot Courtney
2026-05-30 3:09 ` [PATCH v11 04/22] gpu: nova-core: Hopper/Blackwell: larger non-WPR heap John Hubbard
2026-06-01 2:24 ` Alexandre Courbot
2026-06-01 18:03 ` John Hubbard
2026-06-01 5:01 ` Eliot Courtney
2026-05-30 3:09 ` [PATCH v11 05/22] gpu: nova-core: Hopper/Blackwell: larger WPR2 (GSP) heap John Hubbard
2026-06-01 5:21 ` Eliot Courtney
2026-05-30 3:09 ` [PATCH v11 06/22] gpu: nova-core: Blackwell: use correct sysmem flush registers John Hubbard
2026-06-01 7:01 ` Alexandre Courbot
2026-06-01 18:16 ` John Hubbard
2026-06-01 7:33 ` Eliot Courtney [this message]
2026-06-01 13:13 ` Alexandre Courbot
2026-06-01 18:09 ` John Hubbard
2026-05-30 3:09 ` [PATCH v11 07/22] gpu: nova-core: don't assume 64-bit firmware images John Hubbard
2026-06-01 6:36 ` Eliot Courtney
2026-05-30 3:09 ` [PATCH v11 08/22] gpu: nova-core: add support for 32-bit " John Hubbard
2026-06-01 6:37 ` Eliot Courtney
2026-05-30 3:09 ` [PATCH v11 09/22] gpu: nova-core: add auto-detection of 32-bit, 64-bit " John Hubbard
2026-06-01 6:49 ` Eliot Courtney
2026-05-30 3:09 ` [PATCH v11 10/22] gpu: nova-core: Hopper/Blackwell: add FSP falcon engine stub John Hubbard
2026-06-01 7:47 ` Eliot Courtney
2026-06-01 16:10 ` Timur Tabi
2026-06-01 18:17 ` John Hubbard
2026-05-30 3:09 ` [PATCH v11 11/22] gpu: nova-core: Hopper/Blackwell: add FMC firmware image John Hubbard
2026-06-01 8:38 ` Eliot Courtney
2026-05-30 3:09 ` [PATCH v11 12/22] gpu: nova-core: Hopper/Blackwell: add FSP secure boot completion waiting John Hubbard
2026-06-01 7:48 ` Alexandre Courbot
2026-06-01 8:32 ` Eliot Courtney
2026-06-01 13:07 ` Alexandre Courbot
2026-06-01 18:18 ` John Hubbard
2026-05-30 3:09 ` [PATCH v11 13/22] gpu: nova-core: Hopper/Blackwell: add FMC signature extraction John Hubbard
2026-06-01 8:55 ` Eliot Courtney
2026-06-01 14:45 ` Alexandre Courbot
2026-06-01 14:49 ` Alexandre Courbot
2026-06-01 18:21 ` John Hubbard
2026-05-30 3:09 ` [PATCH v11 14/22] gpu: nova-core: Hopper/Blackwell: add FSP falcon EMEM operations John Hubbard
2026-05-30 3:09 ` [PATCH v11 15/22] gpu: nova-core: Hopper/Blackwell: add FSP message infrastructure John Hubbard
2026-05-30 3:09 ` [PATCH v11 16/22] gpu: nova-core: add MCTP/NVDM protocol types for firmware communication John Hubbard
2026-05-30 3:09 ` [PATCH v11 17/22] gpu: nova-core: Hopper/Blackwell: add FSP send/receive messaging John Hubbard
2026-05-30 3:09 ` [PATCH v11 18/22] gpu: nova-core: Hopper/Blackwell: add FspCotVersion type John Hubbard
2026-06-01 14:07 ` Alexandre Courbot
2026-06-01 18:23 ` John Hubbard
2026-05-30 3:09 ` [PATCH v11 19/22] gpu: nova-core: Hopper/Blackwell: add FSP Chain of Trust boot John Hubbard
2026-05-30 3:09 ` [PATCH v11 20/22] gpu: nova-core: Hopper/Blackwell: add GSP lockdown release polling John Hubbard
2026-05-30 3:09 ` [PATCH v11 21/22] gpu: nova-core: add non-sec2 unload path John Hubbard
2026-05-30 3:09 ` [PATCH v11 22/22] gpu: nova-core: gsp: enable FSP boot path John Hubbard
2026-05-30 3:21 ` [PATCH v11 00/22] gpu: nova-core: firmware: Hopper/Blackwell support John Hubbard
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=DIXJEO0ILZVH.2BOQ4HKC3396B@nvidia.com \
--to=ecourtney@nvidia.com \
--cc=a.hindborg@kernel.org \
--cc=acourbot@nvidia.com \
--cc=airlied@gmail.com \
--cc=alex.gaynor@gmail.com \
--cc=aliceryhl@google.com \
--cc=apopple@nvidia.com \
--cc=bhelgaas@google.com \
--cc=bjorn3_gh@protonmail.com \
--cc=boqun.feng@gmail.com \
--cc=dakr@kernel.org \
--cc=gary@garyguo.net \
--cc=jhubbard@nvidia.com \
--cc=joelagnelf@nvidia.com \
--cc=linux-kernel@vger.kernel.org \
--cc=lossin@kernel.org \
--cc=ojeda@kernel.org \
--cc=rust-for-linux@vger.kernel.org \
--cc=shashanks@nvidia.com \
--cc=simona@ffwll.ch \
--cc=tmgross@umich.edu \
--cc=ttabi@nvidia.com \
--cc=zhiw@nvidia.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox