All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Eliot Courtney" <ecourtney@nvidia.com>
To: "John Hubbard" <jhubbard@nvidia.com>,
	"Danilo Krummrich" <dakr@kernel.org>,
	"Alexandre Courbot" <acourbot@nvidia.com>
Cc: "Joel Fernandes" <joelagnelf@nvidia.com>,
	"Timur Tabi" <ttabi@nvidia.com>,
	"Alistair Popple" <apopple@nvidia.com>,
	"Eliot Courtney" <ecourtney@nvidia.com>,
	"Shashank Sharma" <shashanks@nvidia.com>,
	"Zhi Wang" <zhiw@nvidia.com>, "David Airlie" <airlied@gmail.com>,
	"Simona Vetter" <simona@ffwll.ch>,
	"Bjorn Helgaas" <bhelgaas@google.com>,
	"Miguel Ojeda" <ojeda@kernel.org>,
	"Alex Gaynor" <alex.gaynor@gmail.com>,
	"Boqun Feng" <boqun.feng@gmail.com>,
	"Gary Guo" <gary@garyguo.net>,
	"Björn Roy Baron" <bjorn3_gh@protonmail.com>,
	"Benno Lossin" <lossin@kernel.org>,
	"Andreas Hindborg" <a.hindborg@kernel.org>,
	"Alice Ryhl" <aliceryhl@google.com>,
	"Trevor Gross" <tmgross@umich.edu>,
	rust-for-linux@vger.kernel.org,
	LKML <linux-kernel@vger.kernel.org>
Subject: Re: [PATCH v11 06/22] gpu: nova-core: Blackwell: use correct sysmem flush registers
Date: Mon, 01 Jun 2026 16:33:50 +0900	[thread overview]
Message-ID: <DIXJEO0ILZVH.2BOQ4HKC3396B@nvidia.com> (raw)
In-Reply-To: <20260530030953.740561-7-jhubbard@nvidia.com>

On Sat May 30, 2026 at 12:09 PM JST, John Hubbard wrote:
> Blackwell GPUs moved the sysmem flush page registers away from the
> Ampere/Ada location. GB10x routes the flush through a pair of HSHUB0
> register sets (primary and egress) that must both be programmed to
> the same address. GB20x routes it through FBHUB0.
>
> Implement these paths in the GB10x and GB20x framebuffer HALs.
>
> Signed-off-by: John Hubbard <jhubbard@nvidia.com>
> ---
>  drivers/gpu/nova-core/fb/hal/gb100.rs | 46 +++++++++++++++++++++++++--
>  drivers/gpu/nova-core/fb/hal/gb202.rs | 40 +++++++++++++++++++++--
>  drivers/gpu/nova-core/regs.rs         | 37 +++++++++++++++++++++
>  3 files changed, 117 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/gpu/nova-core/fb/hal/gb100.rs b/drivers/gpu/nova-core/fb/hal/gb100.rs
> index 8d63350abf8a..70f4c11b1e77 100644
> --- a/drivers/gpu/nova-core/fb/hal/gb100.rs
> +++ b/drivers/gpu/nova-core/fb/hal/gb100.rs
> @@ -4,6 +4,8 @@
>  //! Blackwell GB10x framebuffer HAL.
>  
>  use kernel::{
> +    io::Io,
> +    num::Bounded,
>      prelude::*,
>      ptr::{
>          const_align_up,
> @@ -15,11 +17,45 @@
>  use crate::{
>      driver::Bar0,
>      fb::hal::FbHal,
> -    num::usize_into_u32, //
> +    num::usize_into_u32,
> +    regs, //
>  };
>  
>  struct Gb100;
>  
> +fn read_sysmem_flush_page_gb100(bar: &Bar0) -> u64 {
> +    let lo = u64::from(
> +        bar.read(regs::NV_PFB_HSHUB0_PCIE_FLUSH_SYSMEM_ADDR_LO)
> +            .adr(),
> +    );
> +    let hi = u64::from(
> +        bar.read(regs::NV_PFB_HSHUB0_PCIE_FLUSH_SYSMEM_ADDR_HI)
> +            .adr(),
> +    );
> +
> +    lo | (hi << 32)
> +}
> +
> +/// Write the sysmem flush page address through the GB10x HSHUB0 registers.
> +///
> +/// Both the primary and EG (egress) register pairs must be programmed to the same address,
> +/// as required by hardware.
> +fn write_sysmem_flush_page_gb100(bar: &Bar0, addr: Bounded<u64, 52>) {
> +    // CAST: lower 32 bits. Hardware ignores bits 7:0.
> +    let addr_lo = *addr as u32;
> +    let addr_hi = addr.shr::<32, 20>().cast::<u32>();
> +
> +    // Write HI first. The hardware will trigger the flush on the LO write.
> +
> +    // Primary HSHUB pair.
> +    bar.write_reg(regs::NV_PFB_HSHUB0_PCIE_FLUSH_SYSMEM_ADDR_HI::zeroed().with_adr(addr_hi));
> +    bar.write_reg(regs::NV_PFB_HSHUB0_PCIE_FLUSH_SYSMEM_ADDR_LO::zeroed().with_adr(addr_lo));
> +
> +    // EG (egress) pair -- must match the primary pair.
> +    bar.write_reg(regs::NV_PFB_HSHUB0_EG_PCIE_FLUSH_SYSMEM_ADDR_HI::zeroed().with_adr(addr_hi));
> +    bar.write_reg(regs::NV_PFB_HSHUB0_EG_PCIE_FLUSH_SYSMEM_ADDR_LO::zeroed().with_adr(addr_lo));
> +}
> +
>  pub(super) const fn pmu_reserved_size_gb100() -> u32 {
>      usize_into_u32::<{ const_align_up(SZ_8M + SZ_16M + SZ_4K, Alignment::new::<SZ_128K>()).unwrap() }>(
>      )
> @@ -27,11 +63,15 @@ pub(super) const fn pmu_reserved_size_gb100() -> u32 {
>  
>  impl FbHal for Gb100 {
>      fn read_sysmem_flush_page(&self, bar: &Bar0) -> u64 {
> -        super::ga100::read_sysmem_flush_page_ga100(bar)
> +        read_sysmem_flush_page_gb100(bar)
>      }
>  
>      fn write_sysmem_flush_page(&self, bar: &Bar0, addr: u64) -> Result {
> -        super::ga100::write_sysmem_flush_page_ga100(bar, addr);
> +        let addr: Bounded<u64, 52> = Bounded::<u64, 64>::from(addr)
> +            .try_shrink::<52>()
> +            .ok_or(EINVAL)?;

Maybe this could be written more simply as:
`let addr = Bounded::<u64, 52>::try_new(addr).ok_or(EINVAL)?;`

> +
> +        write_sysmem_flush_page_gb100(bar, addr);
>  
>          Ok(())
>      }
> diff --git a/drivers/gpu/nova-core/fb/hal/gb202.rs b/drivers/gpu/nova-core/fb/hal/gb202.rs
> index 542c1d7429e9..5a6b815eec3d 100644
> --- a/drivers/gpu/nova-core/fb/hal/gb202.rs
> +++ b/drivers/gpu/nova-core/fb/hal/gb202.rs
> @@ -4,24 +4,58 @@
>  //! Blackwell GB20x framebuffer HAL.
>  
>  use kernel::{
> +    io::Io,
> +    num::Bounded,
>      prelude::*,
>      sizes::SizeConstants, //
>  };
>  
>  use crate::{
>      driver::Bar0,
> -    fb::hal::FbHal, //
> +    fb::hal::FbHal,
> +    regs, //
>  };
>  
>  struct Gb202;
>  
> +fn read_sysmem_flush_page_gb202(bar: &Bar0) -> u64 {
> +    let lo = u64::from(
> +        bar.read(regs::NV_PFB_FBHUB0_PCIE_FLUSH_SYSMEM_ADDR_LO)
> +            .adr(),
> +    );
> +    let hi = u64::from(
> +        bar.read(regs::NV_PFB_FBHUB0_PCIE_FLUSH_SYSMEM_ADDR_HI)
> +            .adr(),
> +    );
> +
> +    lo | (hi << 32)
> +}
> +
> +/// Write the sysmem flush page address through the GB20x FBHUB0 registers.
> +fn write_sysmem_flush_page_gb202(bar: &Bar0, addr: Bounded<u64, 52>) {
> +    // Write HI first. The hardware will trigger the flush on the LO write.
> +    bar.write_reg(
> +        regs::NV_PFB_FBHUB0_PCIE_FLUSH_SYSMEM_ADDR_HI::zeroed()
> +            .with_adr(addr.shr::<32, 20>().cast::<u32>()),
> +    );
> +    bar.write_reg(
> +        regs::NV_PFB_FBHUB0_PCIE_FLUSH_SYSMEM_ADDR_LO::zeroed()
> +            // CAST: lower 32 bits. Hardware ignores bits 7:0.
> +            .with_adr(*addr as u32),
> +    );
> +}
> +
>  impl FbHal for Gb202 {
>      fn read_sysmem_flush_page(&self, bar: &Bar0) -> u64 {
> -        super::ga100::read_sysmem_flush_page_ga100(bar)
> +        read_sysmem_flush_page_gb202(bar)
>      }
>  
>      fn write_sysmem_flush_page(&self, bar: &Bar0, addr: u64) -> Result {
> -        super::ga100::write_sysmem_flush_page_ga100(bar, addr);
> +        let addr: Bounded<u64, 52> = Bounded::<u64, 64>::from(addr)
> +            .try_shrink::<52>()
> +            .ok_or(EINVAL)?;

Same here: this could use `Bounded::<u64, 52>::try_new(addr).ok_or(EINVAL)?` directly.

> +
> +        write_sysmem_flush_page_gb202(bar, addr);
>  
>          Ok(())
>      }
> diff --git a/drivers/gpu/nova-core/regs.rs b/drivers/gpu/nova-core/regs.rs
> index 356fbf364ea5..65be6ec71ed4 100644
> --- a/drivers/gpu/nova-core/regs.rs
> +++ b/drivers/gpu/nova-core/regs.rs
> @@ -1,4 +1,5 @@
>  // SPDX-License-Identifier: GPL-2.0
> +// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
>  
>  use kernel::{
>      io::{
> @@ -145,6 +146,42 @@ fn fmt(&self, f: &mut kernel::fmt::Formatter<'_>) -> kernel::fmt::Result {
>          /// Bits 12..40 of the higher (exclusive) bound of the WPR2 region.
>          31:4    hi_val;
>      }
> +
> +    // Blackwell GB10x sysmem flush registers (HSHUB0).
> +    //
> +    // GB10x GPUs use two pairs of HSHUB registers for sysmembar: a primary pair and an EG
> +    // (egress) pair. Both must be programmed to the same address. Hardware ignores bits 7:0
> +    // of each LO register. HSHUB0 base is 0x00891000.
> +
> +    pub(crate) NV_PFB_HSHUB0_PCIE_FLUSH_SYSMEM_ADDR_LO(u32) @ 0x00891e50 {
> +        31:0    adr => u32;
> +    }
> +
> +    pub(crate) NV_PFB_HSHUB0_PCIE_FLUSH_SYSMEM_ADDR_HI(u32) @ 0x00891e54 {
> +        19:0    adr;
> +    }
> +
> +    pub(crate) NV_PFB_HSHUB0_EG_PCIE_FLUSH_SYSMEM_ADDR_LO(u32) @ 0x008916c0 {
> +        31:0    adr => u32;
> +    }
> +
> +    pub(crate) NV_PFB_HSHUB0_EG_PCIE_FLUSH_SYSMEM_ADDR_HI(u32) @ 0x008916c4 {
> +        19:0    adr;
> +    }
> +
> +    // Blackwell GB20x sysmem flush registers (FBHUB0).
> +    //
> +    // Unlike the older NV_PFB_NISO_FLUSH_SYSMEM_ADDR registers which encode the address with an
> +    // 8-bit right-shift, these registers take the raw address split into lower/upper 32-bit halves.
> +    // The hardware ignores bits 7:0 of the LO register.
> +
> +    pub(crate) NV_PFB_FBHUB0_PCIE_FLUSH_SYSMEM_ADDR_LO(u32) @ 0x008a1d58 {
> +        31:0    adr => u32;
> +    }
> +
> +    pub(crate) NV_PFB_FBHUB0_PCIE_FLUSH_SYSMEM_ADDR_HI(u32) @ 0x008a1d5c {
> +        19:0    adr;
> +    }
>  }

It may be nice to move these to the place (the HAL) where they are used,
if they aren't used anywhere else (and reduce their visibility). I am also
curious about where 0x00891000 comes from.


  parent reply	other threads:[~2026-06-01  7:34 UTC|newest]

Thread overview: 56+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-05-30  3:09 [PATCH v11 00/22] gpu: nova-core: firmware: Hopper/Blackwell support John Hubbard
2026-05-30  3:09 ` [PATCH v11 01/22] gpu: nova-core: set DMA mask width based on GPU architecture John Hubbard
2026-06-01  4:01   ` Eliot Courtney
2026-05-30  3:09 ` [PATCH v11 02/22] gpu: nova-core: Hopper/Blackwell: new location for PCI config mirror John Hubbard
2026-06-01  4:04   ` Eliot Courtney
2026-05-30  3:09 ` [PATCH v11 03/22] gpu: nova-core: Blackwell: compute PMU-reserved framebuffer size John Hubbard
2026-06-01  2:07   ` Alexandre Courbot
2026-06-01  5:34     ` Alexandre Courbot
2026-06-01 18:01       ` John Hubbard
2026-06-01  4:41   ` Eliot Courtney
2026-05-30  3:09 ` [PATCH v11 04/22] gpu: nova-core: Hopper/Blackwell: larger non-WPR heap John Hubbard
2026-06-01  2:24   ` Alexandre Courbot
2026-06-01 18:03     ` John Hubbard
2026-06-01  5:01   ` Eliot Courtney
2026-05-30  3:09 ` [PATCH v11 05/22] gpu: nova-core: Hopper/Blackwell: larger WPR2 (GSP) heap John Hubbard
2026-06-01  5:21   ` Eliot Courtney
2026-05-30  3:09 ` [PATCH v11 06/22] gpu: nova-core: Blackwell: use correct sysmem flush registers John Hubbard
2026-06-01  7:01   ` Alexandre Courbot
2026-06-01 18:16     ` John Hubbard
2026-06-01  7:33   ` Eliot Courtney [this message]
2026-06-01 13:13     ` Alexandre Courbot
2026-06-01 18:09       ` John Hubbard
2026-05-30  3:09 ` [PATCH v11 07/22] gpu: nova-core: don't assume 64-bit firmware images John Hubbard
2026-06-01  6:36   ` Eliot Courtney
2026-05-30  3:09 ` [PATCH v11 08/22] gpu: nova-core: add support for 32-bit " John Hubbard
2026-06-01  6:37   ` Eliot Courtney
2026-05-30  3:09 ` [PATCH v11 09/22] gpu: nova-core: add auto-detection of 32-bit, 64-bit " John Hubbard
2026-06-01  6:49   ` Eliot Courtney
2026-05-30  3:09 ` [PATCH v11 10/22] gpu: nova-core: Hopper/Blackwell: add FSP falcon engine stub John Hubbard
2026-06-01  7:47   ` Eliot Courtney
2026-06-01 16:10   ` Timur Tabi
2026-06-01 18:17     ` John Hubbard
2026-05-30  3:09 ` [PATCH v11 11/22] gpu: nova-core: Hopper/Blackwell: add FMC firmware image John Hubbard
2026-06-01  8:38   ` Eliot Courtney
2026-05-30  3:09 ` [PATCH v11 12/22] gpu: nova-core: Hopper/Blackwell: add FSP secure boot completion waiting John Hubbard
2026-06-01  7:48   ` Alexandre Courbot
2026-06-01  8:32     ` Eliot Courtney
2026-06-01 13:07       ` Alexandre Courbot
2026-06-01 18:18         ` John Hubbard
2026-05-30  3:09 ` [PATCH v11 13/22] gpu: nova-core: Hopper/Blackwell: add FMC signature extraction John Hubbard
2026-06-01  8:55   ` Eliot Courtney
2026-06-01 14:45   ` Alexandre Courbot
2026-06-01 14:49     ` Alexandre Courbot
2026-06-01 18:21       ` John Hubbard
2026-05-30  3:09 ` [PATCH v11 14/22] gpu: nova-core: Hopper/Blackwell: add FSP falcon EMEM operations John Hubbard
2026-05-30  3:09 ` [PATCH v11 15/22] gpu: nova-core: Hopper/Blackwell: add FSP message infrastructure John Hubbard
2026-05-30  3:09 ` [PATCH v11 16/22] gpu: nova-core: add MCTP/NVDM protocol types for firmware communication John Hubbard
2026-05-30  3:09 ` [PATCH v11 17/22] gpu: nova-core: Hopper/Blackwell: add FSP send/receive messaging John Hubbard
2026-05-30  3:09 ` [PATCH v11 18/22] gpu: nova-core: Hopper/Blackwell: add FspCotVersion type John Hubbard
2026-06-01 14:07   ` Alexandre Courbot
2026-06-01 18:23     ` John Hubbard
2026-05-30  3:09 ` [PATCH v11 19/22] gpu: nova-core: Hopper/Blackwell: add FSP Chain of Trust boot John Hubbard
2026-05-30  3:09 ` [PATCH v11 20/22] gpu: nova-core: Hopper/Blackwell: add GSP lockdown release polling John Hubbard
2026-05-30  3:09 ` [PATCH v11 21/22] gpu: nova-core: add non-sec2 unload path John Hubbard
2026-05-30  3:09 ` [PATCH v11 22/22] gpu: nova-core: gsp: enable FSP boot path John Hubbard
2026-05-30  3:21 ` [PATCH v11 00/22] gpu: nova-core: firmware: Hopper/Blackwell support John Hubbard

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=DIXJEO0ILZVH.2BOQ4HKC3396B@nvidia.com \
    --to=ecourtney@nvidia.com \
    --cc=a.hindborg@kernel.org \
    --cc=acourbot@nvidia.com \
    --cc=airlied@gmail.com \
    --cc=alex.gaynor@gmail.com \
    --cc=aliceryhl@google.com \
    --cc=apopple@nvidia.com \
    --cc=bhelgaas@google.com \
    --cc=bjorn3_gh@protonmail.com \
    --cc=boqun.feng@gmail.com \
    --cc=dakr@kernel.org \
    --cc=gary@garyguo.net \
    --cc=jhubbard@nvidia.com \
    --cc=joelagnelf@nvidia.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=lossin@kernel.org \
    --cc=ojeda@kernel.org \
    --cc=rust-for-linux@vger.kernel.org \
    --cc=shashanks@nvidia.com \
    --cc=simona@ffwll.ch \
    --cc=tmgross@umich.edu \
    --cc=ttabi@nvidia.com \
    --cc=zhiw@nvidia.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.