Re: [PATCH] gpu: nova-core: reserve a larger GSP WPR2 heap when vGPU is enabled

All of lore.kernel.org
 help / color / mirror / Atom feed

From: "Alexandre Courbot" <acourbot@nvidia.com>
To: "Zhi Wang" <zhiw@nvidia.com>
Cc: <dakr@kernel.org>, <airlied@gmail.com>, <simona@ffwll.ch>,
	<ojeda@kernel.org>, <alex.gaynor@gmail.com>,
	<boqun.feng@gmail.com>, <gary@garyguo.net>,
	<bjorn3_gh@protonmail.com>, <lossin@kernel.org>,
	<a.hindborg@kernel.org>, <aliceryhl@google.com>,
	<tmgross@umich.edu>, <jhubbard@nvidia.com>,
	<ecourtney@nvidia.com>, <joelagnelf@nvidia.com>,
	<apopple@nvidia.com>, <cjia@nvidia.com>, <smitra@nvidia.com>,
	<kjaju@nvidia.com>, <alkumar@nvidia.com>, <ankita@nvidia.com>,
	<aniketa@nvidia.com>, <kwankhede@nvidia.com>,
	<targupta@nvidia.com>, <nova-gpu@lists.linux.dev>,
	<linux-kernel@vger.kernel.org>, <zhiwang@kernel.org>
Subject: Re: [PATCH] gpu: nova-core: reserve a larger GSP WPR2 heap when vGPU is enabled
Date: Tue, 16 Jun 2026 23:20:41 +0900	[thread overview]
Message-ID: <DJAJGC4LLOK3.YAX75103O00O@nvidia.com> (raw)
In-Reply-To: <20260604114339.1565660-10-zhiw@nvidia.com>

On Thu Jun 4, 2026 at 8:43 PM JST, Zhi Wang wrote:
> GSP-RM allocates independent RM sub-heaps for each VF partition inside
> the WPR2 region. The default baremetal heap sizing is far too small for
> vGPU instances, causing GSP-RM to hit out-of-memory failures during VF
> initialization.
>
> The host driver must reserve the correct heap size before GSP boots,
> because the WPR2 region is locked down by the hardware after boot and
> cannot be resized at runtime. The firmware determines the per-VF carve
> from the gspFwHeapVfPartitionCount field in the WPR2 metadata header.
>
> Select a pre-calibrated static heap size based on total_vfs (174 MB for
> 1 VM, 581 MB for 2-32 VFs, 1370 MB for 48 VFs) and set
> vf_partition_count accordingly. Extend FbLayout::new() and
> GspBootContext to propagate total_vfs through the boot path.
>
> Signed-off-by: Zhi Wang <zhiw@nvidia.com>
> ---
>  drivers/gpu/nova-core/fb.rs       | 17 +++++++++++++----
>  drivers/gpu/nova-core/gsp.rs      |  2 +-
>  drivers/gpu/nova-core/gsp/boot.rs | 14 +++++++++++---
>  drivers/gpu/nova-core/gsp/fw.rs   | 12 ++++++++++++
>  4 files changed, 37 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/gpu/nova-core/fb.rs b/drivers/gpu/nova-core/fb.rs
> index 725e428154cf..fb4e6aa9fda4 100644
> --- a/drivers/gpu/nova-core/fb.rs
> +++ b/drivers/gpu/nova-core/fb.rs
> @@ -171,7 +171,13 @@ pub(crate) struct FbLayout {
>  
>  impl FbLayout {
>      /// Computes the FB layout for `chipset` required to run the `gsp_fw` GSP firmware.
> -    pub(crate) fn new(chipset: Chipset, bar: Bar0<'_>, gsp_fw: &GspFirmware) -> Result<Self> {
> +    pub(crate) fn new(
> +        chipset: Chipset,
> +        bar: Bar0<'_>,
> +        gsp_fw: &GspFirmware,
> +        vgpu_requested: bool,
> +        total_vfs: u16,

`total_vfs` is only meaningful if `vgpu_requested` is true, so these two
parameters would be better modeled as an `Option`, or perhaps even
better as a dedicated parameter type for vGPU settings. Something like
this (please pick a better name if you have one):

    enum VgpuRequest {
        Disabled,
        Requested {
            total_vfs: u16,
        }
    }

> +    ) -> Result<Self> {
>          let hal = hal::fb_hal(chipset);
>  
>          let fb = {
> @@ -236,8 +242,11 @@ pub(crate) fn new(chipset: Chipset, bar: Bar0<'_>, gsp_fw: &GspFirmware) -> Resu
>  
>          let wpr2_heap = {
>              const WPR2_HEAP_DOWN_ALIGN: Alignment = Alignment::new::<SZ_1M>();
> -            let wpr2_heap_size =
> -                gsp::LibosParams::from_chipset(chipset).wpr_heap_size(chipset, fb.end)?;
> +            let wpr2_heap_size = if vgpu_requested {
> +                gsp::vgpu_fw_heap_size(u32::from(total_vfs))
> +            } else {
> +                gsp::LibosParams::from_chipset(chipset).wpr_heap_size(chipset, fb.end)?
> +            };
>              let wpr2_heap_addr = (elf.start - wpr2_heap_size).align_down(WPR2_HEAP_DOWN_ALIGN);
>  
>              FbRange(wpr2_heap_addr..(elf.start).align_down(WPR2_HEAP_DOWN_ALIGN))
> @@ -265,7 +274,7 @@ pub(crate) fn new(chipset: Chipset, bar: Bar0<'_>, gsp_fw: &GspFirmware) -> Resu
>              wpr2_heap,
>              wpr2,
>              heap,
> -            vf_partition_count: 0,
> +            vf_partition_count: if vgpu_requested { total_vfs as u8 } else { 0 },
>              pmu_reserved_size: hal.pmu_reserved_size(),
>          })
>      }
> diff --git a/drivers/gpu/nova-core/gsp.rs b/drivers/gpu/nova-core/gsp.rs
> index 94cd4a784b79..921b92c9eb92 100644
> --- a/drivers/gpu/nova-core/gsp.rs
> +++ b/drivers/gpu/nova-core/gsp.rs
> @@ -27,6 +27,7 @@
>  mod sequencer;
>  
>  pub(crate) use fw::{
> +    vgpu_fw_heap_size,
>      GspFmcBootParams,
>      GspFwWprMeta,
>      LibosParams, //
> @@ -59,7 +60,6 @@ pub(crate) struct GspBootContext<'a> {
>      pub(crate) gsp_falcon: &'a Falcon<GspFalcon>,
>      pub(crate) sec2_falcon: &'a Falcon<Sec2Falcon>,
>      pub(crate) vgpu_requested: Cell<bool>,
> -    #[expect(dead_code)]
>      pub(crate) total_vfs: u16,
>  }
>  
> diff --git a/drivers/gpu/nova-core/gsp/boot.rs b/drivers/gpu/nova-core/gsp/boot.rs
> index 2981d02d15ad..7c1f3f962fbe 100644
> --- a/drivers/gpu/nova-core/gsp/boot.rs
> +++ b/drivers/gpu/nova-core/gsp/boot.rs
> @@ -111,7 +111,13 @@ pub(crate) fn boot(
>              GFP_KERNEL,
>          )?;
>  
> -        let fb_layout = FbLayout::new(ctx.chipset, ctx.bar, &gsp_fw)?;
> +        let fb_layout = FbLayout::new(
> +            ctx.chipset,
> +            ctx.bar,
> +            &gsp_fw,
> +            ctx.vgpu_requested.get(),
> +            ctx.total_vfs,
> +        )?;
>          dev_dbg!(dev, "{:#x?}\n", fb_layout);
>  
>          let wpr_meta = Coherent::init(dev, GFP_KERNEL, GspFwWprMeta::new(&gsp_fw, &fb_layout))?;
> @@ -138,8 +144,10 @@ pub(crate) fn boot(
>  
>          self.cmdq
>              .send_command_no_wait(ctx.bar, commands::SetSystemInfo::new(ctx.pdev, ctx.chipset))?;
> -        self.cmdq
> -            .send_command_no_wait(ctx.bar, commands::SetRegistry::new(ctx.vgpu_requested.get())?)?;
> +        self.cmdq.send_command_no_wait(
> +            ctx.bar,
> +            commands::SetRegistry::new(ctx.vgpu_requested.get())?,
> +        )?;
>  
>          hal.post_boot(&self, ctx, &gsp_fw)?;
>  
> diff --git a/drivers/gpu/nova-core/gsp/fw.rs b/drivers/gpu/nova-core/gsp/fw.rs
> index 14424a2c2d83..2f3cbc5d5114 100644
> --- a/drivers/gpu/nova-core/gsp/fw.rs
> +++ b/drivers/gpu/nova-core/gsp/fw.rs
> @@ -101,6 +101,18 @@ pub(in crate::gsp) fn advance_cpu_write_ptr(qs: &Coherent<GspMem>, count: u32) {
>  pub(crate) const GSP_MSG_QUEUE_ELEMENT_SIZE_MAX: usize =
>      num::u32_as_usize(bindings::GSP_MSG_QUEUE_ELEMENT_SIZE_MAX);
>  
> +const GSP_FW_HEAP_SIZE_VGPU_1VM: u64 = 174 * u64::SZ_1M;
> +const GSP_FW_HEAP_SIZE_VGPU_DEFAULT: u64 = 581 * u64::SZ_1M;
> +const GSP_FW_HEAP_SIZE_VGPU_48VMS: u64 = 1370 * u64::SZ_1M;

Do we have a source of truth for these values? I can see definitions for
`GSP_FW_HEAP_SIZE_VGPU_DEFAULT` and `GSP_FW_HEAP_SIZE_VGPU_48VMS` in
OpenRM (meaning we could generate bindings for them), but cannot find a
reference for `GSP_FW_HEAP_SIZE_VGPU_1VM`...

     prev parent reply	other threads:[~2026-06-16 14:20 UTC|newest]

Thread overview: 19+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-06-04 11:43 [PATCH 0/9] gpu: nova-core: boot GSP with vGPU enabled on Zhi Wang
2026-06-04 11:43 ` [PATCH 1/9] rust: pci: expose sriov_get_totalvfs() helper Zhi Wang
2026-06-05 14:08   ` Alexandre Courbot
2026-06-04 11:43 ` [PATCH 2/9] gpu: nova-core: factor out common FSP message header Zhi Wang
2026-06-05 13:21   ` Alexandre Courbot
2026-06-04 11:43 ` [PATCH 3/9] gpu: nova-core: return FSP response buffer to caller Zhi Wang
2026-06-05 13:25   ` Alexandre Courbot
2026-06-05 16:04     ` Zhi Wang
2026-06-09  6:07       ` Alexandre Courbot
2026-06-04 11:43 ` [PATCH 4/9] gpu: nova-core: read vGPU mode from FSP via PRC protocol Zhi Wang
2026-06-16  8:35   ` Alexandre Courbot
2026-06-04 11:43 ` [PATCH 5/9] gpu: nova-core: add FSP and PRC protocol documentation Zhi Wang
2026-06-16  8:17   ` Alexandre Courbot
2026-06-04 11:43 ` [PATCH 6/9] gpu: nova-core: consolidate GSP boot parameters into GspBootContext Zhi Wang
2026-06-16 14:13   ` Alexandre Courbot
2026-06-04 11:43 ` [PATCH 7/9] gpu: nova-core: add vGPU preludes Zhi Wang
2026-06-04 11:43 ` [PATCH 8/9] gpu: nova-core: set RMSetSriovMode when NVIDIA vGPU is enabled Zhi Wang
2026-06-04 11:43 ` [PATCH] gpu: nova-core: reserve a larger GSP WPR2 heap when " Zhi Wang
2026-06-16 14:20   ` Alexandre Courbot [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=DJAJGC4LLOK3.YAX75103O00O@nvidia.com \
    --to=acourbot@nvidia.com \
    --cc=a.hindborg@kernel.org \
    --cc=airlied@gmail.com \
    --cc=alex.gaynor@gmail.com \
    --cc=aliceryhl@google.com \
    --cc=alkumar@nvidia.com \
    --cc=aniketa@nvidia.com \
    --cc=ankita@nvidia.com \
    --cc=apopple@nvidia.com \
    --cc=bjorn3_gh@protonmail.com \
    --cc=boqun.feng@gmail.com \
    --cc=cjia@nvidia.com \
    --cc=dakr@kernel.org \
    --cc=ecourtney@nvidia.com \
    --cc=gary@garyguo.net \
    --cc=jhubbard@nvidia.com \
    --cc=joelagnelf@nvidia.com \
    --cc=kjaju@nvidia.com \
    --cc=kwankhede@nvidia.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=lossin@kernel.org \
    --cc=nova-gpu@lists.linux.dev \
    --cc=ojeda@kernel.org \
    --cc=simona@ffwll.ch \
    --cc=smitra@nvidia.com \
    --cc=targupta@nvidia.com \
    --cc=tmgross@umich.edu \
    --cc=zhiw@nvidia.com \
    --cc=zhiwang@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.