Re: [PATCH v2 4/7] gpu: nova-core: add vGPU preludes

NVIDIA GPU driver infrastructure
 help / color / mirror / Atom feed

From: "Alexandre Courbot" <acourbot@nvidia.com>
To: "Zhi Wang" <zhiw@nvidia.com>
Cc: <dakr@kernel.org>, <airlied@gmail.com>, <simona@ffwll.ch>,
	<ojeda@kernel.org>, <alex.gaynor@gmail.com>,
	<boqun.feng@gmail.com>, <gary@garyguo.net>,
	<bjorn3_gh@protonmail.com>, <lossin@kernel.org>,
	<a.hindborg@kernel.org>, <aliceryhl@google.com>,
	<tmgross@umich.edu>, <jhubbard@nvidia.com>,
	<ecourtney@nvidia.com>, <joelagnelf@nvidia.com>,
	<apopple@nvidia.com>, <cjia@nvidia.com>, <smitra@nvidia.com>,
	<kjaju@nvidia.com>, <alkumar@nvidia.com>, <ankita@nvidia.com>,
	<aniketa@nvidia.com>, <kwankhede@nvidia.com>,
	<targupta@nvidia.com>, <nova-gpu@lists.linux.dev>,
	<linux-kernel@vger.kernel.org>, <zhiwang@kernel.org>
Subject: Re: [PATCH v2 4/7] gpu: nova-core: add vGPU preludes
Date: Tue, 30 Jun 2026 22:21:36 +0900	[thread overview]
Message-ID: <DJMEYQBBBD7D.3G9I5PBPLAKFD@nvidia.com> (raw)
In-Reply-To: <20260622194353.1308872-5-zhiw@nvidia.com>

On Tue Jun 23, 2026 at 4:43 AM JST, Zhi Wang wrote:
> Detect vGPU state before GSP boot so later boot steps can consume a
> stable view of whether vGPU is enabled and how many SR-IOV VFs are
> available.
>
> Introduce VgpuManager to keep the detected vGPU state. The manager uses
> a GPU HAL capability gate, pci_sriov_get_totalvfs(), and the FSP PRC
> vGPU mode knob to decide whether vGPU is enabled for the current boot.
> vGPU is considered enabled only when at least two VFs are available.
> The state is read-only after construction and is referenced from
> GspBootContext instead of being copied into it.
>
> Signed-off-by: Zhi Wang <zhiw@nvidia.com>
> ---
>  drivers/gpu/nova-core/fsp.rs           |  1 -
>  drivers/gpu/nova-core/gpu.rs           | 12 ++++++
>  drivers/gpu/nova-core/gpu/hal.rs       |  3 ++
>  drivers/gpu/nova-core/gpu/hal/gh100.rs | 12 +++++-
>  drivers/gpu/nova-core/gpu/hal/tu102.rs |  5 +++
>  drivers/gpu/nova-core/gsp.rs           |  2 +
>  drivers/gpu/nova-core/gsp/boot.rs      |  7 +++
>  drivers/gpu/nova-core/nova_core.rs     |  1 +
>  drivers/gpu/nova-core/vgpu.rs          | 60 ++++++++++++++++++++++++++
>  9 files changed, 101 insertions(+), 2 deletions(-)
>  create mode 100644 drivers/gpu/nova-core/vgpu.rs
>
> diff --git a/drivers/gpu/nova-core/fsp.rs b/drivers/gpu/nova-core/fsp.rs
> index 2bf01c2d1175..3b5006750693 100644
> --- a/drivers/gpu/nova-core/fsp.rs
> +++ b/drivers/gpu/nova-core/fsp.rs
> @@ -469,7 +469,6 @@ fn send_sync_fsp<M>(&mut self, dev: &device::Device, bar: Bar0<'_>, msg: &M) ->
>      /// Reads the active vGPU mode from FSP using the PRC protocol.
>      ///
>      /// Queries FSP's Management Partition for the active vGPU mode knob value.
> -    #[expect(dead_code)]
>      pub(crate) fn read_vgpu_mode(
>          &mut self,
>          dev: &device::Device<device::Bound>,
> diff --git a/drivers/gpu/nova-core/gpu.rs b/drivers/gpu/nova-core/gpu.rs
> index e5ebd79c9020..646eb6c63c21 100644
> --- a/drivers/gpu/nova-core/gpu.rs
> +++ b/drivers/gpu/nova-core/gpu.rs
> @@ -30,6 +30,7 @@
>          GspBootMethod, //
>      },
>      regs,
> +    vgpu::VgpuManager,
>  };
>  
>  mod hal;
> @@ -138,6 +139,11 @@ pub(crate) const fn arch(self) -> Architecture {
>      pub(crate) fn pci_config_mirror_range(self) -> Range<u32> {
>          hal::gpu_hal(self).pci_config_mirror_range()
>      }
> +
> +    /// Returns whether this chipset can support vGPU.
> +    pub(crate) fn supports_vgpu(self) -> bool {
> +        hal::gpu_hal(self).supports_vgpu(self)
> +    }

I noticed that this method is only used in the `vgpu` module. How about
moving it there, so vGPU functionality is concentrated under that
module?

You could add an `impl Chipset` block in `vgpu`, but since it becomes
module-local anyway I think we can just create a `vgpu::hal` module with
the `supports_vgpu` method (instead of having it in `gpu` as I initially
suggested) and call it locally. We will require a vGPU HAL for other
things, so this is a good way to start.

>  }
>  
>  // TODO
> @@ -267,6 +273,8 @@ struct GspResources<'gpu> {
>      // TODO: use different resource types for each boot method, and make the relevant Gsp methods
>      // generic against them.
>      fsp: Option<Fsp>,
> +    /// vGPU state detected before GSP boot.
> +    vgpu: VgpuManager,
>      /// GSP runtime data.
>      #[pin]
>      gsp: Gsp,
> @@ -311,6 +319,7 @@ fn drop(self: Pin<&mut Self>) {
>                      gsp_falcon: &*this.gsp_falcon,
>                      sec2_falcon: &*this.sec2_falcon,
>                      fsp: this.fsp.as_mut(),
> +                    vgpu: &*this.vgpu,
>                  },
>                  bundle,
>              )
> @@ -366,6 +375,8 @@ pub(crate) fn new(
>                      GspBootMethod::Fsp => Some(Fsp::wait_secure_boot(dev, bar, spec.chipset)?),
>                  },
>  
> +                vgpu: VgpuManager::new(pdev, spec.chipset, bar, fsp.as_mut())?,
> +
>                  gsp <- Gsp::new(pdev),
>  
>                  // This member must be initialized last, so the `UnloadBundle` can never be dropped
> @@ -378,6 +389,7 @@ pub(crate) fn new(
>                      gsp_falcon,
>                      sec2_falcon,
>                      fsp: fsp.as_mut(),
> +                    vgpu,
>                  })?,
>              }),
>  
> diff --git a/drivers/gpu/nova-core/gpu/hal.rs b/drivers/gpu/nova-core/gpu/hal.rs
> index 3f25882d0e56..2116c71242ec 100644
> --- a/drivers/gpu/nova-core/gpu/hal.rs
> +++ b/drivers/gpu/nova-core/gpu/hal.rs
> @@ -27,6 +27,9 @@ pub(crate) trait GpuHal {
>  
>      /// Returns the address range of the PCI config mirror space.
>      fn pci_config_mirror_range(&self) -> Range<u32>;
> +
> +    /// Returns whether this chipset can support vGPU.
> +    fn supports_vgpu(&self, chipset: Chipset) -> bool;
>  }
>  
>  pub(super) fn gpu_hal(chipset: Chipset) -> &'static dyn GpuHal {
> diff --git a/drivers/gpu/nova-core/gpu/hal/gh100.rs b/drivers/gpu/nova-core/gpu/hal/gh100.rs
> index e3f8ba0fab33..8e18206961ae 100644
> --- a/drivers/gpu/nova-core/gpu/hal/gh100.rs
> +++ b/drivers/gpu/nova-core/gpu/hal/gh100.rs
> @@ -7,7 +7,13 @@
>      prelude::*, //
>  };
>  
> -use crate::driver::Bar0;
> +use crate::{
> +    driver::Bar0,
> +    gpu::{
> +        Architecture,
> +        Chipset, //
> +    },
> +};
>  
>  use super::GpuHal;
>  
> @@ -28,6 +34,10 @@ fn pci_config_mirror_range(&self) -> Range<u32> {
>  
>          PCI_CONFIG_MIRROR_START..PCI_CONFIG_MIRROR_START + PCI_CONFIG_MIRROR_SIZE
>      }
> +
> +    fn supports_vgpu(&self, chipset: Chipset) -> bool {
> +        matches!(chipset.arch(), Architecture::BlackwellGB20x)
> +    }

The HAL already provides us with a way to match against the chipset, so
let's use that.

If this is moving under `vgpu`, all you need is a `tu102` HAl returning
`false`, and a `gb202` returning `true`.

>  }
>  
>  const GH100: Gh100 = Gh100;
> diff --git a/drivers/gpu/nova-core/gpu/hal/tu102.rs b/drivers/gpu/nova-core/gpu/hal/tu102.rs
> index b0732e53edea..1e2c7bbdb4ad 100644
> --- a/drivers/gpu/nova-core/gpu/hal/tu102.rs
> +++ b/drivers/gpu/nova-core/gpu/hal/tu102.rs
> @@ -32,6 +32,7 @@
>  
>  use crate::{
>      driver::Bar0,
> +    gpu::Chipset,
>      regs, //
>  };
>  
> @@ -94,6 +95,10 @@ fn pci_config_mirror_range(&self) -> Range<u32> {
>  
>          PCI_CONFIG_MIRROR_START..PCI_CONFIG_MIRROR_START + PCI_CONFIG_MIRROR_SIZE
>      }
> +
> +    fn supports_vgpu(&self, _chipset: Chipset) -> bool {
> +        false
> +    }
>  }
>  
>  const TU102: Tu102 = Tu102;
> diff --git a/drivers/gpu/nova-core/gsp.rs b/drivers/gpu/nova-core/gsp.rs
> index ff438506070a..6821008d48d9 100644
> --- a/drivers/gpu/nova-core/gsp.rs
> +++ b/drivers/gpu/nova-core/gsp.rs
> @@ -51,6 +51,7 @@
>          },
>      },
>      num,
> +    vgpu::VgpuManager,
>  };
>  
>  pub(crate) const GSP_PAGE_SHIFT: usize = 12;
> @@ -64,6 +65,7 @@ pub(crate) struct GspBootContext<'a> {
>      pub(crate) gsp_falcon: &'a Falcon<GspFalcon>,
>      pub(crate) sec2_falcon: &'a Falcon<Sec2Falcon>,
>      pub(crate) fsp: Option<&'a mut Fsp>,
> +    pub(crate) vgpu: &'a VgpuManager,
>  }
>  
>  impl<'a> GspBootContext<'a> {
> diff --git a/drivers/gpu/nova-core/gsp/boot.rs b/drivers/gpu/nova-core/gsp/boot.rs
> index 1e0d4793e96d..0a33cf4dd975 100644
> --- a/drivers/gpu/nova-core/gsp/boot.rs
> +++ b/drivers/gpu/nova-core/gsp/boot.rs
> @@ -47,6 +47,13 @@ pub(crate) fn boot(
>          let dev = pdev.as_ref();
>          let hal = super::hal::gsp_hal(chipset);
>  
> +        dev_dbg!(
> +            dev,
> +            "vGPU enabled: {}, total VFs: {}\n",
> +            ctx.vgpu.enabled(),
> +            ctx.vgpu.total_vfs()
> +        );
> +
>          let gsp_fw = KBox::pin_init(GspFirmware::new(dev, chipset, FIRMWARE_VERSION), GFP_KERNEL)?;
>  
>          let fb_layout = FbLayout::new(chipset, bar, &gsp_fw)?;
> diff --git a/drivers/gpu/nova-core/nova_core.rs b/drivers/gpu/nova-core/nova_core.rs
> index 735b8e17c6b6..2df2f773ec8e 100644
> --- a/drivers/gpu/nova-core/nova_core.rs
> +++ b/drivers/gpu/nova-core/nova_core.rs
> @@ -26,6 +26,7 @@
>  mod regs;
>  mod sbuffer;
>  mod vbios;
> +mod vgpu;
>  
>  pub(crate) const MODULE_NAME: &core::ffi::CStr = <LocalModule as kernel::ModuleMetadata>::NAME;
>  
> diff --git a/drivers/gpu/nova-core/vgpu.rs b/drivers/gpu/nova-core/vgpu.rs
> new file mode 100644
> index 000000000000..08fa37d80b28
> --- /dev/null
> +++ b/drivers/gpu/nova-core/vgpu.rs
> @@ -0,0 +1,60 @@
> +// SPDX-License-Identifier: GPL-2.0
> +
> +use kernel::{
> +    device,
> +    pci,
> +    prelude::*, //
> +};
> +
> +use crate::{
> +    driver::Bar0,
> +    fsp::{
> +        Fsp,
> +        VgpuMode, //
> +    },
> +    gpu::Chipset,
> +};
> +
> +/// vGPU state detected during GPU construction.
> +pub(crate) struct VgpuManager {
> +    enabled: bool,
> +    total_vfs: u16,
> +}
> +
> +impl VgpuManager {
> +    /// Creates a vGPU manager by querying SR-IOV and the FSP PRC vGPU knob.
> +    pub(crate) fn new(
> +        pdev: &pci::Device<device::Bound>,
> +        chipset: Chipset,
> +        bar: Bar0<'_>,
> +        fsp: Option<&mut Fsp>,
> +    ) -> Result<Self> {
> +        let total_vfs = if chipset.supports_vgpu() {
> +            pdev.sriov_get_totalvfs()
> +        } else {
> +            0
> +        };
> +
> +        let enabled = if total_vfs < 2 {

I know my previous feedback questioned the provenance of the value for
one VF, but it is fine to keep it if you have evidence that it works and
is useful. It just needs justification. :)

If we prefer to not support single-VF configurations, let's at least add
a comment explaining the value of 2 here.

> +            false
> +        } else if let Some(fsp) = fsp {
> +            let mode = fsp.read_vgpu_mode(pdev.as_ref(), bar)?;
> +
> +            mode == VgpuMode::Enabled
> +        } else {
> +            false
> +        };
> +
> +        Ok(Self { enabled, total_vfs })
> +    }
> +
> +    /// Returns whether vGPU mode is enabled for this boot.
> +    pub(crate) fn enabled(&self) -> bool {
> +        self.enabled
> +    }
> +
> +    /// Returns the total number of SR-IOV VFs supported by this device.
> +    pub(crate) fn total_vfs(&self) -> u16 {
> +        self.total_vfs
> +    }

These two methods (`enabled` and `total_vfs`) are good candidates to be
fused into a single one that returns an `enum`. After all, `total_vfs`
only makes sense if `enabled() == true` - if vGPU is not enabled, its
only valid value should be `0`, but we have no way to enforce that with
the current method.

Having a

  enum VgpuState {
    Disabled,
    Enabled { total_vfs: u16 },
  }

Would make it impossible from the get-go to express invalid state.

next prev parent reply	other threads:[~2026-06-30 13:21 UTC|newest]

Thread overview: 18+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-06-22 19:43 [PATCH v2 0/7] gpu: nova-core: boot GSP with vGPU enabled Zhi Wang
2026-06-22 19:43 ` [PATCH v2 1/7] PCI/IOV: Return u16 from pci_sriov_get_totalvfs() Zhi Wang
2026-06-24 12:40   ` Alexandre Courbot
2026-06-24 13:39     ` David Laight
2026-06-24 14:59       ` Alexandre Courbot
2026-06-24 19:38         ` David Laight
2026-06-24 23:11           ` Gary Guo
2026-06-25  0:45             ` Alexandre Courbot
2026-06-22 19:43 ` [PATCH v2 2/7] rust: pci: Add sriov_get_totalvfs() helper Zhi Wang
2026-06-22 19:43 ` [PATCH v2 3/7] gpu: nova-core: read vGPU mode from FSP via PRC protocol Zhi Wang
2026-06-30 10:39   ` Alexandre Courbot
2026-06-22 19:43 ` [PATCH v2 4/7] gpu: nova-core: add vGPU preludes Zhi Wang
2026-06-30 13:21   ` Alexandre Courbot [this message]
2026-06-22 19:43 ` [PATCH v2 5/7] gpu: nova-core: build SetRegistry entries dynamically Zhi Wang
2026-06-22 19:43 ` [PATCH v2 6/7] gpu: nova-core: set RMSetSriovMode when vGPU is enabled Zhi Wang
2026-06-22 19:43 ` [PATCH v2 7/7] gpu: nova-core: reserve larger WPR2 heap for vGPU Zhi Wang
2026-06-30 13:26   ` Alexandre Courbot
2026-06-30 13:39   ` Alexandre Courbot

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=DJMEYQBBBD7D.3G9I5PBPLAKFD@nvidia.com \
    --to=acourbot@nvidia.com \
    --cc=a.hindborg@kernel.org \
    --cc=airlied@gmail.com \
    --cc=alex.gaynor@gmail.com \
    --cc=aliceryhl@google.com \
    --cc=alkumar@nvidia.com \
    --cc=aniketa@nvidia.com \
    --cc=ankita@nvidia.com \
    --cc=apopple@nvidia.com \
    --cc=bjorn3_gh@protonmail.com \
    --cc=boqun.feng@gmail.com \
    --cc=cjia@nvidia.com \
    --cc=dakr@kernel.org \
    --cc=ecourtney@nvidia.com \
    --cc=gary@garyguo.net \
    --cc=jhubbard@nvidia.com \
    --cc=joelagnelf@nvidia.com \
    --cc=kjaju@nvidia.com \
    --cc=kwankhede@nvidia.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=lossin@kernel.org \
    --cc=nova-gpu@lists.linux.dev \
    --cc=ojeda@kernel.org \
    --cc=simona@ffwll.ch \
    --cc=smitra@nvidia.com \
    --cc=targupta@nvidia.com \
    --cc=tmgross@umich.edu \
    --cc=zhiw@nvidia.com \
    --cc=zhiwang@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox