rust-for-linux.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: "Alexandre Courbot" <acourbot@nvidia.com>
To: "Timur Tabi" <ttabi@nvidia.com>,
	"Danilo Krummrich" <dakr@kernel.org>,
	"Alexandre Courbot" <acourbot@nvidia.com>,
	"Joel Fernandes" <joelagnelf@nvidia.com>,
	"John Hubbard" <jhubbard@nvidia.com>,
	<nouveau@lists.freedesktop.org>, <rust-for-linux@vger.kernel.org>
Subject: Re: [PATCH v4 11/11] gpu: nova-core: add PIO support for loading firmware images
Date: Thu, 18 Dec 2025 21:59:25 +0900	[thread overview]
Message-ID: <DF1D026W0BQ9.1DUWS5LKR09TG@nvidia.com> (raw)
In-Reply-To: <20251218032955.979623-12-ttabi@nvidia.com>

On Thu Dec 18, 2025 at 12:29 PM JST, Timur Tabi wrote:
<snip>
> +    fn pio_wr<F: FalconFirmware<Target = E>>(
> +        &self,
> +        bar: &Bar0,
> +        fw: &F,
> +        target_mem: FalconMem,
> +        load_offsets: &FalconLoadTarget,
> +        port: u8,
> +        tag: u16,
> +    ) -> Result {
> +        let start = usize::from_safe_cast(load_offsets.src_start);
> +        let len = usize::from_safe_cast(load_offsets.len);
> +        let mem_base = u16::try_from(load_offsets.dst_start)?;
> +
> +        // SAFETY: as_slice() ensures that start+len is within range

That's not the safety concern - check the documentation for `as_slice`.
We need to ensure that there won't be any concurrent access to the DMA
object. Since we are the only user of it at this stage, that's a
guarantee we can indeed provide.

> +        let data = unsafe { fw.as_slice(start, len).map_err(|_| EINVAL)? };
> +
> +        self.pio_wr_bytes(bar, data, mem_base, target_mem, port, tag)
> +    }
> +
> +    /// Perform a PIO copy into `IMEM` and `DMEM` of `fw`, and prepare the falcon to run it.
> +    pub(crate) fn pio_load<F: FalconFirmware<Target = E>>(
> +        &self,
> +        bar: &Bar0,
> +        fw: &F,
> +        gbl: Option<&GenericBootloader>,
> +    ) -> Result {
> +        let imem_sec = fw.imem_sec_load_params();
> +        let imem_ns = fw.imem_ns_load_params().ok_or(EINVAL)?;
> +        let dmem = fw.dmem_load_params();
> +
> +        regs::NV_PFALCON_FBIF_CTL::read(bar, &E::ID)
> +            .set_allow_phys_no_ctx(true)
> +            .write(bar, &E::ID);
> +
> +        regs::NV_PFALCON_FALCON_DMACTL::default().write(bar, &E::ID);
> +
> +        // If the Generic Bootloader was passed, then use it to boot FRTS
> +        if let Some(gbl) = gbl {
> +            let dst_start = u16::try_from(0x10000 - gbl.desc.code_size)?;
> +            let data = &gbl.ucode[..usize::from_safe_cast(gbl.desc.code_size)];
> +            let tag = u16::try_from(gbl.desc.start_tag)?;
> +
> +            self.pio_wr_bytes(bar, data, dst_start, FalconMem::ImemNonSecure, 0, tag)?;
> +
> +            // This structure tells the generic bootloader where to find the FWSEC
> +            // image.
> +            let dmem_desc = BootloaderDmemDescV2 {
> +                reserved: [0; 4],
> +                signature: [0; 4],
> +                ctx_dma: 4, // FALCON_DMAIDX_PHYS_SYS_NCOH
> +                code_dma_base: fw.dma_handle(),
> +                non_sec_code_off: imem_ns.dst_start,
> +                non_sec_code_size: imem_ns.len,
> +                sec_code_off: imem_sec.dst_start,
> +                sec_code_size: imem_sec.len,
> +                code_entry_point: 0,
> +                data_dma_base: fw.dma_handle() + u64::from(dmem.src_start),
> +                data_size: dmem.len,
> +                argc: 0,
> +                argv: 0,
> +            };
> +
> +            regs::NV_PFALCON_FBIF_TRANSCFG::update(bar, &E::ID, 4, |v| {
> +                v.set_target(FalconFbifTarget::CoherentSysmem)
> +                    .set_mem_type(FalconFbifMemType::Physical)
> +            });
> +
> +            self.pio_wr_bytes(bar, dmem_desc.as_bytes(), 0, FalconMem::Dmem, 0, 0)?;

So this `if` branch is really special-casing the generic bootloader. But
at the end of the day it just does these things:

- Write an `ImemNonSecure` section,
- Write an `Dmem` section,
- Program the `TRANSCFG` register so the bootloader can initiate the DMA
  transfer.

The first two steps can be expressed as a set of `FalconLoadTarget`s.
That way they can be handled by the non-generic-bootloader path, and we
can remove the `gbl` argument.

So `FwsecFirmware` could have an optional member that contains both the
generic bootloader and the `BootloaderDmemDescV2` corresponding to it.
If that optional member is `Some`, then it returns the `FalconLoadTarget`s
corresponding to the generic bootloader. Otherwise, it behaves as
before.

Interestingly there is no `ImemSecure` section to write so I guess we
will have to make `imem_sec_load_params` return an `Option` as well.

And `NV_PFALCON_FBIF_TRANSCFG` is always programmed as the worst thing
that can happen is that we don't use the DMA engine if there is no
generic bootloader.

> +        } else {
> +            self.pio_wr(
> +                bar,
> +                fw,
> +                FalconMem::ImemNonSecure,
> +                &imem_ns,
> +                0,
> +                u16::try_from(imem_ns.dst_start >> 8)?,
> +            )?;
> +            self.pio_wr(
> +                bar,
> +                fw,
> +                FalconMem::ImemSecure,
> +                &imem_sec,
> +                0,
> +                u16::try_from(imem_sec.dst_start >> 8)?,
> +            )?;
> +            self.pio_wr(bar, fw, FalconMem::Dmem, &dmem, 0, 0)?;
> +        }
> +
> +        self.hal.program_brom(self, bar, &fw.brom_params())?;
> +
> +        // Set `BootVec` to start of non-secure code.
> +        regs::NV_PFALCON_FALCON_BOOTVEC::default()
> +            .set_value(fw.boot_addr())
> +            .write(bar, &E::ID);
> +
> +        Ok(())
> +    }
> +
>      /// Perform a DMA write according to `load_offsets` from `dma_handle` into the falcon's
>      /// `target_mem`.
>      ///
> diff --git a/drivers/gpu/nova-core/firmware.rs b/drivers/gpu/nova-core/firmware.rs
> index 44897feb82a4..26efbf4f6760 100644
> --- a/drivers/gpu/nova-core/firmware.rs
> +++ b/drivers/gpu/nova-core/firmware.rs
> @@ -31,7 +31,7 @@
>  pub(crate) const FIRMWARE_VERSION: &str = "570.144";
>  
>  /// Requests the GPU firmware `name` suitable for `chipset`, with version `ver`.
> -fn request_firmware(
> +pub(crate) fn request_firmware(

There is no need to change the visibility of this function.

>      dev: &device::Device,
>      chipset: gpu::Chipset,
>      name: &str,
> @@ -321,7 +321,7 @@ fn no_patch_signature(self) -> FirmwareDmaObject<F, Signed> {
>  /// Header common to most firmware files.
>  #[repr(C)]
>  #[derive(Debug, Clone)]
> -struct BinHdr {
> +pub(crate) struct BinHdr {

Same for this type.

>      /// Magic number, must be `0x10de`.
>      bin_magic: u32,
>      /// Version of the header.
> diff --git a/drivers/gpu/nova-core/firmware/fwsec.rs b/drivers/gpu/nova-core/firmware/fwsec.rs
> index 1c1dcdacf5f5..4c26257bbfe0 100644
> --- a/drivers/gpu/nova-core/firmware/fwsec.rs
> +++ b/drivers/gpu/nova-core/firmware/fwsec.rs
> @@ -40,12 +40,15 @@
>          FalconLoadTarget, //
>      },
>      firmware::{
> +        BinHdr,
>          FalconUCodeDesc,
>          FirmwareDmaObject,
>          FirmwareSignature,
>          Signed,
>          Unsigned, //
> +        FIRMWARE_VERSION,
>      },
> +    gpu::Chipset,
>      num::{
>          FromSafeCast,
>          IntoSafeCast, //
> @@ -213,6 +216,72 @@ unsafe fn transmute_mut<T: Sized + FromBytes + AsBytes>(
>      T::from_bytes_mut(unsafe { fw.as_slice_mut(offset, size_of::<T>())? }).ok_or(EINVAL)
>  }
>  
> +/// Descriptor used by RM to figure out the requirements of the boot loader.
> +#[repr(C)]
> +#[derive(Debug, Clone)]
> +pub(crate) struct BootloaderDesc {
> +    /// Starting tag of bootloader.
> +    pub start_tag: u32,
> +    /// DMEM offset where [`BootloaderDmemDescV2`] is to be loaded.
> +    pub dmem_load_off: u32,
> +    /// Offset of code section in the image.
> +    pub code_off: u32,
> +    /// Size of code section in the image.
> +    pub code_size: u32,
> +    /// Offset of data section in the image.
> +    pub data_off: u32,
> +    /// Size of data section in the image.
> +    pub data_size: u32,
> +}
> +// SAFETY: any byte sequence is valid for this struct.
> +unsafe impl FromBytes for BootloaderDesc {}
> +// SAFETY: This struct doesn't contain uninitialized bytes and doesn't have interior mutability.
> +unsafe impl AsBytes for BootloaderDesc {}

We only need to implement `FromBytes` for this type, `AsBytes` is not
needed.

> +
> +/// Structure used by the boot-loader to load the rest of the code.
> +///
> +/// This has to be filled by the GPU driver and copied into DMEM at offset
> +/// [`BootloaderDesc.dmem_load_off`].
> +#[repr(C, packed)]
> +#[derive(Debug, Clone)]
> +pub(crate) struct BootloaderDmemDescV2 {
> +    /// Reserved, should always be first element.
> +    pub reserved: [u32; 4],
> +    /// 16B signature for secure code, 0s if no secure code.
> +    pub signature: [u32; 4],
> +    /// DMA context used by the bootloader while loading code/data.
> +    pub ctx_dma: u32,
> +    /// 256B-aligned physical FB address where code is located.
> +    pub code_dma_base: u64,
> +    /// Offset from `code_dma_base` where the non-secure code is located (must be multiple of 256).
> +    pub non_sec_code_off: u32,
> +    /// Size of the non-secure code part.
> +    pub non_sec_code_size: u32,
> +    /// Offset from `code_dma_base` where the secure code is located (must be multiple of 256).
> +    pub sec_code_off: u32,
> +    /// Size of the secure code part.
> +    pub sec_code_size: u32,
> +    /// Code entry point invoked by the bootloader after code is loaded.
> +    pub code_entry_point: u32,
> +    /// 256B-aligned physical FB address where data is located.
> +    pub data_dma_base: u64,
> +    /// Size of data block (should be multiple of 256B).
> +    pub data_size: u32,
> +    /// Arguments to be passed to the target firmware being loaded.
> +    pub argc: u32,
> +    /// Number of arguments to be passed to the target firmware being loaded.
> +    pub argv: u32,
> +}
> +// SAFETY: any byte sequence is valid for this struct.
> +unsafe impl FromBytes for BootloaderDmemDescV2 {}
> +// SAFETY: This struct doesn't contain uninitialized bytes and doesn't have interior mutability.
> +unsafe impl AsBytes for BootloaderDmemDescV2 {}

Here we can do without `FromBytes`.

> +
> +pub(crate) struct GenericBootloader {
> +    pub desc: BootloaderDesc,
> +    pub ucode: Vec<u8, kernel::alloc::allocator::Kmalloc>,
> +}
> +
>  /// The FWSEC microcode, extracted from the BIOS and to be run on the GSP falcon.
>  ///
>  /// It is responsible for e.g. carving out the WPR2 region as the first step of the GSP bootflow.
> @@ -221,6 +290,8 @@ pub(crate) struct FwsecFirmware {
>      desc: FalconUCodeDesc,
>      /// GPU-accessible DMA object containing the firmware.
>      ucode: FirmwareDmaObject<Self, Signed>,
> +    /// Generic bootloader
> +    gen_bootloader: Option<GenericBootloader>,
>  }
>  
>  impl FalconLoadParams for FwsecFirmware {
> @@ -275,7 +346,19 @@ fn brom_params(&self) -> FalconBromParams {
>      }
>  
>      fn boot_addr(&self) -> u32 {
> -        0
> +        match &self.desc {
> +            FalconUCodeDesc::V2(_v2) => {
> +                // On V2 platforms, the boot address is extracted from the
> +                // generic bootloader, because the gbl is what actually copies
> +                // FWSEC into memory, so that is what needs to be booted.
> +                if let Some(ref gbl) = self.gen_bootloader {
> +                    gbl.desc.start_tag << 8
> +                } else {
> +                    0
> +                }
> +            }
> +            FalconUCodeDesc::V3(_v3) => 0,
> +        }
>      }
>  }
>  
> @@ -376,6 +459,7 @@ impl FwsecFirmware {
>      /// command.
>      pub(crate) fn new(
>          dev: &Device<device::Bound>,
> +        chipset: Chipset,
>          falcon: &Falcon<Gsp>,
>          bar: &Bar0,
>          bios: &Vbios,
> @@ -432,9 +516,49 @@ pub(crate) fn new(
>              ucode_dma.no_patch_signature()
>          };
>  
> +        // The Generic Bootloader exists only on Turing and GA100.  To avoid a bogus
> +        // console error message on other platforms, only try to load it if it's
> +        // supposed to be there.
> +        let gbl_fw = if chipset < Chipset::GA102 {
> +            super::request_firmware(dev, chipset, "gen_bootloader", FIRMWARE_VERSION)
> +        } else {
> +            Err(ENOENT)
> +        };

Using `Err` to indicate no firmware means that we will proceed even if
`request_firmware` returns an error. This should be:

    let gbl_fw = if chipset < Chipset::GA102 {
        Some(super::request_firmware(dev, chipset, "gen_bootloader", FIRMWARE_VERSION)?)
    } else {
        None
    };

> +
> +        let gbl = match gbl_fw {
> +            Ok(fw) => {
> +                let hdr = fw
> +                    .data()
> +                    .get(0..size_of::<BinHdr>())
> +                    .and_then(BinHdr::from_bytes_copy)
> +                    .ok_or(EINVAL)?;
> +
> +                let desc_offset = usize::from_safe_cast(hdr.header_offset);
> +                let desc = fw
> +                    .data()
> +                    .get(desc_offset..desc_offset + size_of::<BootloaderDesc>())
> +                    .and_then(BootloaderDesc::from_bytes_copy)
> +                    .ok_or(EINVAL)?;
> +
> +                let ucode_start = usize::from_safe_cast(hdr.data_offset);
> +                let ucode_size = usize::from_safe_cast(hdr.data_size);
> +                let ucode_data = fw
> +                    .data()
> +                    .get(ucode_start..ucode_start + ucode_size)
> +                    .ok_or(EINVAL)?;
> +
> +                let mut ucode = KVec::new();
> +                ucode.extend_from_slice(ucode_data, GFP_KERNEL)?;
> +
> +                Some(GenericBootloader { desc, ucode })
> +            }
> +            Err(_) => None,
> +        };
> +

Actually, let's put that code into a new `GenBootloader` type. You can
follow the example of `BooterFirmware`, which is quite similar (only a
bit more complex).


  reply	other threads:[~2025-12-18 12:59 UTC|newest]

Thread overview: 46+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-12-18  3:29 [PATCH v4 00/11] gpu: nova-core: add Turing support Timur Tabi
2025-12-18  3:29 ` [PATCH v4 01/11] gpu: nova-core: rename Imem to ImemSecure Timur Tabi
2025-12-31 19:29   ` John Hubbard
2025-12-18  3:29 ` [PATCH v4 02/11] gpu: nova-core: add ImemNonSecure section infrastructure Timur Tabi
2025-12-31 19:35   ` John Hubbard
2025-12-18  3:29 ` [PATCH v4 03/11] gpu: nova-core: support header parsing on Turing/GA100 Timur Tabi
2025-12-31 19:58   ` John Hubbard
2025-12-18  3:29 ` [PATCH v4 04/11] gpu: nova-core: add support for Turing/GA100 fwsignature Timur Tabi
2025-12-31 19:28   ` John Hubbard
2025-12-18  3:29 ` [PATCH v4 05/11] gpu: nova-core: add NV_PFALCON_FALCON_DMATRFCMD::with_falcon_mem() Timur Tabi
2025-12-18 11:24   ` Alexandre Courbot
2025-12-31 21:35   ` John Hubbard
2025-12-18  3:29 ` [PATCH v4 06/11] gpu: nova-core: move some functions into the HAL Timur Tabi
2025-12-18 11:25   ` Alexandre Courbot
2025-12-31 22:07   ` John Hubbard
2025-12-18  3:29 ` [PATCH v4 07/11] gpu: nova-core: Add basic Turing HAL Timur Tabi
2025-12-31 22:54   ` John Hubbard
2025-12-18  3:29 ` [PATCH v4 08/11] gpu: nova-core: add Falcon HAL method supports_dma() Timur Tabi
2025-12-18  7:48   ` Alexandre Courbot
2025-12-19 12:49     ` Alexandre Courbot
2025-12-18  3:29 ` [PATCH v4 09/11] gpu: nova-core: add FalconUCodeDescV2 support Timur Tabi
2025-12-18  8:01   ` Alexandre Courbot
2025-12-18  3:29 ` [PATCH v4 10/11] gpu: nova-core: align LibosMemoryRegionInitArgument size to page size Timur Tabi
2025-12-18 11:54   ` Alexandre Courbot
2025-12-18 22:51     ` Timur Tabi
2025-12-19  3:43       ` Alexandre Courbot
2025-12-19  4:34         ` Timur Tabi
2025-12-19  5:55           ` Alexandre Courbot
2025-12-18  3:29 ` [PATCH v4 11/11] gpu: nova-core: add PIO support for loading firmware images Timur Tabi
2025-12-18 12:59   ` Alexandre Courbot [this message]
2025-12-30 22:56     ` Timur Tabi
2025-12-31 23:22       ` Timur Tabi
2025-12-31 21:30     ` Timur Tabi
2025-12-28 17:45 ` [PATCH v4 00/11] gpu: nova-core: add Turing support Ewan Chorynski
2025-12-30 21:42   ` Timur Tabi
2026-01-04 10:21     ` Ewan Chorynski
2026-01-04 15:01       ` Timur Tabi
2025-12-31  2:58 ` John Hubbard
2025-12-31  4:26   ` Timur Tabi
2025-12-31  6:17     ` John Hubbard
2025-12-31 16:33       ` Timur Tabi
2025-12-31 17:29         ` John Hubbard
2025-12-31 19:15           ` John Hubbard
2025-12-31 19:23             ` Timur Tabi
2025-12-31 20:06 ` John Hubbard
2025-12-31 20:11   ` Timur Tabi

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=DF1D026W0BQ9.1DUWS5LKR09TG@nvidia.com \
    --to=acourbot@nvidia.com \
    --cc=dakr@kernel.org \
    --cc=jhubbard@nvidia.com \
    --cc=joelagnelf@nvidia.com \
    --cc=nouveau@lists.freedesktop.org \
    --cc=rust-for-linux@vger.kernel.org \
    --cc=ttabi@nvidia.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).