All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Danilo Krummrich" <dakr@kernel.org>
To: "Tim Kovalenko via B4 Relay"
	<devnull+tim.kovalenko.proton.me@kernel.org>
Cc: tim.kovalenko@proton.me,
	"Alexandre Courbot" <acourbot@nvidia.com>,
	"Alice Ryhl" <aliceryhl@google.com>,
	"David Airlie" <airlied@gmail.com>,
	"Simona Vetter" <simona@ffwll.ch>,
	"Miguel Ojeda" <ojeda@kernel.org>, "Gary Guo" <gary@garyguo.net>,
	"Björn Roy Baron" <bjorn3_gh@protonmail.com>,
	"Benno Lossin" <lossin@kernel.org>,
	"Andreas Hindborg" <a.hindborg@kernel.org>,
	"Trevor Gross" <tmgross@umich.edu>,
	"Boqun Feng" <boqun@kernel.org>,
	"Nathan Chancellor" <nathan@kernel.org>,
	"Nicolas Schier" <nsc@kernel.org>,
	"Abdiel Janulgue" <abdiel.janulgue@gmail.com>,
	"Daniel Almeida" <daniel.almeida@collabora.com>,
	"Robin Murphy" <robin.murphy@arm.com>,
	nouveau@lists.freedesktop.org, dri-devel@lists.freedesktop.org,
	linux-kernel@vger.kernel.org, rust-for-linux@vger.kernel.org,
	linux-kbuild@vger.kernel.org, driver-core@lists.linux.dev
Subject: Re: [PATCH v4 4/4] gpu: nova-core: fix stack overflow in GSP memory allocation
Date: Mon, 09 Mar 2026 20:40:36 +0100	[thread overview]
Message-ID: <DGYI9CQRWXU3.XNDVWJE0DP6H@kernel.org> (raw)
In-Reply-To: <20260309-drm-rust-next-v4-4-4ef485b19a4c@proton.me>

On Mon Mar 9, 2026 at 5:34 PM CET, Tim Kovalenko via B4 Relay wrote:
> From: Tim Kovalenko <tim.kovalenko@proton.me>
>
> The `Cmdq::new` function was allocating a `PteArray` struct (8216 bytes)
> on the stack, causing a stack overflow.
>
> Modify the `PteArray` to calculate and write the Page Table Entries
> directly into the coherent DMA buffer one-by-one. This reduces the stack
> usage quite a lot.
>

Reported-by: Gary Guo <gary@garyguo.net>
Closes: https://rust-for-linux.zulipchat.com/#narrow/channel/509436-Nova/topic/.60Cmdq.3A.3Anew.60.20uses.20excessive.20stack.20size/near/570375549
Fixes: f38b4f105cfc ("gpu: nova-core: Create initial Gsp")

> Signed-off-by: Tim Kovalenko <tim.kovalenko@proton.me>

A few nits below, but I can fix them up [1] on apply.

> +        for (i, chunk) in pte_region.chunks_exact_mut(size_of::<u64>()).enumerate() {
> +            let pte_value = start_addr
> +                .checked_add(num::usize_as_u64(i) << GSP_PAGE_SHIFT)
> +                .ok_or(EOVERFLOW)?;

This should use PteArray::entry().

It would also be nice to get rid of the unsafe {} and use dma_write!() instead,
but this can be a follow-up patch.

> +
> +            chunk.copy_from_slice(&pte_value.to_ne_bytes());
> +        }
> +
>          Ok(obj)
>      }
>  }
> diff --git a/drivers/gpu/nova-core/gsp/cmdq.rs b/drivers/gpu/nova-core/gsp/cmdq.rs
> index 0056bfbf0a44cfbc5a0ca08d069f881b877e1edc..c8327d3098f73f9b880eee99038ad10a16e1e32d 100644
> --- a/drivers/gpu/nova-core/gsp/cmdq.rs
> +++ b/drivers/gpu/nova-core/gsp/cmdq.rs
> @@ -202,7 +202,20 @@ fn new(dev: &device::Device<device::Bound>) -> Result<Self> {
>  
>          let gsp_mem =
>              CoherentAllocation::<GspMem>::alloc_coherent(dev, 1, GFP_KERNEL | __GFP_ZERO)?;
> -        dma_write!(gsp_mem, [0]?.ptes, PteArray::new(gsp_mem.dma_handle())?);
> +
> +        const NUM_PTES: usize = GSP_PAGE_SIZE / size_of::<u64>();

We can avoid duplicating this by making it a constant of GspMem.

> +        let start = gsp_mem.dma_handle();
> +        // One by one GSP Page write to the memory to avoid stack overflow when allocating
> +        // the whole array at once.
> +        for i in 0..NUM_PTES {
> +            dma_write!(
> +                gsp_mem,
> +                [0]?.ptes.0[i],
> +                PteArray::<NUM_PTES>::entry(start, i)?
> +            );
> +        }

-- [1] --

diff --git a/drivers/gpu/nova-core/gsp.rs b/drivers/gpu/nova-core/gsp.rs
index 20170e483e04..b5ea14b7dad7 100644
--- a/drivers/gpu/nova-core/gsp.rs
+++ b/drivers/gpu/nova-core/gsp.rs
@@ -48,6 +48,7 @@ unsafe impl<const NUM_ENTRIES: usize> AsBytes for PteArray<NUM_ENTRIES> {}
 
 impl<const NUM_PAGES: usize> PteArray<NUM_PAGES> {
     /// Returns the page table entry for `index`, for a mapping starting at `start` DmaAddress.
+    // TODO: Replace with `IoView` projection once available.
     fn entry(start: DmaAddress, index: usize) -> Result<u64> {
         start
             .checked_add(num::usize_as_u64(index) << GSP_PAGE_SHIFT)
@@ -90,12 +91,9 @@ fn new(dev: &device::Device<device::Bound>) -> Result<Self> {
                 .as_slice_mut(size_of::<u64>(), NUM_PAGES * size_of::<u64>())?
         };
 
-        // This is a  one by one GSP Page write to the memory
-        // to avoid stack overflow when allocating the whole array at once.
+        // Write values one by one to avoid an on-stack instance of `PteArray`.
         for (i, chunk) in pte_region.chunks_exact_mut(size_of::<u64>()).enumerate() {
-            let pte_value = start_addr
-                .checked_add(num::usize_as_u64(i) << GSP_PAGE_SHIFT)
-                .ok_or(EOVERFLOW)?;
+            let pte_value = PteArray::<NUM_PAGES>::entry(start_addr, i)?;
 
             chunk.copy_from_slice(&pte_value.to_ne_bytes());
         }
diff --git a/drivers/gpu/nova-core/gsp/cmdq.rs b/drivers/gpu/nova-core/gsp/cmdq.rs
index 9107a1473797..aa42d180f0d5 100644
--- a/drivers/gpu/nova-core/gsp/cmdq.rs
+++ b/drivers/gpu/nova-core/gsp/cmdq.rs
@@ -159,7 +159,7 @@ struct Msgq {
 #[repr(C)]
 struct GspMem {
     /// Self-mapping page table entries.
-    ptes: PteArray<{ GSP_PAGE_SIZE / size_of::<u64>() }>,
+    ptes: PteArray<{ Self::PTE_ARRAY_SIZE }>,
     /// CPU queue: the driver writes commands here, and the GSP reads them. It also contains the
     /// write and read pointers that the CPU updates.
     ///
@@ -172,6 +172,10 @@ struct GspMem {
     gspq: Msgq,
 }
 
+impl GspMem {
+    const PTE_ARRAY_SIZE: usize = GSP_PAGE_SIZE / size_of::<u64>();
+}
+
 // SAFETY: These structs don't meet the no-padding requirements of AsBytes but
 // that is not a problem because they are not used outside the kernel.
 unsafe impl AsBytes for GspMem {}
@@ -202,16 +206,14 @@ fn new(dev: &device::Device<device::Bound>) -> Result<Self> {
         let gsp_mem =
             CoherentAllocation::<GspMem>::alloc_coherent(dev, 1, GFP_KERNEL | __GFP_ZERO)?;
 
-        const NUM_PTES: usize = GSP_PAGE_SIZE / size_of::<u64>();
-
         let start = gsp_mem.dma_handle();
         // One by one GSP Page write to the memory to avoid stack overflow when allocating
         // the whole array at once.
-        for i in 0..NUM_PTES {
+        for i in 0..GspMem::PTE_ARRAY_SIZE {
             dma_write!(
                 gsp_mem,
                 [0]?.ptes.0[i],
-                PteArray::<NUM_PTES>::entry(start, i)?
+                PteArray::<{ GspMem::PTE_ARRAY_SIZE }>::entry(start, i)?
             );
         }
 


WARNING: multiple messages have this Message-ID (diff)
From: "Danilo Krummrich" <dakr@kernel.org>
To: "Tim Kovalenko via B4 Relay"
	<devnull+tim.kovalenko.proton.me@kernel.org>
Cc: tim.kovalenko@proton.me,
	"Alexandre Courbot" <acourbot@nvidia.com>,
	"Alice Ryhl" <aliceryhl@google.com>,
	"Simona Vetter" <simona@ffwll.ch>,
	"Miguel Ojeda" <ojeda@kernel.org>, "Gary Guo" <gary@garyguo.net>,
	"Björn Roy Baron" <bjorn3_gh@protonmail.com>,
	"Benno Lossin" <lossin@kernel.org>,
	"Andreas Hindborg" <a.hindborg@kernel.org>,
	"Trevor Gross" <tmgross@umich.edu>,
	"Boqun Feng" <boqun@kernel.org>,
	"Nathan Chancellor" <nathan@kernel.org>,
	"Nicolas Schier" <nsc@kernel.org>,
	"Abdiel Janulgue" <abdiel.janulgue@gmail.com>,
	"Daniel Almeida" <daniel.almeida@collabora.com>,
	"Robin Murphy" <robin.murphy@arm.com>,
	nouveau@lists.freedesktop.org, dri-devel@lists.freedesktop.org,
	linux-kernel@vger.kernel.org, rust-for-linux@vger.kernel.org,
	linux-kbuild@vger.kernel.org, driver-core@lists.linux.dev
Subject: Re: [PATCH v4 4/4] gpu: nova-core: fix stack overflow in GSP memory allocation
Date: Mon, 09 Mar 2026 20:40:36 +0100	[thread overview]
Message-ID: <DGYI9CQRWXU3.XNDVWJE0DP6H@kernel.org> (raw)
In-Reply-To: <20260309-drm-rust-next-v4-4-4ef485b19a4c@proton.me>

On Mon Mar 9, 2026 at 5:34 PM CET, Tim Kovalenko via B4 Relay wrote:
> From: Tim Kovalenko <tim.kovalenko@proton.me>
>
> The `Cmdq::new` function was allocating a `PteArray` struct (8216 bytes)
> on the stack, causing a stack overflow.
>
> Modify the `PteArray` to calculate and write the Page Table Entries
> directly into the coherent DMA buffer one-by-one. This reduces the stack
> usage quite a lot.
>

Reported-by: Gary Guo <gary@garyguo.net>
Closes: https://rust-for-linux.zulipchat.com/#narrow/channel/509436-Nova/topic/.60Cmdq.3A.3Anew.60.20uses.20excessive.20stack.20size/near/570375549
Fixes: f38b4f105cfc ("gpu: nova-core: Create initial Gsp")

> Signed-off-by: Tim Kovalenko <tim.kovalenko@proton.me>

A few nits below, but I can fix them up [1] on apply.

> +        for (i, chunk) in pte_region.chunks_exact_mut(size_of::<u64>()).enumerate() {
> +            let pte_value = start_addr
> +                .checked_add(num::usize_as_u64(i) << GSP_PAGE_SHIFT)
> +                .ok_or(EOVERFLOW)?;

This should use PteArray::entry().

It would also be nice to get rid of the unsafe {} and use dma_write!() instead,
but this can be a follow-up patch.

> +
> +            chunk.copy_from_slice(&pte_value.to_ne_bytes());
> +        }
> +
>          Ok(obj)
>      }
>  }
> diff --git a/drivers/gpu/nova-core/gsp/cmdq.rs b/drivers/gpu/nova-core/gsp/cmdq.rs
> index 0056bfbf0a44cfbc5a0ca08d069f881b877e1edc..c8327d3098f73f9b880eee99038ad10a16e1e32d 100644
> --- a/drivers/gpu/nova-core/gsp/cmdq.rs
> +++ b/drivers/gpu/nova-core/gsp/cmdq.rs
> @@ -202,7 +202,20 @@ fn new(dev: &device::Device<device::Bound>) -> Result<Self> {
>  
>          let gsp_mem =
>              CoherentAllocation::<GspMem>::alloc_coherent(dev, 1, GFP_KERNEL | __GFP_ZERO)?;
> -        dma_write!(gsp_mem, [0]?.ptes, PteArray::new(gsp_mem.dma_handle())?);
> +
> +        const NUM_PTES: usize = GSP_PAGE_SIZE / size_of::<u64>();

We can avoid duplicating this by making it a constant of GspMem.

> +        let start = gsp_mem.dma_handle();
> +        // One by one GSP Page write to the memory to avoid stack overflow when allocating
> +        // the whole array at once.
> +        for i in 0..NUM_PTES {
> +            dma_write!(
> +                gsp_mem,
> +                [0]?.ptes.0[i],
> +                PteArray::<NUM_PTES>::entry(start, i)?
> +            );
> +        }

-- [1] --

diff --git a/drivers/gpu/nova-core/gsp.rs b/drivers/gpu/nova-core/gsp.rs
index 20170e483e04..b5ea14b7dad7 100644
--- a/drivers/gpu/nova-core/gsp.rs
+++ b/drivers/gpu/nova-core/gsp.rs
@@ -48,6 +48,7 @@ unsafe impl<const NUM_ENTRIES: usize> AsBytes for PteArray<NUM_ENTRIES> {}
 
 impl<const NUM_PAGES: usize> PteArray<NUM_PAGES> {
     /// Returns the page table entry for `index`, for a mapping starting at `start` DmaAddress.
+    // TODO: Replace with `IoView` projection once available.
     fn entry(start: DmaAddress, index: usize) -> Result<u64> {
         start
             .checked_add(num::usize_as_u64(index) << GSP_PAGE_SHIFT)
@@ -90,12 +91,9 @@ fn new(dev: &device::Device<device::Bound>) -> Result<Self> {
                 .as_slice_mut(size_of::<u64>(), NUM_PAGES * size_of::<u64>())?
         };
 
-        // This is a  one by one GSP Page write to the memory
-        // to avoid stack overflow when allocating the whole array at once.
+        // Write values one by one to avoid an on-stack instance of `PteArray`.
         for (i, chunk) in pte_region.chunks_exact_mut(size_of::<u64>()).enumerate() {
-            let pte_value = start_addr
-                .checked_add(num::usize_as_u64(i) << GSP_PAGE_SHIFT)
-                .ok_or(EOVERFLOW)?;
+            let pte_value = PteArray::<NUM_PAGES>::entry(start_addr, i)?;
 
             chunk.copy_from_slice(&pte_value.to_ne_bytes());
         }
diff --git a/drivers/gpu/nova-core/gsp/cmdq.rs b/drivers/gpu/nova-core/gsp/cmdq.rs
index 9107a1473797..aa42d180f0d5 100644
--- a/drivers/gpu/nova-core/gsp/cmdq.rs
+++ b/drivers/gpu/nova-core/gsp/cmdq.rs
@@ -159,7 +159,7 @@ struct Msgq {
 #[repr(C)]
 struct GspMem {
     /// Self-mapping page table entries.
-    ptes: PteArray<{ GSP_PAGE_SIZE / size_of::<u64>() }>,
+    ptes: PteArray<{ Self::PTE_ARRAY_SIZE }>,
     /// CPU queue: the driver writes commands here, and the GSP reads them. It also contains the
     /// write and read pointers that the CPU updates.
     ///
@@ -172,6 +172,10 @@ struct GspMem {
     gspq: Msgq,
 }
 
+impl GspMem {
+    const PTE_ARRAY_SIZE: usize = GSP_PAGE_SIZE / size_of::<u64>();
+}
+
 // SAFETY: These structs don't meet the no-padding requirements of AsBytes but
 // that is not a problem because they are not used outside the kernel.
 unsafe impl AsBytes for GspMem {}
@@ -202,16 +206,14 @@ fn new(dev: &device::Device<device::Bound>) -> Result<Self> {
         let gsp_mem =
             CoherentAllocation::<GspMem>::alloc_coherent(dev, 1, GFP_KERNEL | __GFP_ZERO)?;
 
-        const NUM_PTES: usize = GSP_PAGE_SIZE / size_of::<u64>();
-
         let start = gsp_mem.dma_handle();
         // One by one GSP Page write to the memory to avoid stack overflow when allocating
         // the whole array at once.
-        for i in 0..NUM_PTES {
+        for i in 0..GspMem::PTE_ARRAY_SIZE {
             dma_write!(
                 gsp_mem,
                 [0]?.ptes.0[i],
-                PteArray::<NUM_PTES>::entry(start, i)?
+                PteArray::<{ GspMem::PTE_ARRAY_SIZE }>::entry(start, i)?
             );
         }
 


  reply	other threads:[~2026-03-09 19:40 UTC|newest]

Thread overview: 26+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-03-09 16:34 [PATCH v4 0/4] Fixes the stack overflow Tim Kovalenko
2026-03-09 16:34 ` Tim Kovalenko via B4 Relay
2026-03-09 16:34 ` [PATCH v4 1/4] rust: ptr: add `KnownSize` trait to support DST size info extraction Tim Kovalenko
2026-03-09 16:34   ` Tim Kovalenko via B4 Relay
2026-03-09 16:34 ` [PATCH v4 2/4] rust: ptr: add projection infrastructure Tim Kovalenko
2026-03-09 16:34   ` Tim Kovalenko via B4 Relay
2026-03-09 16:34 ` [PATCH v4 3/4] rust: dma: use pointer projection infra for `dma_{read,write}` macro Tim Kovalenko
2026-03-09 16:34   ` Tim Kovalenko via B4 Relay
2026-03-09 16:34 ` [PATCH v4 4/4] gpu: nova-core: fix stack overflow in GSP memory allocation Tim Kovalenko
2026-03-09 16:34   ` Tim Kovalenko via B4 Relay
2026-03-09 19:40   ` Danilo Krummrich [this message]
2026-03-09 19:40     ` Danilo Krummrich
2026-03-09 22:40     ` Miguel Ojeda
2026-03-09 22:40       ` Miguel Ojeda
2026-03-10  1:40   ` Alexandre Courbot
2026-03-10  1:40     ` Alexandre Courbot
2026-03-10  1:51     ` Gary Guo
2026-03-10  1:51       ` Gary Guo
2026-03-10  2:28       ` Alexandre Courbot
2026-03-10  2:28         ` Alexandre Courbot
2026-03-10 11:20         ` Danilo Krummrich
2026-03-10 11:20           ` Danilo Krummrich
2026-03-10 17:40   ` Danilo Krummrich
2026-03-10 17:40     ` Danilo Krummrich
2026-03-09 17:00 ` [PATCH v4 0/4] Fixes the stack overflow Danilo Krummrich
2026-03-09 17:00   ` Danilo Krummrich

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=DGYI9CQRWXU3.XNDVWJE0DP6H@kernel.org \
    --to=dakr@kernel.org \
    --cc=a.hindborg@kernel.org \
    --cc=abdiel.janulgue@gmail.com \
    --cc=acourbot@nvidia.com \
    --cc=airlied@gmail.com \
    --cc=aliceryhl@google.com \
    --cc=bjorn3_gh@protonmail.com \
    --cc=boqun@kernel.org \
    --cc=daniel.almeida@collabora.com \
    --cc=devnull+tim.kovalenko.proton.me@kernel.org \
    --cc=dri-devel@lists.freedesktop.org \
    --cc=driver-core@lists.linux.dev \
    --cc=gary@garyguo.net \
    --cc=linux-kbuild@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=lossin@kernel.org \
    --cc=nathan@kernel.org \
    --cc=nouveau@lists.freedesktop.org \
    --cc=nsc@kernel.org \
    --cc=ojeda@kernel.org \
    --cc=robin.murphy@arm.com \
    --cc=rust-for-linux@vger.kernel.org \
    --cc=simona@ffwll.ch \
    --cc=tim.kovalenko@proton.me \
    --cc=tmgross@umich.edu \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.