From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 3A3213451CC; Mon, 9 Mar 2026 19:40:43 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal:i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1773085243; cv=none; b=QDQaohMYB9vt71bK6fG9gkeomlbWC3bOtDDmWv0olfkMqxTAR8D9czeM+FylphJEBWMqzwVzu2yG2VFK9T54RrWk19uFg4ppDpfSH8sckYGFcqA/rIxWRiQdc+fdzM2CUgfxmgbGG0rxUJjZbuZ7RfiOuuCuPwCvmlXuuish8DQ= ARC-Message-Signature:i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1773085243; c=relaxed/simple; bh=ykEDyFWqy0dWXj89fHrjAyafwdENidwWPunnvZCcBsE=; h=Mime-Version:Content-Type:Date:Message-Id:To:From:Subject:Cc: References:In-Reply-To; b=JlkGHeu6ar/LXcmevgmrm5Rv7ek1o91ZS4K4HDdrVD4J7ESpee50FGqmvsdiLIWhYrGwPQLknvppSV8FdMblsczt1amFmNlZg5v55a/QdYHnFy5fj70wMU52S+6o5VusDweMZDP0nv7/M8/hqoc4l3LWjWmFfAG4Fxu0RFfw+vY= ARC-Authentication-Results:i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=rd3ms1Qk; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="rd3ms1Qk" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 9822BC4CEF7; Mon, 9 Mar 2026 19:40:38 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1773085242; bh=ykEDyFWqy0dWXj89fHrjAyafwdENidwWPunnvZCcBsE=; h=Date:To:From:Subject:Cc:References:In-Reply-To:From; b=rd3ms1QkWISKBK/kda8ZvCMUc8dUM6BDjqWnDSwTxnHEVTaJxsnj62YNmUcrNsZUq rHhBNMVYVbSmOttFJlipiFCze2aX20rShfa5uF+O9peJ1GHMIKJsLMHuKfTPXRk4Eo 3l4lrQ0ld6HuU8dspVmvmzIGmm14LjkHh5gdY8srnZzNwA+MUXLAlBgTv+IjKJLRo0 8efqtIGgwtJCi2MmWvchNg7hYj1Xao8rHPKiawOJw4WOtKMMptErwMy2V5xq9nfzk6 Taf1FbV6N2ZnYNldwgYDZaIcdg6GduReD+VDwF+cF16DdBWOT/+q8FHnmM5VfpZvMQ qvNg5IvGuvBdg== Precedence: bulk X-Mailing-List: driver-core@lists.linux.dev List-Id: List-Subscribe: List-Unsubscribe: Mime-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset=UTF-8 Date: Mon, 09 Mar 2026 20:40:36 +0100 Message-Id: To: "Tim Kovalenko via B4 Relay" From: "Danilo Krummrich" Subject: Re: [PATCH v4 4/4] gpu: nova-core: fix stack overflow in GSP memory allocation Cc: , "Alexandre Courbot" , "Alice Ryhl" , "David Airlie" , "Simona Vetter" , "Miguel Ojeda" , "Gary Guo" , =?utf-8?q?Bj=C3=B6rn_Roy_Baron?= , "Benno Lossin" , "Andreas Hindborg" , "Trevor Gross" , "Boqun Feng" , "Nathan Chancellor" , "Nicolas Schier" , "Abdiel Janulgue" , "Daniel Almeida" , "Robin Murphy" , , , , , , References: <20260309-drm-rust-next-v4-0-4ef485b19a4c@proton.me> <20260309-drm-rust-next-v4-4-4ef485b19a4c@proton.me> In-Reply-To: <20260309-drm-rust-next-v4-4-4ef485b19a4c@proton.me> On Mon Mar 9, 2026 at 5:34 PM CET, Tim Kovalenko via B4 Relay wrote: > From: Tim Kovalenko > > The `Cmdq::new` function was allocating a `PteArray` struct on the stack > and was causing a stack overflow with 8216 bytes. > > Modify the `PteArray` to calculate and write the Page Table Entries > directly into the coherent DMA buffer one-by-one. This reduces the stack > usage quite a lot. > Reported-by: Gary Guo Closes: https://rust-for-linux.zulipchat.com/#narrow/channel/509436-Nova/to= pic/.60Cmdq.3A.3Anew.60.20uses.20excessive.20stack.20size/near/570375549 Fixes: f38b4f105cfc ("gpu: nova-core: Create initial Gsp") > Signed-off-by: Tim Kovalenko A few nits below, but I can fix them up [1] on apply. > + for (i, chunk) in pte_region.chunks_exact_mut(size_of::()).= enumerate() { > + let pte_value =3D start_addr > + .checked_add(num::usize_as_u64(i) << GSP_PAGE_SHIFT) > + .ok_or(EOVERFLOW)?; This should use PteArray::entry(). It would also be nice to get rid of the unsafe {} and use dma_write!() inst= ead, but this can be a follow-up patch. > + > + chunk.copy_from_slice(&pte_value.to_ne_bytes()); > + } > + > Ok(obj) > } > } > diff --git a/drivers/gpu/nova-core/gsp/cmdq.rs b/drivers/gpu/nova-core/gs= p/cmdq.rs > index 0056bfbf0a44cfbc5a0ca08d069f881b877e1edc..c8327d3098f73f9b880eee990= 38ad10a16e1e32d 100644 > --- a/drivers/gpu/nova-core/gsp/cmdq.rs > +++ b/drivers/gpu/nova-core/gsp/cmdq.rs > @@ -202,7 +202,20 @@ fn new(dev: &device::Device) -> Resul= t { > =20 > let gsp_mem =3D > CoherentAllocation::::alloc_coherent(dev, 1, GFP_KER= NEL | __GFP_ZERO)?; > - dma_write!(gsp_mem, [0]?.ptes, PteArray::new(gsp_mem.dma_handle(= ))?); > + > + const NUM_PTES: usize =3D GSP_PAGE_SIZE / size_of::(); We can avoid duplicating this by making it a constant of GspMem. > + let start =3D gsp_mem.dma_handle(); > + // One by one GSP Page write to the memory to avoid stack overfl= ow when allocating > + // the whole array at once. > + for i in 0..NUM_PTES { > + dma_write!( > + gsp_mem, > + [0]?.ptes.0[i], > + PteArray::::entry(start, i)? > + ); > + } -- [1] -- diff --git a/drivers/gpu/nova-core/gsp.rs b/drivers/gpu/nova-core/gsp.rs index 20170e483e04..b5ea14b7dad7 100644 --- a/drivers/gpu/nova-core/gsp.rs +++ b/drivers/gpu/nova-core/gsp.rs @@ -48,6 +48,7 @@ unsafe impl AsBytes for PteArra= y {} =20 impl PteArray { /// Returns the page table entry for `index`, for a mapping starting a= t `start` DmaAddress. + // TODO: Replace with `IoView` projection once available. fn entry(start: DmaAddress, index: usize) -> Result { start .checked_add(num::usize_as_u64(index) << GSP_PAGE_SHIFT) @@ -90,12 +91,9 @@ fn new(dev: &device::Device) -> Result { .as_slice_mut(size_of::(), NUM_PAGES * size_of::= ())? }; =20 - // This is a one by one GSP Page write to the memory - // to avoid stack overflow when allocating the whole array at once= . + // Write values one by one to avoid an on-stack instance of `PteAr= ray`. for (i, chunk) in pte_region.chunks_exact_mut(size_of::()).en= umerate() { - let pte_value =3D start_addr - .checked_add(num::usize_as_u64(i) << GSP_PAGE_SHIFT) - .ok_or(EOVERFLOW)?; + let pte_value =3D PteArray::::entry(start_addr, i)?= ; =20 chunk.copy_from_slice(&pte_value.to_ne_bytes()); } diff --git a/drivers/gpu/nova-core/gsp/cmdq.rs b/drivers/gpu/nova-core/gsp/= cmdq.rs index 9107a1473797..aa42d180f0d5 100644 --- a/drivers/gpu/nova-core/gsp/cmdq.rs +++ b/drivers/gpu/nova-core/gsp/cmdq.rs @@ -159,7 +159,7 @@ struct Msgq { #[repr(C)] struct GspMem { /// Self-mapping page table entries. - ptes: PteArray<{ GSP_PAGE_SIZE / size_of::() }>, + ptes: PteArray<{ Self::PTE_ARRAY_SIZE }>, /// CPU queue: the driver writes commands here, and the GSP reads them= . It also contains the /// write and read pointers that the CPU updates. /// @@ -172,6 +172,10 @@ struct GspMem { gspq: Msgq, } =20 +impl GspMem { + const PTE_ARRAY_SIZE: usize =3D GSP_PAGE_SIZE / size_of::(); +} + // SAFETY: These structs don't meet the no-padding requirements of AsBytes= but // that is not a problem because they are not used outside the kernel. unsafe impl AsBytes for GspMem {} @@ -202,16 +206,14 @@ fn new(dev: &device::Device) -> Result= { let gsp_mem =3D CoherentAllocation::::alloc_coherent(dev, 1, GFP_KERNE= L | __GFP_ZERO)?; =20 - const NUM_PTES: usize =3D GSP_PAGE_SIZE / size_of::(); - let start =3D gsp_mem.dma_handle(); // One by one GSP Page write to the memory to avoid stack overflow= when allocating // the whole array at once. - for i in 0..NUM_PTES { + for i in 0..GspMem::PTE_ARRAY_SIZE { dma_write!( gsp_mem, [0]?.ptes.0[i], - PteArray::::entry(start, i)? + PteArray::<{ GspMem::PTE_ARRAY_SIZE }>::entry(start, i)? ); } =20 From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.lore.kernel.org (Postfix) with ESMTPS id A5781FCA17F for ; Mon, 9 Mar 2026 19:40:48 +0000 (UTC) Received: from kara.freedesktop.org (unknown [131.252.210.166]) by gabe.freedesktop.org (Postfix) with ESMTPS id DB00D10E5C5; Mon, 9 Mar 2026 19:40:46 +0000 (UTC) Authentication-Results: gabe.freedesktop.org; dkim=fail reason="signature verification failed" (2048-bit key; unprotected) header.d=kernel.org header.i=@kernel.org header.b="rd3ms1Qk"; dkim-atps=neutral Received: from kara.freedesktop.org (localhost [127.0.0.1]) by kara.freedesktop.org (Postfix) with ESMTP id 0B97844F7F; Mon, 9 Mar 2026 19:30:15 +0000 (UTC) ARC-Seal: i=1; cv=none; a=rsa-sha256; d=lists.freedesktop.org; s=20240201; t=1773084614; b=M21CYX0H/VDvM41DXvbtlQ1F9pH4eGRWyliBPhOAdY1hKVAWR9EGLPWu5/6w8Lfx0DsDe exvkm1RdhfYafkrMTBfoB1sLisH1sIc/siA+q3Wt1P9IL1ppBHlq47PGLmjw+qrYvHf2YSu trqdREGqghLNfHC1I1vNAA4GZ8ihv7Lk1tMBd+nViz1xESgROwROoySQ9j1izXBJJ7tcrVe tJTWFAvRg4InfdmqoPgwbQdkkKUV5hmFfmImSPAgamXA4hUpTrrSZq3HpGi7XyuQEPqCb35 hnOlxYhtv2qNirWl+rehFxMXohTHREJISpPYmJrtSNnnhso75H0B3abwZysQ== ARC-Message-Signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=lists.freedesktop.org; s=20240201; t=1773084614; h=from : sender : reply-to : subject : date : message-id : to : cc : mime-version : content-type : content-transfer-encoding : content-id : content-description : resent-date : resent-from : resent-sender : resent-to : resent-cc : resent-message-id : in-reply-to : references : list-id : list-help : list-unsubscribe : list-subscribe : list-post : list-owner : list-archive; bh=k6SpxggCoJNJmj3FCG9LFGlfDgc9vWsA6uqiW/1k1xc=; b=Q1fGTZ7esab2eR6RR5vZpzag90RDEwEV7hRlm8TPp1I9ay7nN5q39N8ca7h7IGUO1b92k p2KD4tslSjjGWE80wvWbwQY1bZCTBLy+sAfhJ4lDIpRq+/dLNlOfYIv112Rn/wzq+XS8FOP 4b2/4ErjfBAdc1eB8KujOTWZDre6IqtMxO/kxb9bBzsA0Q+Ih+eV5tJsMol19Gjt8DbjHMY 1sGWWsgxwj0DIPmLM8MMu1KvpVK60uV21wgMwZtrDhUjZYWZTo7Fc125Cvejr9ZG509yE/E jgcFJvmcsIAEAlpIkVr2D7pRbrAk9VSiDpLdlARU4+kPc+bLaL+xE5i+FzzQ== ARC-Authentication-Results: i=1; mail.freedesktop.org; dkim=pass header.d=kernel.org; arc=none (Message is not ARC signed); dmarc=pass (Used From Domain Record) header.from=kernel.org policy.dmarc=quarantine Authentication-Results: mail.freedesktop.org; dkim=pass header.d=kernel.org; arc=none (Message is not ARC signed); dmarc=pass (Used From Domain Record) header.from=kernel.org policy.dmarc=quarantine Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) by kara.freedesktop.org (Postfix) with ESMTPS id 746C8404AB for ; Mon, 9 Mar 2026 19:30:12 +0000 (UTC) Received: from tor.source.kernel.org (tor.source.kernel.org [172.105.4.254]) by gabe.freedesktop.org (Postfix) with ESMTPS id 0252110E5BE; Mon, 9 Mar 2026 19:40:43 +0000 (UTC) Received: from smtp.kernel.org (transwarp.subspace.kernel.org [100.75.92.58]) by tor.source.kernel.org (Postfix) with ESMTP id 40523600AE; Mon, 9 Mar 2026 19:40:43 +0000 (UTC) Received: by smtp.kernel.org (Postfix) with ESMTPSA id 9822BC4CEF7; Mon, 9 Mar 2026 19:40:38 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1773085242; bh=ykEDyFWqy0dWXj89fHrjAyafwdENidwWPunnvZCcBsE=; h=Date:To:From:Subject:Cc:References:In-Reply-To:From; b=rd3ms1QkWISKBK/kda8ZvCMUc8dUM6BDjqWnDSwTxnHEVTaJxsnj62YNmUcrNsZUq rHhBNMVYVbSmOttFJlipiFCze2aX20rShfa5uF+O9peJ1GHMIKJsLMHuKfTPXRk4Eo 3l4lrQ0ld6HuU8dspVmvmzIGmm14LjkHh5gdY8srnZzNwA+MUXLAlBgTv+IjKJLRo0 8efqtIGgwtJCi2MmWvchNg7hYj1Xao8rHPKiawOJw4WOtKMMptErwMy2V5xq9nfzk6 Taf1FbV6N2ZnYNldwgYDZaIcdg6GduReD+VDwF+cF16DdBWOT/+q8FHnmM5VfpZvMQ qvNg5IvGuvBdg== Mime-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset=UTF-8 Date: Mon, 09 Mar 2026 20:40:36 +0100 Message-Id: To: "Tim Kovalenko via B4 Relay" From: "Danilo Krummrich" Subject: Re: [PATCH v4 4/4] gpu: nova-core: fix stack overflow in GSP memory allocation References: <20260309-drm-rust-next-v4-0-4ef485b19a4c@proton.me> <20260309-drm-rust-next-v4-4-4ef485b19a4c@proton.me> In-Reply-To: <20260309-drm-rust-next-v4-4-4ef485b19a4c@proton.me> Message-ID-Hash: QXXX3D65VABHJAG2O3BP2HU2N3JNW5XH X-Message-ID-Hash: QXXX3D65VABHJAG2O3BP2HU2N3JNW5XH X-MailFrom: dakr@kernel.org X-Mailman-Rule-Hits: nonmember-moderation X-Mailman-Rule-Misses: dmarc-mitigation; no-senders; approved; emergency; loop; banned-address; member-moderation CC: tim.kovalenko@proton.me, Alexandre Courbot , Alice Ryhl , Simona Vetter , Miguel Ojeda , Gary Guo , =?utf-8?q?Bj=C3=B6rn_Roy_Baron?= , Benno Lossin , Andreas Hindborg , Trevor Gross , Boqun Feng , Nathan Chancellor , Nicolas Schier , Abdiel Janulgue , Daniel Almeida , Robin Murphy , nouveau@lists.freedesktop.org, dri-devel@lists.freedesktop.org, linux-kernel@vger.kernel.org, rust-for-linux@vger.kernel.org, linux-kbuild@vger.kernel.org, driver-core@lists.linux.dev X-Mailman-Version: 3.3.8 Precedence: list List-Id: Nouveau development list Archived-At: Archived-At: List-Archive: List-Archive: List-Help: List-Owner: List-Post: List-Subscribe: List-Unsubscribe: On Mon Mar 9, 2026 at 5:34 PM CET, Tim Kovalenko via B4 Relay wrote: > From: Tim Kovalenko > > The `Cmdq::new` function was allocating a `PteArray` struct on the stack > and was causing a stack overflow with 8216 bytes. > > Modify the `PteArray` to calculate and write the Page Table Entries > directly into the coherent DMA buffer one-by-one. This reduces the stack > usage quite a lot. > Reported-by: Gary Guo Closes: https://rust-for-linux.zulipchat.com/#narrow/channel/509436-Nova/to= pic/.60Cmdq.3A.3Anew.60.20uses.20excessive.20stack.20size/near/570375549 Fixes: f38b4f105cfc ("gpu: nova-core: Create initial Gsp") > Signed-off-by: Tim Kovalenko A few nits below, but I can fix them up [1] on apply. > + for (i, chunk) in pte_region.chunks_exact_mut(size_of::()).= enumerate() { > + let pte_value =3D start_addr > + .checked_add(num::usize_as_u64(i) << GSP_PAGE_SHIFT) > + .ok_or(EOVERFLOW)?; This should use PteArray::entry(). It would also be nice to get rid of the unsafe {} and use dma_write!() inst= ead, but this can be a follow-up patch. > + > + chunk.copy_from_slice(&pte_value.to_ne_bytes()); > + } > + > Ok(obj) > } > } > diff --git a/drivers/gpu/nova-core/gsp/cmdq.rs b/drivers/gpu/nova-core/gs= p/cmdq.rs > index 0056bfbf0a44cfbc5a0ca08d069f881b877e1edc..c8327d3098f73f9b880eee990= 38ad10a16e1e32d 100644 > --- a/drivers/gpu/nova-core/gsp/cmdq.rs > +++ b/drivers/gpu/nova-core/gsp/cmdq.rs > @@ -202,7 +202,20 @@ fn new(dev: &device::Device) -> Resul= t { > =20 > let gsp_mem =3D > CoherentAllocation::::alloc_coherent(dev, 1, GFP_KER= NEL | __GFP_ZERO)?; > - dma_write!(gsp_mem, [0]?.ptes, PteArray::new(gsp_mem.dma_handle(= ))?); > + > + const NUM_PTES: usize =3D GSP_PAGE_SIZE / size_of::(); We can avoid duplicating this by making it a constant of GspMem. > + let start =3D gsp_mem.dma_handle(); > + // One by one GSP Page write to the memory to avoid stack overfl= ow when allocating > + // the whole array at once. > + for i in 0..NUM_PTES { > + dma_write!( > + gsp_mem, > + [0]?.ptes.0[i], > + PteArray::::entry(start, i)? > + ); > + } -- [1] -- diff --git a/drivers/gpu/nova-core/gsp.rs b/drivers/gpu/nova-core/gsp.rs index 20170e483e04..b5ea14b7dad7 100644 --- a/drivers/gpu/nova-core/gsp.rs +++ b/drivers/gpu/nova-core/gsp.rs @@ -48,6 +48,7 @@ unsafe impl AsBytes for PteArra= y {} =20 impl PteArray { /// Returns the page table entry for `index`, for a mapping starting a= t `start` DmaAddress. + // TODO: Replace with `IoView` projection once available. fn entry(start: DmaAddress, index: usize) -> Result { start .checked_add(num::usize_as_u64(index) << GSP_PAGE_SHIFT) @@ -90,12 +91,9 @@ fn new(dev: &device::Device) -> Result { .as_slice_mut(size_of::(), NUM_PAGES * size_of::= ())? }; =20 - // This is a one by one GSP Page write to the memory - // to avoid stack overflow when allocating the whole array at once= . + // Write values one by one to avoid an on-stack instance of `PteAr= ray`. for (i, chunk) in pte_region.chunks_exact_mut(size_of::()).en= umerate() { - let pte_value =3D start_addr - .checked_add(num::usize_as_u64(i) << GSP_PAGE_SHIFT) - .ok_or(EOVERFLOW)?; + let pte_value =3D PteArray::::entry(start_addr, i)?= ; =20 chunk.copy_from_slice(&pte_value.to_ne_bytes()); } diff --git a/drivers/gpu/nova-core/gsp/cmdq.rs b/drivers/gpu/nova-core/gsp/= cmdq.rs index 9107a1473797..aa42d180f0d5 100644 --- a/drivers/gpu/nova-core/gsp/cmdq.rs +++ b/drivers/gpu/nova-core/gsp/cmdq.rs @@ -159,7 +159,7 @@ struct Msgq { #[repr(C)] struct GspMem { /// Self-mapping page table entries. - ptes: PteArray<{ GSP_PAGE_SIZE / size_of::() }>, + ptes: PteArray<{ Self::PTE_ARRAY_SIZE }>, /// CPU queue: the driver writes commands here, and the GSP reads them= . It also contains the /// write and read pointers that the CPU updates. /// @@ -172,6 +172,10 @@ struct GspMem { gspq: Msgq, } =20 +impl GspMem { + const PTE_ARRAY_SIZE: usize =3D GSP_PAGE_SIZE / size_of::(); +} + // SAFETY: These structs don't meet the no-padding requirements of AsBytes= but // that is not a problem because they are not used outside the kernel. unsafe impl AsBytes for GspMem {} @@ -202,16 +206,14 @@ fn new(dev: &device::Device) -> Result= { let gsp_mem =3D CoherentAllocation::::alloc_coherent(dev, 1, GFP_KERNE= L | __GFP_ZERO)?; =20 - const NUM_PTES: usize =3D GSP_PAGE_SIZE / size_of::(); - let start =3D gsp_mem.dma_handle(); // One by one GSP Page write to the memory to avoid stack overflow= when allocating // the whole array at once. - for i in 0..NUM_PTES { + for i in 0..GspMem::PTE_ARRAY_SIZE { dma_write!( gsp_mem, [0]?.ptes.0[i], - PteArray::::entry(start, i)? + PteArray::<{ GspMem::PTE_ARRAY_SIZE }>::entry(start, i)? ); } =20