[PATCH v1 RESEND 4/4] drm/tyr: add GPU reset handling

public inbox for rust-for-linux@vger.kernel.org
 help / color / mirror / Atom feed

From: "Onur Özkan" <work@onurozkan.dev>
To: linux-kernel@vger.kernel.org
Cc: dakr@kernel.org, aliceryhl@google.com,
	daniel.almeida@collabora.com, airlied@gmail.com, simona@ffwll.ch,
	dri-devel@lists.freedesktop.org, rust-for-linux@vger.kernel.org,
	"Onur Özkan" <work@onurozkan.dev>,
	"Deborah Brouwer" <deborah.brouwer@collabora.com>
Subject: [PATCH v1 RESEND 4/4] drm/tyr: add GPU reset handling
Date: Fri, 13 Mar 2026 12:16:44 +0300	[thread overview]
Message-ID: <20260313091646.16938-5-work@onurozkan.dev> (raw)
In-Reply-To: <20260313091646.16938-1-work@onurozkan.dev>

Move Tyr reset logic into a new reset module and add async reset work.

This adds:
- ResetHandle with internal controller state
- a dedicated ordered reset workqueue
- a pending flag to avoid duplicate queued resets
- run_reset() as the shared synchronous reset helper

Probe now calls reset::run_reset() before normal init. Driver data now
keeps ResetHandle so reset work is drained before clocks and regulators
are dropped.

Tested-by: Deborah Brouwer <deborah.brouwer@collabora.com>
Signed-off-by: Onur Özkan <work@onurozkan.dev>
---
 drivers/gpu/drm/tyr/driver.rs |  40 +++-----
 drivers/gpu/drm/tyr/reset.rs  | 180 ++++++++++++++++++++++++++++++++++
 drivers/gpu/drm/tyr/tyr.rs    |   1 +
 3 files changed, 192 insertions(+), 29 deletions(-)
 create mode 100644 drivers/gpu/drm/tyr/reset.rs

diff --git a/drivers/gpu/drm/tyr/driver.rs b/drivers/gpu/drm/tyr/driver.rs
index f7951804e4e0..c80238a21ff2 100644
--- a/drivers/gpu/drm/tyr/driver.rs
+++ b/drivers/gpu/drm/tyr/driver.rs
@@ -6,11 +6,8 @@
         OptionalClk, //
     },
     device::{
-        Bound,
-        Core,
-        Device, //
+        Core, //
     },
-    devres::Devres,
     dma::{
         Device as DmaDevice,
         DmaMask, //
@@ -22,10 +19,7 @@
         Registered,
         UnregisteredDevice, //
     },
-    io::poll,
-    new_mutex,
-    of,
-    platform,
+    new_mutex, of, platform,
     prelude::*,
     regulator,
     regulator::Regulator,
@@ -35,17 +29,15 @@
         Arc,
         Mutex, //
     },
-    time, //
 };
 
 use crate::{
     file::TyrDrmFileData,
     fw::Firmware,
     gem::BoData,
-    gpu,
     gpu::GpuInfo,
     mmu::Mmu,
-    regs, //
+    reset, //
 };
 
 pub(crate) type IoMem = kernel::io::mem::IoMem<SZ_2M>;
@@ -62,6 +54,11 @@ pub(crate) struct TyrPlatformDriverData {
 
 #[pin_data]
 pub(crate) struct TyrDrmDeviceData {
+    // `ResetHandle::drop()` drains queued/running works and this must happen
+    // before clocks/regulators are dropped. So keep this field before them to
+    // ensure the correct drop order.
+    pub(crate) reset: reset::ResetHandle,
+
     pub(crate) pdev: ARef<platform::Device>,
 
     pub(crate) fw: Arc<Firmware>,
@@ -90,22 +87,6 @@ unsafe impl Send for TyrDrmDeviceData {}
 // SAFETY: This will be removed in a future patch.
 unsafe impl Sync for TyrDrmDeviceData {}
 
-fn issue_soft_reset(dev: &Device<Bound>, iomem: &Devres<IoMem>) -> Result {
-    // Clear any stale reset-complete IRQ state before issuing a new soft reset.
-    regs::GPU_IRQ_CLEAR.write(dev, iomem, regs::GPU_IRQ_RAWSTAT_RESET_COMPLETED)?;
-    regs::GPU_CMD.write(dev, iomem, regs::GPU_CMD_SOFT_RESET)?;
-
-    poll::read_poll_timeout(
-        || regs::GPU_IRQ_RAWSTAT.read(dev, iomem),
-        |status| *status & regs::GPU_IRQ_RAWSTAT_RESET_COMPLETED != 0,
-        time::Delta::from_millis(1),
-        time::Delta::from_millis(100),
-    )
-    .inspect_err(|_| dev_err!(dev, "GPU reset failed."))?;
-
-    Ok(())
-}
-
 kernel::of_device_table!(
     OF_TABLE,
     MODULE_OF_TABLE,
@@ -138,8 +119,7 @@ fn probe(
         let request = pdev.io_request_by_index(0).ok_or(ENODEV)?;
         let iomem = Arc::pin_init(request.iomap_sized::<SZ_2M>(), GFP_KERNEL)?;
 
-        issue_soft_reset(pdev.as_ref(), &iomem)?;
-        gpu::l2_power_on(pdev.as_ref(), &iomem)?;
+        reset::run_reset(pdev.as_ref(), &iomem)?;
 
         let gpu_info = GpuInfo::new(pdev.as_ref(), &iomem)?;
         gpu_info.log(pdev);
@@ -153,6 +133,7 @@ fn probe(
 
         let uninit_ddev = UnregisteredDevice::<TyrDrmDriver>::new(pdev.as_ref())?;
         let platform: ARef<platform::Device> = pdev.into();
+        let reset = reset::ResetHandle::new(platform.clone(), iomem.clone())?;
 
         let mmu = Mmu::new(pdev, iomem.as_arc_borrow(), &gpu_info)?;
 
@@ -178,6 +159,7 @@ fn probe(
                     _mali: mali_regulator,
                     _sram: sram_regulator,
                 }),
+                reset,
                 gpu_info,
         });
         let ddev = Registration::new_foreign_owned(uninit_ddev, pdev.as_ref(), data, 0)?;
diff --git a/drivers/gpu/drm/tyr/reset.rs b/drivers/gpu/drm/tyr/reset.rs
new file mode 100644
index 000000000000..29dfae98b0dd
--- /dev/null
+++ b/drivers/gpu/drm/tyr/reset.rs
@@ -0,0 +1,180 @@
+// SPDX-License-Identifier: GPL-2.0 or MIT
+
+//! Provides asynchronous reset handling for the Tyr DRM driver via
+//! [`ResetHandle`], which runs reset work on a dedicated ordered
+//! workqueue and avoids duplicate pending resets.
+
+use kernel::{
+    device::{
+        Bound,
+        Device, //
+    },
+    devres::Devres,
+    io::poll,
+    platform,
+    prelude::*,
+    sync::{
+        aref::ARef,
+        atomic::{
+            Acquire,
+            Atomic,
+            Relaxed,
+            Release, //
+        },
+        Arc,
+    },
+    time,
+    workqueue::{
+        self,
+        Work, //
+    },
+};
+
+use crate::{
+    driver::IoMem,
+    gpu,
+    regs, //
+};
+
+/// Manages asynchronous GPU reset handling and ensures only a single reset
+/// work is pending at a time.
+#[pin_data]
+struct Controller {
+    /// Platform device reference needed for reset operations and logging.
+    pdev: ARef<platform::Device>,
+    /// Mapped register space needed for reset operations.
+    iomem: Arc<Devres<IoMem>>,
+    /// Atomic flag for controlling the scheduling pending state.
+    pending: Atomic<bool>,
+    /// Dedicated ordered workqueue for reset operations.
+    wq: workqueue::OrderedQueue,
+    /// Work item backing async reset processing.
+    #[pin]
+    work: Work<Controller>,
+}
+
+kernel::impl_has_work! {
+    impl HasWork<Controller> for Controller { self.work }
+}
+
+impl workqueue::WorkItem for Controller {
+    type Pointer = Arc<Self>;
+
+    fn run(this: Arc<Self>) {
+        this.reset_work();
+    }
+}
+
+impl Controller {
+    /// Creates a [`Controller`] instance.
+    fn new(pdev: ARef<platform::Device>, iomem: Arc<Devres<IoMem>>) -> Result<Arc<Self>> {
+        let wq = workqueue::OrderedQueue::new(c"tyr-reset-wq", 0)?;
+
+        Arc::pin_init(
+            try_pin_init!(Self {
+                pdev,
+                iomem,
+                pending: Atomic::new(false),
+                wq,
+                work <- kernel::new_work!("tyr::reset"),
+            }),
+            GFP_KERNEL,
+        )
+    }
+
+    /// Processes one scheduled reset request.
+    ///
+    /// Panthor reference:
+    /// - drivers/gpu/drm/panthor/panthor_device.c::panthor_device_reset_work()
+    fn reset_work(self: &Arc<Self>) {
+        dev_info!(self.pdev.as_ref(), "GPU reset work is started.\n");
+
+        // SAFETY: `Controller` is part of driver-private data and only exists
+        // while the platform device is bound.
+        let pdev = unsafe { self.pdev.as_ref().as_bound() };
+        if let Err(e) = run_reset(pdev, &self.iomem) {
+            dev_err!(self.pdev.as_ref(), "GPU reset failed: {:?}\n", e);
+        } else {
+            dev_info!(self.pdev.as_ref(), "GPU reset work is done.\n");
+        }
+
+        self.pending.store(false, Release);
+    }
+}
+
+/// Reset handle that shuts down pending work gracefully on drop.
+pub(crate) struct ResetHandle(Arc<Controller>);
+
+impl ResetHandle {
+    /// Creates a [`ResetHandle`] instance.
+    pub(crate) fn new(pdev: ARef<platform::Device>, iomem: Arc<Devres<IoMem>>) -> Result<Self> {
+        Ok(Self(Controller::new(pdev, iomem)?))
+    }
+
+    /// Schedules reset work.
+    #[expect(dead_code)]
+    pub(crate) fn schedule(&self) {
+        // TODO: Similar to `panthor_device_schedule_reset()` in Panthor, add a
+        // power management check once Tyr supports it.
+
+        // Keep only one reset request running or queued. If one is already pending,
+        // we ignore new schedule requests.
+        if self.0.pending.cmpxchg(false, true, Relaxed).is_ok()
+            && self.0.wq.enqueue(self.0.clone()).is_err()
+        {
+            self.0.pending.store(false, Release);
+        }
+    }
+
+    /// Returns true if a reset is queued or in progress.
+    ///
+    /// Note that the state can change immediately after the return.
+    #[inline]
+    #[expect(dead_code)]
+    pub(crate) fn is_pending(&self) -> bool {
+        self.0.pending.load(Acquire)
+    }
+}
+
+impl Drop for ResetHandle {
+    fn drop(&mut self) {
+        // Drain queued/running work and block future queueing attempts for this
+        // work item before clocks/regulators are torn down.
+        // SAFETY: drop executes in a sleepable context.
+        unsafe { self.0.work.disable_sync() };
+    }
+}
+
+/// Issues a soft reset command and waits for reset-complete IRQ status.
+fn issue_soft_reset(dev: &Device<Bound>, iomem: &Devres<IoMem>) -> Result {
+    // Clear any stale reset-complete IRQ state before issuing a new soft reset.
+    regs::GPU_IRQ_CLEAR.write(dev, iomem, regs::GPU_IRQ_RAWSTAT_RESET_COMPLETED)?;
+    regs::GPU_CMD.write(dev, iomem, regs::GPU_CMD_SOFT_RESET)?;
+
+    poll::read_poll_timeout(
+        || regs::GPU_IRQ_RAWSTAT.read(dev, iomem),
+        |status| *status & regs::GPU_IRQ_RAWSTAT_RESET_COMPLETED != 0,
+        time::Delta::from_millis(1),
+        time::Delta::from_millis(100),
+    )
+    .inspect_err(|_| dev_err!(dev, "GPU reset failed."))?;
+
+    Ok(())
+}
+
+/// Runs one synchronous GPU reset pass.
+///
+/// Its visibility is `pub(super)` only so the probe path can run an
+/// initial reset; it is not part of this module's public API.
+///
+/// On success, the GPU is left in a state suitable for reinitialization.
+///
+/// The reset sequence is as follows:
+/// 1. Trigger a GPU soft reset.
+/// 2. Wait for the reset-complete IRQ status.
+/// 3. Power L2 back on.
+pub(super) fn run_reset(dev: &Device<Bound>, iomem: &Devres<IoMem>) -> Result {
+    issue_soft_reset(dev, iomem)?;
+    gpu::l2_power_on(dev, iomem)?;
+    Ok(())
+}
diff --git a/drivers/gpu/drm/tyr/tyr.rs b/drivers/gpu/drm/tyr/tyr.rs
index 18b0668bb217..d0349bc49f27 100644
--- a/drivers/gpu/drm/tyr/tyr.rs
+++ b/drivers/gpu/drm/tyr/tyr.rs
@@ -14,6 +14,7 @@
 mod gpu;
 mod mmu;
 mod regs;
+mod reset;
 mod slot;
 mod vm;
 
-- 
2.51.2

next prev parent reply	other threads:[~2026-03-13  9:25 UTC|newest]

Thread overview: 15+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-03-13  9:16 [PATCH v1 RESEND 0/4] drm/tyr: implement GPU reset API Onur Özkan
2026-03-13  9:16 ` [PATCH v1 RESEND 1/4] drm/tyr: clear reset IRQ before soft reset Onur Özkan
2026-03-19 10:47   ` Boris Brezillon
2026-03-13  9:16 ` [PATCH v1 RESEND 2/4] rust: add Work::disable_sync Onur Özkan
2026-03-13 12:00   ` Alice Ryhl
2026-03-15 10:45     ` Onur Özkan
2026-03-13  9:16 ` [PATCH v1 RESEND 3/4] rust: add ordered workqueue wrapper Onur Özkan
2026-03-13  9:16 ` Onur Özkan [this message]
2026-03-13 14:56   ` [PATCH v1 RESEND 4/4] drm/tyr: add GPU reset handling Daniel Almeida
2026-03-15 10:44     ` Onur Özkan
2026-03-19 11:08   ` Boris Brezillon
2026-03-19 12:51     ` Onur Özkan
2026-03-13  9:52 ` [PATCH v1 RESEND 0/4] drm/tyr: implement GPU reset API Alice Ryhl
2026-03-13 11:12   ` Onur Özkan
2026-03-13 11:26     ` Alice Ryhl

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:f7951804e4e dfblob:c80238a21ff dfblob:29dfae98b0d
dfblob:18b0668bb21 dfblob:d0349bc49f2 )
 OR (
bs:"[PATCH v1 RESEND 4/4] drm/tyr: add GPU reset handling" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260313091646.16938-5-work@onurozkan.dev \
    --to=work@onurozkan.dev \
    --cc=airlied@gmail.com \
    --cc=aliceryhl@google.com \
    --cc=dakr@kernel.org \
    --cc=daniel.almeida@collabora.com \
    --cc=deborah.brouwer@collabora.com \
    --cc=dri-devel@lists.freedesktop.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=rust-for-linux@vger.kernel.org \
    --cc=simona@ffwll.ch \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox