public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Boris Brezillon <boris.brezillon@collabora.com>
To: Ketil Johnsen <ketil.johnsen@arm.com>
Cc: "David Airlie" <airlied@gmail.com>,
	"Simona Vetter" <simona@ffwll.ch>,
	"Maarten Lankhorst" <maarten.lankhorst@linux.intel.com>,
	"Maxime Ripard" <mripard@kernel.org>,
	"Thomas Zimmermann" <tzimmermann@suse.de>,
	"Jonathan Corbet" <corbet@lwn.net>,
	"Shuah Khan" <skhan@linuxfoundation.org>,
	"Sumit Semwal" <sumit.semwal@linaro.org>,
	"Benjamin Gaignard" <benjamin.gaignard@collabora.com>,
	"Brian Starkey" <Brian.Starkey@arm.com>,
	"John Stultz" <jstultz@google.com>,
	"T.J. Mercier" <tjmercier@google.com>,
	"Christian König" <christian.koenig@amd.com>,
	"Steven Price" <steven.price@arm.com>,
	"Liviu Dudau" <liviu.dudau@arm.com>,
	"Daniel Almeida" <daniel.almeida@collabora.com>,
	"Alice Ryhl" <aliceryhl@google.com>,
	"Matthias Brugger" <matthias.bgg@gmail.com>,
	"AngeloGioacchino Del Regno"
	<angelogioacchino.delregno@collabora.com>,
	dri-devel@lists.freedesktop.org, linux-doc@vger.kernel.org,
	linux-kernel@vger.kernel.org, linux-media@vger.kernel.org,
	linaro-mm-sig@lists.linaro.org,
	linux-arm-kernel@lists.infradead.org,
	linux-mediatek@lists.infradead.org,
	"Florent Tomasin" <florent.tomasin@arm.com>,
	"Paul Toadere" <paul.toadere@arm.com>,
	"Samuel Percival" <samuel.percival@arm.com>
Subject: Re: [PATCH 7/8] drm/panthor: Add support for entering and exiting protected mode
Date: Wed, 6 May 2026 10:51:45 +0200	[thread overview]
Message-ID: <20260506105145.6f25181d@fedora> (raw)
In-Reply-To: <20260505140516.1372388-8-ketil.johnsen@arm.com>

On Tue,  5 May 2026 16:05:13 +0200
Ketil Johnsen <ketil.johnsen@arm.com> wrote:

Here comes the second part of the review :-).

> diff --git a/drivers/gpu/drm/panthor/panthor_gpu.c b/drivers/gpu/drm/panthor/panthor_gpu.c
> index 2ab444ee8c710..e93042eaf3fc8 100644
> --- a/drivers/gpu/drm/panthor/panthor_gpu.c
> +++ b/drivers/gpu/drm/panthor/panthor_gpu.c
> @@ -100,8 +100,11 @@ static void panthor_gpu_irq_handler(struct panthor_device *ptdev, u32 status)
>  			 fault_status, panthor_exception_name(ptdev, fault_status & 0xFF),
>  			 address);
>  	}
> -	if (status & GPU_IRQ_PROTM_FAULT)
> +	if (status & GPU_IRQ_PROTM_FAULT) {
>  		drm_warn(&ptdev->base, "GPU Fault in protected mode\n");
> +		panthor_gpu_disable_protm_fault_interrupt(ptdev);

It's only used in a single place, so I'd just inline the content of
panthor_gpu_disable_protm_fault_interrupt() here. Also, I think
panthor_gpu_disable_protm_fault_interrupt() is not taking the right
lock (see below).

> +		panthor_device_schedule_reset(ptdev);
> +	}
>  
>  	spin_lock(&ptdev->gpu->reqs_lock);
>  	if (status & ptdev->gpu->pending_reqs) {
> @@ -367,6 +370,10 @@ int panthor_gpu_soft_reset(struct panthor_device *ptdev)
>  	unsigned long flags;
>  
>  	spin_lock_irqsave(&ptdev->gpu->reqs_lock, flags);
> +
> +	/** Re-enable the protm_irq_fault when reset is complete */
> +	ptdev->gpu->irq.mask |= GPU_IRQ_PROTM_FAULT;

panthor_irq::mask should only be modified with the
panthor_irq::mask_lock held. Besides, we have a helper for
that:

	panthor_gpu_irq_enable_events(&ptdev->gpu->irq, GPU_IRQ_PROTM_FAULT);

> +
>  	if (!drm_WARN_ON(&ptdev->base,
>  			 ptdev->gpu->pending_reqs & GPU_IRQ_RESET_COMPLETED)) {
>  		ptdev->gpu->pending_reqs |= GPU_IRQ_RESET_COMPLETED;
> @@ -427,3 +434,8 @@ void panthor_gpu_resume(struct panthor_device *ptdev)
>  	panthor_hw_l2_power_on(ptdev);
>  }
>  
> +void panthor_gpu_disable_protm_fault_interrupt(struct panthor_device *ptdev)
> +{
> +	scoped_guard(spinlock_irqsave, &ptdev->gpu->reqs_lock)
> +		ptdev->gpu->irq.mask &= ~GPU_IRQ_PROTM_FAULT;

Same problem with wrong lock being taken to modify the mask, and
panthor_gpu_irq_disable_events() probably being a better option?

> +}
> diff --git a/drivers/gpu/drm/panthor/panthor_gpu.h b/drivers/gpu/drm/panthor/panthor_gpu.h
> index 12c263a399281..ca66c73f543e6 100644
> --- a/drivers/gpu/drm/panthor/panthor_gpu.h
> +++ b/drivers/gpu/drm/panthor/panthor_gpu.h
> @@ -54,4 +54,10 @@ int panthor_gpu_soft_reset(struct panthor_device *ptdev);
>  void panthor_gpu_power_changed_off(struct panthor_device *ptdev);
>  int panthor_gpu_power_changed_on(struct panthor_device *ptdev);
>  
> +/**
> + * panthor_gpu_disable_protm_fault_interrupt() - Disable GPU_PROTECTED_FAULT interrupt
> + * @ptdev: Device.
> + */
> +void panthor_gpu_disable_protm_fault_interrupt(struct panthor_device *ptdev);
> +
>  #endif
> diff --git a/drivers/gpu/drm/panthor/panthor_mmu.c b/drivers/gpu/drm/panthor/panthor_mmu.c
> index 07f54176ec1bf..702f537905b56 100644
> --- a/drivers/gpu/drm/panthor/panthor_mmu.c
> +++ b/drivers/gpu/drm/panthor/panthor_mmu.c
> @@ -31,6 +31,7 @@
>  #include <linux/sizes.h>
>  
>  #include "panthor_device.h"
> +#include "panthor_fw.h"
>  #include "panthor_gem.h"
>  #include "panthor_gpu.h"
>  #include "panthor_heap.h"
> @@ -1704,8 +1705,12 @@ static int panthor_vm_lock_region(struct panthor_vm *vm, u64 start, u64 size)
>  	if (drm_WARN_ON(&ptdev->base, vm->locked_region.size))
>  		return -EINVAL;
>  
> +	down_read(&ptdev->protm.lock);
> +
>  	mutex_lock(&ptdev->mmu->as.slots_lock);
>  	if (vm->as.id >= 0 && size) {
> +		panthor_fw_protm_exit_sync(ptdev);
> +
>  		/* Lock the region that needs to be updated */
>  		gpu_write64(ptdev, AS_LOCKADDR(vm->as.id),
>  			    pack_region_range(ptdev, &start, &size));
> @@ -1720,6 +1725,9 @@ static int panthor_vm_lock_region(struct panthor_vm *vm, u64 start, u64 size)
>  	}
>  	mutex_unlock(&ptdev->mmu->as.slots_lock);
>  
> +	if (ret)
> +		up_read(&ptdev->protm.lock);

Let's try to keep the locked section local to a function: the protm.lock
should, IMHO, be taken/released in panthor_vm_exec_op(). If we go for
that, this also makes the panthor_vm_lock_region() vs
panthor_vm_expand_locked_region() distinction useless, though it's
probably fine to keep it for clarity.

> +
>  	return ret;
>  }
>  
> @@ -1805,6 +1813,8 @@ static void panthor_vm_unlock_region(struct panthor_vm *vm)
>  	vm->locked_region.start = 0;
>  	vm->locked_region.size = 0;
>  	mutex_unlock(&ptdev->mmu->as.slots_lock);
> +
> +	up_read(&ptdev->protm.lock);
>  }
>  
>  static void panthor_mmu_irq_handler(struct panthor_device *ptdev, u32 status)
> diff --git a/drivers/gpu/drm/panthor/panthor_sched.c b/drivers/gpu/drm/panthor/panthor_sched.c
> index 987072bd867c4..acb04250c7def 100644
> --- a/drivers/gpu/drm/panthor/panthor_sched.c
> +++ b/drivers/gpu/drm/panthor/panthor_sched.c
> @@ -308,6 +308,15 @@ struct panthor_scheduler {
>  		 */
>  		struct list_head stopped_groups;
>  	} reset;
> +
> +	/** @protm: Protected mode related fields. */
> +	struct {
> +		/** @protected_mode: True if GPU is in protected mode. */
> +		bool protected_mode;

nit: s/protected_mode/enabled/. But do we even need that boolean?
Isn't active_group != NULL providing the same info?

> +
> +		/** @active_group: The active protected group. */
> +		struct panthor_group *active_group;
> +	} protm;
>  };
>  
>  /**
> @@ -570,6 +579,16 @@ struct panthor_group {
>  	/** @fatal_queues: Bitmask reflecting the queues that hit a fatal exception. */
>  	u32 fatal_queues;
>  
> +	/**
> +	 * @protm_pending_queues: Bitmask reflecting the queues that are waiting
> +	 *                        on a CS_PROTM_PENDING.
> +	 *
> +	 * The GPU will set the bit associated to the queue pending protected mode
> +	 * when a PROT_REGION command is executing or when trying to resume previously
> +	 * suspended protected mode jobs.
> +	 */
> +	u32 protm_pending_queues;
> +
>  	/** @tiler_oom: Mask of queues that have a tiler OOM event to process. */
>  	atomic_t tiler_oom;
>  
> @@ -1176,6 +1195,7 @@ queue_resume_timeout(struct panthor_queue *queue)
>   * @ptdev: Device.
>   * @csg_id: Group slot ID.
>   * @cs_id: Queue slot ID.
> + * @protm_ack: Acknowledge pending protected mode queues
>   *
>   * Program a queue slot with the queue information so things can start being
>   * executed on this queue.
> @@ -1472,6 +1492,34 @@ csg_slot_prog_locked(struct panthor_device *ptdev, u32 csg_id, u32 priority)
>  	return 0;
>  }
>  
> +static void
> +cs_slot_process_protm_pending_event_locked(struct panthor_device *ptdev,
> +					   u32 csg_id, u32 cs_id)
> +{
> +	struct panthor_scheduler *sched = ptdev->scheduler;
> +	struct panthor_csg_slot *csg_slot = &sched->csg_slots[csg_id];
> +	struct panthor_group *group = csg_slot->group;
> +
> +	lockdep_assert_held(&sched->lock);
> +
> +	if (!group)
> +		return;
> +
> +	/* No protected memory heap, a user space program tried to
> +	 * submit a protected mode jobs resulting in the GPU raising
> +	 * a CS_PROTM_PENDING request.
> +	 *
> +	 * This scenario is invalid and the protected mode jobs must
> +	 * not be allowed to progress.
> +	 */
> +	if (!ptdev->protm.heap)
> +		return;

Should we flag the group unusable when that happens and schedule it out
as soon as possible?

	if (!ptdev->protm.heap)
		group->fatal_queues |= BIT(cs_id);
	else
		group->protm_pending_queues |= BIT(cs_id);

	sched_queue_delayed_work(sched, tick, 0);

> +
> +	group->protm_pending_queues |= BIT(cs_id);
> +
> +	sched_queue_delayed_work(sched, tick, 0);
> +}
> +
>  static void
>  cs_slot_process_fatal_event_locked(struct panthor_device *ptdev,
>  				   u32 csg_id, u32 cs_id)
> @@ -1718,6 +1766,9 @@ static bool cs_slot_process_irq_locked(struct panthor_device *ptdev,
>  	if (events & CS_TILER_OOM)
>  		cs_slot_process_tiler_oom_event_locked(ptdev, csg_id, cs_id);
>  
> +	if (events & CS_PROTM_PENDING)
> +		cs_slot_process_protm_pending_event_locked(ptdev, csg_id, cs_id);
> +
>  	/* We don't acknowledge the TILER_OOM event since its handling is
>  	 * deferred to a separate work.
>  	 */
> @@ -1848,6 +1899,17 @@ static void sched_process_idle_event_locked(struct panthor_device *ptdev)
>  	sched_queue_delayed_work(ptdev->scheduler, tick, 0);
>  }
>  
> +static void sched_process_protm_exit_event_locked(struct panthor_device *ptdev)
> +{
> +	lockdep_assert_held(&ptdev->scheduler->lock);
> +
> +	/* Acknowledge the protm exit and schedule a tick. */
> +	panthor_fw_protm_exit(ptdev);

Let's just inline the content of panthor_fw_protm_exit() here.
It doesn't make sense to have all these indirections, especially
since PROTM and scheduling are intertwined anyway, so I consider
it part of the scheduler responsibility (just like the scheduler
deals with other GLB events).

The same goes for the other panthor_fw_protm_ helpers defined in
panthor_fw.c, I think panthor_sched.c would be a better fit for
those, or even inlining their content if they are only used in
a single place.

> +	sched_queue_delayed_work(ptdev->scheduler, tick, 0);
> +	ptdev->scheduler->protm.protected_mode = false;
> +	ptdev->scheduler->protm.active_group = NULL;
> +}
> +
>  /**
>   * sched_process_global_irq_locked() - Process the scheduling part of a global IRQ
>   * @ptdev: Device.
> @@ -1863,6 +1925,9 @@ static void sched_process_global_irq_locked(struct panthor_device *ptdev)
>  	ack = READ_ONCE(glb_iface->output->ack);
>  	evts = (req ^ ack) & GLB_EVT_MASK;
>  
> +	if (evts & GLB_PROTM_EXIT)
> +		sched_process_protm_exit_event_locked(ptdev);
> +
>  	if (evts & GLB_IDLE)
>  		sched_process_idle_event_locked(ptdev);
>  }
> @@ -1872,23 +1937,71 @@ static void process_fw_events_work(struct work_struct *work)
>  	struct panthor_scheduler *sched = container_of(work, struct panthor_scheduler,
>  						      fw_events_work);
>  	u32 events = atomic_xchg(&sched->fw_events, 0);
> +	u32 csg_events = events & ~JOB_INT_GLOBAL_IF;
>  	struct panthor_device *ptdev = sched->ptdev;
>  
>  	mutex_lock(&sched->lock);
>  
> +	while (csg_events) {
> +		u32 csg_id = ffs(csg_events) - 1;
> +
> +		sched_process_csg_irq_locked(ptdev, csg_id);
> +		csg_events &= ~BIT(csg_id);
> +	}

I'm sure you have a good reason to re-order the processing
of CSG and GLB events, and it'd be good to have it documented
in a comment.

> +
>  	if (events & JOB_INT_GLOBAL_IF) {
>  		sched_process_global_irq_locked(ptdev);
>  		events &= ~JOB_INT_GLOBAL_IF;
>  	}
>  
> -	while (events) {
> -		u32 csg_id = ffs(events) - 1;
> +	mutex_unlock(&sched->lock);
> +}
>  
> -		sched_process_csg_irq_locked(ptdev, csg_id);
> -		events &= ~BIT(csg_id);
> +static void handle_protm_fault(struct panthor_device *ptdev)

This is a bit of a misnomer, I think. It doesn't seem to be triggered
by a FAULT event, it's more a timeout on a PROTM section that would
lead to a reset being scheduled, and this "blocked-in-protm" situation
being detected as part of the reset.

> +{
> +	struct panthor_scheduler *sched = ptdev->scheduler;
> +	u32 csg_id;
> +	struct panthor_group *protm_group;
> +
> +	guard(mutex)(&sched->lock);
> +
> +	if (!sched->protm.protected_mode)
> +		return;
> +
> +	protm_group = sched->protm.active_group;
> +
> +	if (drm_WARN_ON(&ptdev->base, !protm_group))
> +		return;

See, that's a perfect example of sched->protm.protected_mode being redundant.
Now you're left with a potential protected_mode=true,active_group=NULL
inconsistency you don't expect.

> +
> +	/* Group will be terminated by the device reset */
> +	protm_group->fatal_queues |= GENMASK(protm_group->queue_count - 1, 0);
> +
> +	if (!panthor_fw_protm_exit_wait_event_timeout(ptdev))
> +		goto cleanup_protm;
> +
> +	/**
> +	 * GPU failed to exit protected mode. Mark all non-protected mode CSGs

	/* GPU failed to exit protected mode. Mark all non-protected mode CSGs

> +	 * as suspended so that they are unaffected by the GPU reset.
> +	 */
> +
> +	for (csg_id = 0; csg_id < sched->csg_slot_count; csg_id++) {
> +		struct panthor_group *group = sched->csg_slots[csg_id].group;
> +
> +		if (!group || group == protm_group)
> +			continue;
> +
> +		group->state = PANTHOR_CS_GROUP_SUSPENDED;
> +
> +		group_unbind_locked(group);
> +
> +		list_move(&group->run_node, group_is_idle(group) ?
> +						&sched->groups.idle[group->priority] :
> +						&sched->groups.runnable[group->priority]);

nit: Let's use a local struct list_head * variable to select the right list.

>  	}
>  
> -	mutex_unlock(&sched->lock);
> +cleanup_protm:
> +	sched->protm.protected_mode = false;
> +	sched->protm.active_group = NULL;
>  }
>  
>  /**
> @@ -2029,6 +2142,7 @@ struct panthor_sched_tick_ctx {
>  	bool immediate_tick;
>  	bool stop_tick;
>  	u32 csg_upd_failed_mask;
> +	struct panthor_group *protm_group;
>  };
>  
>  static bool
> @@ -2299,6 +2413,7 @@ tick_ctx_evict_group(struct panthor_scheduler *sched,
>  
>  static void
>  tick_ctx_reschedule_group(struct panthor_scheduler *sched,
> +			  struct panthor_sched_tick_ctx *ctx,
>  			  struct panthor_csg_slots_upd_ctx *upd_ctx,
>  			  struct panthor_group *group,
>  			  int new_csg_prio)
> @@ -2321,6 +2436,30 @@ tick_ctx_reschedule_group(struct panthor_scheduler *sched,
>  					csg_iface->output->ack ^ CSG_ENDPOINT_CONFIG,
>  					CSG_ENDPOINT_CONFIG);
>  	}
> +
> +	if (ctx->protm_group == group) {
> +		for (u32 q = 0; q < group->queue_count; q++) {
> +			struct panthor_fw_cs_iface *cs_iface;
> +
> +			if (!(group->protm_pending_queues & BIT(q)))
> +				continue;
> +
> +			cs_iface = panthor_fw_get_cs_iface(ptdev, group->csg_id, q);
> +			panthor_fw_update_reqs(cs_iface, req, cs_iface->output->ack,
> +					       CS_PROTM_PENDING);
> +		}
> +
> +		panthor_fw_toggle_reqs(csg_iface, doorbell_req, doorbell_ack,
> +				       group->protm_pending_queues);
> +		csgs_upd_ctx_ring_doorbell(upd_ctx, group->csg_id);
> +		group->protm_pending_queues = 0;
> +
> +		/*
> +		 * We only allow one protected group to run at same time,
> +		 * as it makes it easier to handle faults in protected mode.

It's more something to document in the panthor_scheduler::protm::active_group
section.

> +		 */
> +		sched->protm.active_group = group;

Would it make sense to move this logic to a tick_ctx_handle_protm_group()
helper that's called before/after tick_ctx_reschedule_group()? This way
there's no extra if (ctx->protm_group == group) conditional branch in here.


static void
tick_ctx_handle_protm_group(struct panthor_scheduler *sched,
			    struct panthor_sched_tick_ctx *ctx,
 			    struct panthor_csg_slots_upd_ctx *upd_ctx)
{
	struct panthor_device *ptdev = sched->ptdev;
	struct panthor_group *group = ctx->protm_group;
	struct panthor_fw_csg_iface *csg_iface;

	if (!group || drm_WARN_ON(&ptdev->base, group->csg_id < 0))
		return;

	csg_iface = panthor_fw_get_csg_iface(ptdev, group->csg_id);
	for (u32 q = 0; q < group->queue_count; q++) {
		struct panthor_fw_cs_iface *cs_iface;

		if (!(group->protm_pending_queues & BIT(q)))
			continue;

		cs_iface = panthor_fw_get_cs_iface(ptdev, group->csg_id, q);
		panthor_fw_update_reqs(cs_iface, req, cs_iface->output->ack,
				       CS_PROTM_PENDING);
	}

	panthor_fw_toggle_reqs(csg_iface, doorbell_req, doorbell_ack,
			       group->protm_pending_queues);
	csgs_upd_ctx_ring_doorbell(upd_ctx, group->csg_id);
	group->protm_pending_queues = 0;
	sched->protm.active_group = group;
}

> +	}
>  }
>  
>  static void
> @@ -2336,6 +2475,17 @@ tick_ctx_schedule_group(struct panthor_scheduler *sched,
>  	group_bind_locked(group, csg_id);
>  	csg_slot_prog_locked(ptdev, csg_id, csg_prio);
>  
> +	/* If the group was waiting for protected mode before suspension,
> +	 * and the tick context enters this mode, it should be serviced
> +	 * immediately because the slot reset should have set the
> +	 * CS_PROTM_PENDING bit to zero, and cs_prog_slot_locked() sets it to
> +	 * zero too.
> +	 * It's not clear if we will get a new CS_PROTM_PENDING event in that
> +	 * case, but it should be safe either way.
> +	 */
> +	if (group->protm_pending_queues && ctx->protm_group)
> +		group->protm_pending_queues = 0;

I'd move this to the path where we do the SUSPEND, or group_unbind(), even.

> +
>  	csgs_upd_ctx_queue_reqs(ptdev, upd_ctx, csg_id,
>  				group->state == PANTHOR_CS_GROUP_SUSPENDED ?
>  				CSG_STATE_RESUME : CSG_STATE_START,
> @@ -2365,7 +2515,7 @@ tick_ctx_apply(struct panthor_scheduler *sched, struct panthor_sched_tick_ctx *c
>  
>  		/* Update priorities on already running groups. */
>  		list_for_each_entry(group, &ctx->groups[prio], run_node) {
> -			tick_ctx_reschedule_group(sched, &upd_ctx, group, new_csg_prio--);
> +			tick_ctx_reschedule_group(sched, ctx, &upd_ctx, group, new_csg_prio--);
>  		}
>  	}
>  
> @@ -2457,6 +2607,15 @@ tick_ctx_apply(struct panthor_scheduler *sched, struct panthor_sched_tick_ctx *c
>  
>  	sched->used_csg_slot_count = ctx->group_count;
>  	sched->might_have_idle_groups = ctx->idle_group_count > 0;
> +
> +	if (ctx->protm_group) {
> +		ret = panthor_fw_protm_enter(ptdev);
> +		if (ret) {
> +			panthor_device_schedule_reset(ptdev);
> +			ctx->csg_upd_failed_mask = U32_MAX;

It's weird to flag it as all CSGs update failed. Should we instead
have

			/* If we failed to enter PROTM, consider the group who
			 * requested it as failed.
			 */
			ctx->csg_upd_failed_mask |= BIT(ctx->protm_group->csg_id);

> +		}
> +		sched->protm.protected_mode = true;

I'd move that to a tick_ctx_service_protm_req() helper that has the
panthor_fw_protm_enter() inlined, because again, it doesn't make
sense to have this defined in panthor_fw.c if the only user lives
in panthor_sched.c

> +	}
>  }
>  
>  static u64
> @@ -2490,7 +2649,7 @@ static void tick_work(struct work_struct *work)
>  	u64 resched_target = sched->resched_target;
>  	u64 remaining_jiffies = 0, resched_delay;
>  	u64 now = get_jiffies_64();
> -	int prio, ret, cookie;
> +	int prio, protm_prio, ret, cookie;
>  	bool full_tick;
>  
>  	if (!drm_dev_enter(&ptdev->base, &cookie))
> @@ -2564,14 +2723,49 @@ static void tick_work(struct work_struct *work)
>  		}
>  	}
>  
> +	/* Check if the highest priority group want to switch to protected mode */
> +	for (protm_prio = PANTHOR_CSG_PRIORITY_COUNT - 1; protm_prio >= 0; protm_prio--) {
> +		struct panthor_group *group;
> +
> +		group = list_first_entry_or_null(&ctx.groups[protm_prio],
> +						 struct panthor_group,
> +						 run_node);
> +		if (group) {
> +			ctx.protm_group = group;
> +			break;
> +		}

Should this be

		if (group) {
			if (group->protm_pending_queues)
				ctx.protm_group = group;

			break;
		}

?

> +	}
> +
>  	/* If we have free CSG slots left, pick idle groups */
> -	for (prio = PANTHOR_CSG_PRIORITY_COUNT - 1;
> -	     prio >= 0 && !tick_ctx_is_full(sched, &ctx);
> -	     prio--) {

How about we keep it a single indentation level and skip higher prios if
PROTM is requested:

		/* Pick only idle groups with equal or lower priority than the
		 * group triggering protected mode. Do not bother picking
		 * unscheduled idle groups.
		 */
		if (ctx.protm_group && prio < protm_prio)
			continue;

This saves us an indentation level and limits the code duplication.

> -		/* Check the old_group queue first to avoid reprogramming the slots */
> -		tick_ctx_pick_groups_from_list(sched, &ctx, &ctx.old_groups[prio], false, true);
> -		tick_ctx_pick_groups_from_list(sched, &ctx, &sched->groups.idle[prio],
> -					       false, false);
> +	if (ctx.protm_group) {
> +		/* Pick only idle groups with equal or lower priority than the
> +		 * group triggering protected mode. Do not bother picking
> +		 * unscheduled idle groups.
> +		 */
> +		for (prio = protm_prio;
> +		     prio >= 0 && !tick_ctx_is_full(sched, &ctx);
> +		     prio--)
> +			tick_ctx_pick_groups_from_list(sched, &ctx,
> +						       &ctx.old_groups[prio],
> +						       false, true);
> +	} else {
> +		/* No switch to protected, just pick any idle group according
> +		 * to priority
> +		 */
> +		for (prio = PANTHOR_CSG_PRIORITY_COUNT - 1;
> +		     prio >= 0 && !tick_ctx_is_full(sched, &ctx);
> +		     prio--) {
> +			/* Check the old_group queue first to avoid
> +			 * reprogramming the slots
> +			 */
> +			tick_ctx_pick_groups_from_list(sched, &ctx,
> +						       &ctx.old_groups[prio],
> +						       false, true);
> +			tick_ctx_pick_groups_from_list(sched, &ctx,
> +						       &sched->groups.idle[prio],
> +						       false, false);
> +		}
> +
>  	}
>  
>  	tick_ctx_apply(sched, &ctx);
> @@ -2993,6 +3187,8 @@ void panthor_sched_pre_reset(struct panthor_device *ptdev)
>  	cancel_work_sync(&sched->sync_upd_work);
>  	cancel_delayed_work_sync(&sched->tick_work);
>  
> +	handle_protm_fault(ptdev);

I actually wonder if this should be part of the panthor_sched_suspend()
logic. That is, we would automatically flag all non-protm groups as
suspended if the GPU was in PROTM mode at the time the hang happened.

> +
>  	panthor_sched_suspend(ptdev);
>  
>  	/* Stop all groups that might still accept jobs, so we don't get passed

Regards,

Boris

  parent reply	other threads:[~2026-05-06  8:51 UTC|newest]

Thread overview: 29+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-05-05 14:05 [PATCH 0/8] drm/panthor: Protected mode support for Mali CSF GPUs Ketil Johnsen
2026-05-05 14:05 ` [PATCH 1/8] dma-heap: Add proper kref handling on dma-buf heaps Ketil Johnsen
2026-05-05 15:20   ` Boris Brezillon
2026-05-05 15:39     ` Maxime Ripard
2026-05-05 16:40       ` Boris Brezillon
2026-05-05 14:05 ` [PATCH 2/8] dma-heap: Provide accessors so that in-kernel drivers can allocate dmabufs from specific heaps Ketil Johnsen
2026-05-05 15:45   ` Boris Brezillon
2026-05-05 14:05 ` [PATCH 3/8] drm/panthor: De-duplicate FW memory section sync Ketil Johnsen
2026-05-05 15:47   ` Boris Brezillon
2026-05-05 14:05 ` [PATCH 4/8] drm/panthor: Add support for protected memory allocation in panthor Ketil Johnsen
2026-05-05 16:15   ` Boris Brezillon
2026-05-06 10:08   ` Maxime Ripard
2026-05-06 10:50     ` Boris Brezillon
2026-05-06 13:12       ` Maxime Ripard
2026-05-06 15:05         ` Boris Brezillon
2026-05-06 12:43     ` Nicolas Frattaroli
2026-05-06 13:31       ` Maxime Ripard
2026-05-06 12:28   ` Nicolas Frattaroli
2026-05-05 14:05 ` [PATCH 5/8] drm/panthor: Minor scheduler refactoring Ketil Johnsen
2026-05-05 16:19   ` Boris Brezillon
2026-05-06 10:33   ` Boris Brezillon
2026-05-05 14:05 ` [PATCH 6/8] drm/panthor: Explicit expansion of locked VM region Ketil Johnsen
2026-05-05 16:32   ` Boris Brezillon
2026-05-06 15:14   ` Nicolas Frattaroli
2026-05-05 14:05 ` [PATCH 7/8] drm/panthor: Add support for entering and exiting protected mode Ketil Johnsen
2026-05-05 17:11   ` Boris Brezillon
2026-05-06  8:51   ` Boris Brezillon [this message]
2026-05-05 14:05 ` [PATCH 8/8] drm/panthor: Expose protected rendering features Ketil Johnsen
2026-05-06  9:14   ` Boris Brezillon

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260506105145.6f25181d@fedora \
    --to=boris.brezillon@collabora.com \
    --cc=Brian.Starkey@arm.com \
    --cc=airlied@gmail.com \
    --cc=aliceryhl@google.com \
    --cc=angelogioacchino.delregno@collabora.com \
    --cc=benjamin.gaignard@collabora.com \
    --cc=christian.koenig@amd.com \
    --cc=corbet@lwn.net \
    --cc=daniel.almeida@collabora.com \
    --cc=dri-devel@lists.freedesktop.org \
    --cc=florent.tomasin@arm.com \
    --cc=jstultz@google.com \
    --cc=ketil.johnsen@arm.com \
    --cc=linaro-mm-sig@lists.linaro.org \
    --cc=linux-arm-kernel@lists.infradead.org \
    --cc=linux-doc@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-media@vger.kernel.org \
    --cc=linux-mediatek@lists.infradead.org \
    --cc=liviu.dudau@arm.com \
    --cc=maarten.lankhorst@linux.intel.com \
    --cc=matthias.bgg@gmail.com \
    --cc=mripard@kernel.org \
    --cc=paul.toadere@arm.com \
    --cc=samuel.percival@arm.com \
    --cc=simona@ffwll.ch \
    --cc=skhan@linuxfoundation.org \
    --cc=steven.price@arm.com \
    --cc=sumit.semwal@linaro.org \
    --cc=tjmercier@google.com \
    --cc=tzimmermann@suse.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox