public inbox for amd-gfx@lists.freedesktop.org
 help / color / mirror / Atom feed
From: "Christian König" <ckoenig.leichtzumerken@gmail.com>
To: "Zhou, David(ChunMing)" <David1.Zhou@amd.com>,
	"Grodzovsky, Andrey" <Andrey.Grodzovsky@amd.com>,
	"dri-devel@lists.freedesktop.org"
	<dri-devel@lists.freedesktop.org>,
	"amd-gfx@lists.freedesktop.org" <amd-gfx@lists.freedesktop.org>,
	"eric@anholt.net" <eric@anholt.net>,
	"etnaviv@lists.freedesktop.org" <etnaviv@lists.freedesktop.org>
Cc: "Liu, Monk" <Monk.Liu@amd.com>
Subject: Re: [PATCH 2/2] drm/sched: Rework HW fence processing.
Date: Fri, 7 Dec 2018 09:19:51 +0100	[thread overview]
Message-ID: <e05f4102-1995-ea6b-caf5-4598414726fc@gmail.com> (raw)
In-Reply-To: <BY1PR12MB05028DBBDB411053EE6B01EDB4AA0@BY1PR12MB0502.namprd12.prod.outlook.com>

Am 07.12.18 um 04:18 schrieb Zhou, David(ChunMing):
>
>> -----Original Message-----
>> From: dri-devel <dri-devel-bounces@lists.freedesktop.org> On Behalf Of
>> Andrey Grodzovsky
>> Sent: Friday, December 07, 2018 1:41 AM
>> To: dri-devel@lists.freedesktop.org; amd-gfx@lists.freedesktop.org;
>> ckoenig.leichtzumerken@gmail.com; eric@anholt.net;
>> etnaviv@lists.freedesktop.org
>> Cc: Liu, Monk <Monk.Liu@amd.com>
>> Subject: [PATCH 2/2] drm/sched: Rework HW fence processing.
>>
>> Expedite job deletion from ring mirror list to the HW fence signal callback
>> instead of from finish_work; together with waiting for all such fences to signal in
>> drm_sched_stop we guarantee that an already signaled job will not be processed
>> twice.
>> Remove the sched finish fence callback and just submit finish_work directly
>> from the HW fence callback.
>>
>> Suggested-by: Christian Koenig <Christian.Koenig@amd.com>
>> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
>> ---
>>   drivers/gpu/drm/scheduler/sched_fence.c |  4 +++-
>> drivers/gpu/drm/scheduler/sched_main.c  | 39 ++++++++++++++++----------
>> -------
>>   include/drm/gpu_scheduler.h             | 10 +++++++--
>>   3 files changed, 30 insertions(+), 23 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/scheduler/sched_fence.c
>> b/drivers/gpu/drm/scheduler/sched_fence.c
>> index d8d2dff..e62c239 100644
>> --- a/drivers/gpu/drm/scheduler/sched_fence.c
>> +++ b/drivers/gpu/drm/scheduler/sched_fence.c
>> @@ -151,7 +151,8 @@ struct drm_sched_fence
>> *to_drm_sched_fence(struct dma_fence *f)
>> EXPORT_SYMBOL(to_drm_sched_fence);
>>
>>   struct drm_sched_fence *drm_sched_fence_create(struct
>> drm_sched_entity *entity,
>> -					       void *owner)
>> +					       void *owner,
>> +					       struct drm_sched_job *s_job)
>>   {
>>   	struct drm_sched_fence *fence = NULL;
>>   	unsigned seq;
>> @@ -163,6 +164,7 @@ struct drm_sched_fence
>> *drm_sched_fence_create(struct drm_sched_entity *entity,
>>   	fence->owner = owner;
>>   	fence->sched = entity->rq->sched;
>>   	spin_lock_init(&fence->lock);
>> +	fence->s_job = s_job;
>>
>>   	seq = atomic_inc_return(&entity->fence_seq);
>>   	dma_fence_init(&fence->scheduled,
>> &drm_sched_fence_ops_scheduled, diff --git
>> a/drivers/gpu/drm/scheduler/sched_main.c
>> b/drivers/gpu/drm/scheduler/sched_main.c
>> index 8fb7f86..2860037 100644
>> --- a/drivers/gpu/drm/scheduler/sched_main.c
>> +++ b/drivers/gpu/drm/scheduler/sched_main.c
>> @@ -284,31 +284,17 @@ static void drm_sched_job_finish(struct
>> work_struct *work)
>>   	cancel_delayed_work_sync(&sched->work_tdr);
>>
>>   	spin_lock_irqsave(&sched->job_list_lock, flags);
>> -	/* remove job from ring_mirror_list */
>> -	list_del_init(&s_job->node);
>> -	/* queue TDR for next job */
>>   	drm_sched_start_timeout(sched);
>>   	spin_unlock_irqrestore(&sched->job_list_lock, flags);
>>
>>   	sched->ops->free_job(s_job);
>>   }
>>
>> -static void drm_sched_job_finish_cb(struct dma_fence *f,
>> -				    struct dma_fence_cb *cb)
>> -{
>> -	struct drm_sched_job *job = container_of(cb, struct drm_sched_job,
>> -						 finish_cb);
>> -	schedule_work(&job->finish_work);
>> -}
>> -
>>   static void drm_sched_job_begin(struct drm_sched_job *s_job)  {
>>   	struct drm_gpu_scheduler *sched = s_job->sched;
>>   	unsigned long flags;
>>
>> -	dma_fence_add_callback(&s_job->s_fence->finished, &s_job-
>>> finish_cb,
>> -			       drm_sched_job_finish_cb);
>> -
>>   	spin_lock_irqsave(&sched->job_list_lock, flags);
>>   	list_add_tail(&s_job->node, &sched->ring_mirror_list);
>>   	drm_sched_start_timeout(sched);
>> @@ -418,13 +404,17 @@ void drm_sched_start(struct drm_gpu_scheduler
>> *sched, bool unpark_only)  {
>>   	struct drm_sched_job *s_job, *tmp;
>>   	bool found_guilty = false;
>> -	unsigned long flags;
>>   	int r;
>>
>>   	if (unpark_only)
>>   		goto unpark;
>>
>> -	spin_lock_irqsave(&sched->job_list_lock, flags);
>> +	/*
>> +	 * Locking the list is not required here as the sched thread is parked
>> +	 * so no new jobs are being pushed in to HW and in drm_sched_stop
>> we
>> +	 * flushed any in flight jobs who didn't signal yet. Also concurrent
>> +	 * GPU recovers can't run in parallel.
>> +	 */
>>   	list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list,
>> node) {
>>   		struct drm_sched_fence *s_fence = s_job->s_fence;
>>   		struct dma_fence *fence;
>> @@ -453,7 +443,6 @@ void drm_sched_start(struct drm_gpu_scheduler
>> *sched, bool unpark_only)
>>   	}
>>
>>   	drm_sched_start_timeout(sched);
>> -	spin_unlock_irqrestore(&sched->job_list_lock, flags);
>>
>>   unpark:
>>   	kthread_unpark(sched->thread);
>> @@ -505,7 +494,7 @@ int drm_sched_job_init(struct drm_sched_job *job,
>>   	job->sched = sched;
>>   	job->entity = entity;
>>   	job->s_priority = entity->rq - sched->sched_rq;
>> -	job->s_fence = drm_sched_fence_create(entity, owner);
>> +	job->s_fence = drm_sched_fence_create(entity, owner, job);
>>   	if (!job->s_fence)
>>   		return -ENOMEM;
>>   	job->id = atomic64_inc_return(&sched->job_id_count);
>> @@ -593,15 +582,25 @@ static void drm_sched_process_job(struct
>> dma_fence *f, struct dma_fence_cb *cb)
>>   	struct drm_sched_fence *s_fence =
>>   		container_of(cb, struct drm_sched_fence, cb);
>>   	struct drm_gpu_scheduler *sched = s_fence->sched;
>> +	struct drm_sched_job *s_job = s_fence->s_job;
>
> Seems we are back to original, Christian argued s_fence and s_job are with different lifetime , Do their lifetime become same now?

No, that still doesn't work. The scheduler fence lives much longer than 
the job, so we would have a dangling pointer here.

The real question is why we actually need that? I mean we just need to 
move the callback structure into the job, don't we?

Christian.

>
> -David
>
>> +	unsigned long flags;
>> +
>> +	cancel_delayed_work(&sched->work_tdr);
>>
>> -	dma_fence_get(&s_fence->finished);
>>   	atomic_dec(&sched->hw_rq_count);
>>   	atomic_dec(&sched->num_jobs);
>> +
>> +	spin_lock_irqsave(&sched->job_list_lock, flags);
>> +	/* remove job from ring_mirror_list */
>> +	list_del_init(&s_job->node);
>> +	spin_unlock_irqrestore(&sched->job_list_lock, flags);
>> +
>>   	drm_sched_fence_finished(s_fence);
>>
>>   	trace_drm_sched_process_job(s_fence);
>> -	dma_fence_put(&s_fence->finished);
>>   	wake_up_interruptible(&sched->wake_up_worker);
>> +
>> +	schedule_work(&s_job->finish_work);
>>   }
>>
>>   /**
>> diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
>> index c94b592..23855c6 100644
>> --- a/include/drm/gpu_scheduler.h
>> +++ b/include/drm/gpu_scheduler.h
>> @@ -115,6 +115,8 @@ struct drm_sched_rq {
>>   	struct drm_sched_entity		*current_entity;
>>   };
>>
>> +struct drm_sched_job;
>> +
>>   /**
>>    * struct drm_sched_fence - fences corresponding to the scheduling of a job.
>>    */
>> @@ -160,6 +162,9 @@ struct drm_sched_fence {
>>            * @owner: job owner for debugging
>>            */
>>   	void				*owner;
>> +
>> +	/* Back pointer to owning job */
>> +	struct drm_sched_job 		*s_job;
>>   };
>>
>>   struct drm_sched_fence *to_drm_sched_fence(struct dma_fence *f); @@
>> -330,8 +335,9 @@ void drm_sched_entity_set_priority(struct
>> drm_sched_entity *entity,
>>   				   enum drm_sched_priority priority);  bool
>> drm_sched_entity_is_ready(struct drm_sched_entity *entity);
>>
>> -struct drm_sched_fence *drm_sched_fence_create(
>> -	struct drm_sched_entity *s_entity, void *owner);
>> +struct drm_sched_fence *drm_sched_fence_create(struct
>> drm_sched_entity *s_entity,
>> +					       void *owner,
>> +					       struct drm_sched_job *s_job);
>>   void drm_sched_fence_scheduled(struct drm_sched_fence *fence);  void
>> drm_sched_fence_finished(struct drm_sched_fence *fence);
>>
>> --
>> 2.7.4
>>
>> _______________________________________________
>> dri-devel mailing list
>> dri-devel@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/dri-devel

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

  reply	other threads:[~2018-12-07  8:19 UTC|newest]

Thread overview: 9+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-12-06 17:41 [PATCH 1/2] drm/sched: Refactor ring mirror list handling Andrey Grodzovsky
2018-12-06 17:41 ` [PATCH 2/2] drm/sched: Rework HW fence processing Andrey Grodzovsky
2018-12-06 18:12   ` Grodzovsky, Andrey
2018-12-07  3:18   ` Zhou, David(ChunMing)
2018-12-07  8:19     ` Christian König [this message]
2018-12-07 15:22       ` Grodzovsky, Andrey
2018-12-07 15:26         ` Koenig, Christian
     [not found] ` <1544118074-24910-1-git-send-email-andrey.grodzovsky-5C7GfCeVMHo@public.gmane.org>
2018-12-06 18:33   ` [PATCH 1/2] drm/sched: Refactor ring mirror list handling Christian König
     [not found]     ` <ebd8c8bf-cd6d-b04f-b682-6cac4723ec40-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2018-12-06 21:27       ` Grodzovsky, Andrey

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=e05f4102-1995-ea6b-caf5-4598414726fc@gmail.com \
    --to=ckoenig.leichtzumerken@gmail.com \
    --cc=Andrey.Grodzovsky@amd.com \
    --cc=David1.Zhou@amd.com \
    --cc=Monk.Liu@amd.com \
    --cc=amd-gfx@lists.freedesktop.org \
    --cc=christian.koenig@amd.com \
    --cc=dri-devel@lists.freedesktop.org \
    --cc=eric@anholt.net \
    --cc=etnaviv@lists.freedesktop.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox