From: Luben Tuikov <luben.tuikov@amd.com>
To: "Mike Lothian" <mike@fireburn.co.uk>,
"Christian König" <ckoenig.leichtzumerken@gmail.com>
Cc: amd-gfx@lists.freedesktop.org, dri-devel@lists.freedesktop.org,
"Christian König" <christian.koenig@amd.com>
Subject: Re: [PATCH 10/13] drm/amdgpu: use scheduler depenencies for CS
Date: Wed, 21 Dec 2022 10:52:01 -0500 [thread overview]
Message-ID: <2ce4ce81-d345-4e6d-edf4-d3133aece267@amd.com> (raw)
In-Reply-To: <CAHbf0-GPVQ4tRgtOLUkP8TW4T9+XGuQQQ70h-DoW9GhspWCa=w@mail.gmail.com>
On 2022-12-21 10:34, Mike Lothian wrote:
> On Fri, 14 Oct 2022 at 09:47, Christian König
> <ckoenig.leichtzumerken@gmail.com> wrote:
>>
>> Entirely remove the sync obj in the job.
>>
>> Signed-off-by: Christian König <christian.koenig@amd.com>
>> ---
>> drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 21 ++++++++++-----------
>> drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h | 2 ++
>> drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 9 +--------
>> drivers/gpu/drm/amd/amdgpu/amdgpu_job.h | 1 -
>> 4 files changed, 13 insertions(+), 20 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> index d45b86bcf7fa..0528c2b1db6e 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> @@ -426,7 +426,7 @@ static int amdgpu_cs_p2_dependencies(struct amdgpu_cs_parser *p,
>> dma_fence_put(old);
>> }
>>
>> - r = amdgpu_sync_fence(&p->gang_leader->sync, fence);
>> + r = amdgpu_sync_fence(&p->sync, fence);
>> dma_fence_put(fence);
>> if (r)
>> return r;
>> @@ -448,7 +448,7 @@ static int amdgpu_syncobj_lookup_and_add(struct amdgpu_cs_parser *p,
>> return r;
>> }
>>
>> - r = amdgpu_sync_fence(&p->gang_leader->sync, fence);
>> + r = amdgpu_sync_fence(&p->sync, fence);
>> if (r)
>> goto error;
>>
>> @@ -1108,7 +1108,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
>> if (r)
>> return r;
>>
>> - r = amdgpu_sync_fence(&job->sync, fpriv->prt_va->last_pt_update);
>> + r = amdgpu_sync_fence(&p->sync, fpriv->prt_va->last_pt_update);
>> if (r)
>> return r;
>>
>> @@ -1119,7 +1119,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
>> if (r)
>> return r;
>>
>> - r = amdgpu_sync_fence(&job->sync, bo_va->last_pt_update);
>> + r = amdgpu_sync_fence(&p->sync, bo_va->last_pt_update);
>> if (r)
>> return r;
>> }
>> @@ -1138,7 +1138,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
>> if (r)
>> return r;
>>
>> - r = amdgpu_sync_fence(&job->sync, bo_va->last_pt_update);
>> + r = amdgpu_sync_fence(&p->sync, bo_va->last_pt_update);
>> if (r)
>> return r;
>> }
>> @@ -1151,7 +1151,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
>> if (r)
>> return r;
>>
>> - r = amdgpu_sync_fence(&job->sync, vm->last_update);
>> + r = amdgpu_sync_fence(&p->sync, vm->last_update);
>> if (r)
>> return r;
>>
>> @@ -1183,7 +1183,6 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
>> static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
>> {
>> struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
>> - struct amdgpu_job *leader = p->gang_leader;
>> struct amdgpu_bo_list_entry *e;
>> unsigned int i;
>> int r;
>> @@ -1195,14 +1194,14 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
>>
>> sync_mode = amdgpu_bo_explicit_sync(bo) ?
>> AMDGPU_SYNC_EXPLICIT : AMDGPU_SYNC_NE_OWNER;
>> - r = amdgpu_sync_resv(p->adev, &leader->sync, resv, sync_mode,
>> + r = amdgpu_sync_resv(p->adev, &p->sync, resv, sync_mode,
>> &fpriv->vm);
>> if (r)
>> return r;
>> }
>>
>> - for (i = 0; i < p->gang_size - 1; ++i) {
>> - r = amdgpu_sync_clone(&leader->sync, &p->jobs[i]->sync);
>> + for (i = 0; i < p->gang_size; ++i) {
>> + r = amdgpu_sync_push_to_job(&p->sync, p->jobs[i]);
>> if (r)
>> return r;
>> }
>> @@ -1248,7 +1247,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
>> struct dma_fence *fence;
>>
>> fence = &p->jobs[i]->base.s_fence->scheduled;
>> - r = amdgpu_sync_fence(&leader->sync, fence);
>> + r = drm_sched_job_add_dependency(&leader->base, fence);
>> if (r)
>> goto error_cleanup;
>> }
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h
>> index cbaa19b2b8a3..207e801c24ed 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h
>> @@ -75,6 +75,8 @@ struct amdgpu_cs_parser {
>>
>> unsigned num_post_deps;
>> struct amdgpu_cs_post_dep *post_deps;
>> +
>> + struct amdgpu_sync sync;
>> };
>>
>> int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
>> index ba98d65835b4..b8494c3b3b8a 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
>> @@ -106,7 +106,6 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>> (*job)->base.sched = &adev->rings[0]->sched;
>> (*job)->vm = vm;
>>
>> - amdgpu_sync_create(&(*job)->sync);
>> amdgpu_sync_create(&(*job)->explicit_sync);
>> (*job)->vram_lost_counter = atomic_read(&adev->vram_lost_counter);
>> (*job)->vm_pd_addr = AMDGPU_BO_INVALID_OFFSET;
>> @@ -174,9 +173,7 @@ static void amdgpu_job_free_cb(struct drm_sched_job *s_job)
>>
>> drm_sched_job_cleanup(s_job);
>>
>> - amdgpu_sync_free(&job->sync);
>> amdgpu_sync_free(&job->explicit_sync);
>> -
>> dma_fence_put(&job->hw_fence);
>> }
>>
>> @@ -202,7 +199,6 @@ void amdgpu_job_free(struct amdgpu_job *job)
>> drm_sched_job_cleanup(&job->base);
>>
>> amdgpu_job_free_resources(job);
>> - amdgpu_sync_free(&job->sync);
>> amdgpu_sync_free(&job->explicit_sync);
>> if (job->gang_submit != &job->base.s_fence->scheduled)
>> dma_fence_put(job->gang_submit);
>> @@ -246,10 +242,9 @@ amdgpu_job_dependency(struct drm_sched_job *sched_job,
>> {
>> struct amdgpu_ring *ring = to_amdgpu_ring(s_entity->rq->sched);
>> struct amdgpu_job *job = to_amdgpu_job(sched_job);
>> - struct dma_fence *fence;
>> + struct dma_fence *fence = NULL;
>> int r;
>>
>> - fence = amdgpu_sync_get_fence(&job->sync);
>> while (fence == NULL && job->vm && !job->vmid) {
>> r = amdgpu_vmid_grab(job->vm, ring, job, &fence);
>> if (r)
>> @@ -273,8 +268,6 @@ static struct dma_fence *amdgpu_job_run(struct drm_sched_job *sched_job)
>> job = to_amdgpu_job(sched_job);
>> finished = &job->base.s_fence->finished;
>>
>> - BUG_ON(amdgpu_sync_peek_fence(&job->sync, NULL));
>> -
>> trace_amdgpu_sched_run_job(job);
>>
>> /* Skip job if VRAM is lost and never resubmit gangs */
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
>> index 9c10b9bd0084..6558839fda03 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
>> @@ -47,7 +47,6 @@ enum amdgpu_ib_pool_type;
>> struct amdgpu_job {
>> struct drm_sched_job base;
>> struct amdgpu_vm *vm;
>> - struct amdgpu_sync sync;
>> struct amdgpu_sync explicit_sync;
>> struct dma_fence hw_fence;
>> struct dma_fence *gang_submit;
>> --
>> 2.25.1
>>
>
> Hi, I've been testing the Mesh shader benchmark in GravityMark and
> I've bisected my laptop freezing up and rebooting, to this commit
>
> 1728baa7e4e60054bf13dd9b1212d133cbd53b3f is the first bad commit
> commit 1728baa7e4e60054bf13dd9b1212d133cbd53b3f
> Author: Christian König <christian.koenig@amd.com>
> Date: Thu Sep 29 14:04:01 2022 +0200
>
> drm/amdgpu: use scheduler dependencies for CS
>
> Entirely remove the sync obj in the job.
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
> Reviewed-by: Luben Tuikov <luben.tuikov@amd.com>
> Link: https://patchwork.freedesktop.org/patch/msgid/20221014084641.128280-11-christian.koenig@amd.com
>
> drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 21 ++++++++++-----------
> drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h | 2 ++
> drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 9 +--------
> drivers/gpu/drm/amd/amdgpu/amdgpu_job.h | 1 -
> 4 files changed, 13 insertions(+), 20 deletions(-)
>
> This is on a prime system 6800M with the latest mesa
>
> I tried reverting this patch however it didn't revert cleanly, and my
> attempt doesn't work and only partially freezes up the system
>
> Would you like me to open a bug for this on
> https://gitlab.freedesktop.org/drm/amd/-/issues ?
>
Hi Mike,
Could you try this patch:
https://lore.kernel.org/all/20221219104718.21677-1-christian.koenig@amd.com/
Regards,
Luben
next prev parent reply other threads:[~2022-12-21 15:52 UTC|newest]
Thread overview: 46+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-10-14 8:46 Fixes for scheduler hang when killing a process Christian König
2022-10-14 8:46 ` [PATCH 01/13] drm/scheduler: fix fence ref counting Christian König
2022-10-25 3:23 ` Luna Nova
2022-10-25 11:35 ` Christian König
2022-10-14 8:46 ` [PATCH 02/13] drm/scheduler: add drm_sched_job_add_resv_dependencies Christian König
2022-10-14 8:46 ` [PATCH 03/13] drm/amdgpu: use drm_sched_job_add_resv_dependencies for moves Christian König
2022-10-14 8:46 ` [PATCH 04/13] drm/amdgpu: drop the fence argument from amdgpu_vmid_grab Christian König
2022-10-14 8:46 ` [PATCH 05/13] drm/amdgpu: drop amdgpu_sync " Christian König
2022-10-23 1:25 ` Luben Tuikov
2022-10-24 10:54 ` Christian König
2022-10-14 8:46 ` [PATCH 06/13] drm/amdgpu: cleanup scheduler job initialization Christian König
2022-10-23 1:50 ` Luben Tuikov
2022-10-14 8:46 ` [PATCH 07/13] drm/amdgpu: move explicit sync check into the CS Christian König
2022-10-14 8:46 ` [PATCH 08/13] drm/amdgpu: use scheduler depenencies for VM updates Christian König
2022-10-24 5:50 ` Luben Tuikov
2022-10-14 8:46 ` [PATCH 09/13] drm/amdgpu: use scheduler depenencies for UVD msgs Christian König
2022-10-24 5:53 ` Luben Tuikov
2022-10-14 8:46 ` [PATCH 10/13] drm/amdgpu: use scheduler depenencies for CS Christian König
2022-10-24 5:55 ` Luben Tuikov
2022-12-21 15:34 ` Mike Lothian
2022-12-21 15:47 ` Mike Lothian
2022-12-21 15:52 ` Luben Tuikov [this message]
2022-12-21 15:55 ` Mike Lothian
2022-10-14 8:46 ` [PATCH 11/13] drm/scheduler: remove drm_sched_dependency_optimized Christian König
2022-10-14 8:46 ` [PATCH 12/13] drm/scheduler: rework entity flush, kill and fini Christian König
2022-11-17 2:36 ` Dmitry Osipenko
2022-11-17 9:53 ` Christian König
2022-11-17 12:47 ` Dmitry Osipenko
2022-11-17 12:55 ` Christian König
2022-11-17 12:59 ` Dmitry Osipenko
2022-11-17 13:00 ` Dmitry Osipenko
2022-11-17 13:11 ` Christian König
2022-11-17 14:41 ` Dmitry Osipenko
2022-11-17 15:09 ` Christian König
2022-11-17 15:11 ` Dmitry Osipenko
2022-12-28 16:27 ` Rob Clark
2022-12-28 16:52 ` Rob Clark
2023-01-01 18:29 ` youling257
2023-01-02 9:24 ` Dmitry Osipenko
2023-01-02 14:17 ` youling 257
2023-01-02 15:08 ` Dmitry Osipenko
2022-10-14 8:46 ` [PATCH 13/13] drm/scheduler: rename dependency callback into prepare_job Christian König
2022-10-23 1:35 ` Fixes for scheduler hang when killing a process Luben Tuikov
2022-10-24 7:00 ` Luben Tuikov
-- strict thread matches above, loose matches on Subject: below --
2022-12-21 21:12 [PATCH 10/13] drm/amdgpu: use scheduler depenencies for CS Bert Karwatzki
2022-12-21 21:59 Bert Karwatzki
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=2ce4ce81-d345-4e6d-edf4-d3133aece267@amd.com \
--to=luben.tuikov@amd.com \
--cc=amd-gfx@lists.freedesktop.org \
--cc=christian.koenig@amd.com \
--cc=ckoenig.leichtzumerken@gmail.com \
--cc=dri-devel@lists.freedesktop.org \
--cc=mike@fireburn.co.uk \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox