* [PATCH] drm/amdgpu:fix world switch hang
@ 2017-06-06 9:27 Monk Liu
[not found] ` <1496741253-24964-1-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
0 siblings, 1 reply; 3+ messages in thread
From: Monk Liu @ 2017-06-06 9:27 UTC (permalink / raw)
To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Monk Liu
for SR-IOV, we must keep the pipeline-sync in the protection
of COND_EXEC, otherwise the command consumed by CPG is not
consistent when world switch triggerd, e.g.:
world switch hit and the IB frame is skipped so the fence
won't signal, thus CP will jump to the next DMAframe's pipeline-sync
command, and it will make CP hang foever.
after pipelin-sync moved into COND_EXEC the consistency can be
guaranteed
Change-Id: I3059535745e2e1215343299b17f27fe081fc01bc
Signed-off-by: Monk Liu <Monk.Liu@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 5 +++--
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 7 +++++--
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 2 +-
3 files changed, 9 insertions(+), 5 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index 025e03b..947bd34 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -130,6 +130,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
unsigned i;
int r = 0;
+ bool need_pipe_sync = false;
if (num_ibs == 0)
return -EINVAL;
@@ -165,7 +166,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
if (ring->funcs->emit_pipeline_sync && job &&
((tmp = amdgpu_sync_get_fence(&job->sched_sync)) ||
amdgpu_vm_need_pipeline_sync(ring, job))) {
- amdgpu_ring_emit_pipeline_sync(ring);
+ need_pipe_sync = true;
fence_put(tmp);
}
@@ -173,7 +174,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
ring->funcs->insert_start(ring);
if (vm) {
- r = amdgpu_vm_flush(ring, job);
+ r = amdgpu_vm_flush(ring, job, need_pipe_sync);
if (r) {
amdgpu_ring_undo(ring);
return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index b065f8b..6d62e34 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -747,7 +747,7 @@ static bool amdgpu_vm_is_large_bar(struct amdgpu_device *adev)
*
* Emit a VM flush when it is necessary.
*/
-int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job)
+int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_pipe_sync)
{
struct amdgpu_device *adev = ring->adev;
unsigned vmhub = ring->funcs->vmhub;
@@ -770,12 +770,15 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job)
vm_flush_needed = true;
}
- if (!vm_flush_needed && !gds_switch_needed)
+ if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync)
return 0;
if (ring->funcs->init_cond_exec)
patch_offset = amdgpu_ring_init_cond_exec(ring);
+ if (need_pipe_sync)
+ amdgpu_ring_emit_pipeline_sync(ring);
+
if (ring->funcs->emit_vm_flush && vm_flush_needed) {
struct fence *fence;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 9aa00d9..cc0009f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -222,7 +222,7 @@ int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
struct amdgpu_sync *sync, struct fence *fence,
struct amdgpu_job *job);
-int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job);
+int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_pipe_sync);
void amdgpu_vm_reset_id(struct amdgpu_device *adev, unsigned vmhub,
unsigned vmid);
void amdgpu_vm_reset_all_ids(struct amdgpu_device *adev);
--
2.7.4
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply related [flat|nested] 3+ messages in thread[parent not found: <1496741253-24964-1-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>]
* Re: [PATCH] drm/amdgpu:fix world switch hang [not found] ` <1496741253-24964-1-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org> @ 2017-06-06 12:01 ` Christian König [not found] ` <f4d58615-318c-a093-e31a-2fa8995a6a85-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org> 0 siblings, 1 reply; 3+ messages in thread From: Christian König @ 2017-06-06 12:01 UTC (permalink / raw) To: Monk Liu, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW Am 06.06.2017 um 11:27 schrieb Monk Liu: > for SR-IOV, we must keep the pipeline-sync in the protection > of COND_EXEC, otherwise the command consumed by CPG is not > consistent when world switch triggerd, e.g.: > > world switch hit and the IB frame is skipped so the fence > won't signal, thus CP will jump to the next DMAframe's pipeline-sync > command, and it will make CP hang foever. > > after pipelin-sync moved into COND_EXEC the consistency can be > guaranteed > > Change-Id: I3059535745e2e1215343299b17f27fe081fc01bc > Signed-off-by: Monk Liu <Monk.Liu@amd.com> Reviewed-by: Christian König <christian.koenig@amd.com> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 5 +++-- > drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 7 +++++-- > drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 2 +- > 3 files changed, 9 insertions(+), 5 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c > index 025e03b..947bd34 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c > @@ -130,6 +130,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, > > unsigned i; > int r = 0; > + bool need_pipe_sync = false; > > if (num_ibs == 0) > return -EINVAL; > @@ -165,7 +166,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, > if (ring->funcs->emit_pipeline_sync && job && > ((tmp = amdgpu_sync_get_fence(&job->sched_sync)) || > amdgpu_vm_need_pipeline_sync(ring, job))) { > - amdgpu_ring_emit_pipeline_sync(ring); > + need_pipe_sync = true; > fence_put(tmp); > } > > @@ -173,7 +174,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, > ring->funcs->insert_start(ring); > > if (vm) { > - r = amdgpu_vm_flush(ring, job); > + r = amdgpu_vm_flush(ring, job, need_pipe_sync); > if (r) { > amdgpu_ring_undo(ring); > return r; > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c > index b065f8b..6d62e34 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c > @@ -747,7 +747,7 @@ static bool amdgpu_vm_is_large_bar(struct amdgpu_device *adev) > * > * Emit a VM flush when it is necessary. > */ > -int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job) > +int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_pipe_sync) > { > struct amdgpu_device *adev = ring->adev; > unsigned vmhub = ring->funcs->vmhub; > @@ -770,12 +770,15 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job) > vm_flush_needed = true; > } > > - if (!vm_flush_needed && !gds_switch_needed) > + if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync) > return 0; > > if (ring->funcs->init_cond_exec) > patch_offset = amdgpu_ring_init_cond_exec(ring); > > + if (need_pipe_sync) > + amdgpu_ring_emit_pipeline_sync(ring); > + > if (ring->funcs->emit_vm_flush && vm_flush_needed) { > struct fence *fence; > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h > index 9aa00d9..cc0009f 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h > @@ -222,7 +222,7 @@ int amdgpu_vm_alloc_pts(struct amdgpu_device *adev, > int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, > struct amdgpu_sync *sync, struct fence *fence, > struct amdgpu_job *job); > -int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job); > +int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_pipe_sync); > void amdgpu_vm_reset_id(struct amdgpu_device *adev, unsigned vmhub, > unsigned vmid); > void amdgpu_vm_reset_all_ids(struct amdgpu_device *adev); _______________________________________________ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx ^ permalink raw reply [flat|nested] 3+ messages in thread
[parent not found: <f4d58615-318c-a093-e31a-2fa8995a6a85-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>]
* Re: [PATCH] drm/amdgpu:fix world switch hang [not found] ` <f4d58615-318c-a093-e31a-2fa8995a6a85-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org> @ 2017-06-07 1:52 ` zhoucm1 0 siblings, 0 replies; 3+ messages in thread From: zhoucm1 @ 2017-06-07 1:52 UTC (permalink / raw) To: Christian König, Monk Liu, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW On 2017年06月06日 20:01, Christian König wrote: > Am 06.06.2017 um 11:27 schrieb Monk Liu: >> for SR-IOV, we must keep the pipeline-sync in the protection >> of COND_EXEC, otherwise the command consumed by CPG is not >> consistent when world switch triggerd, e.g.: >> >> world switch hit and the IB frame is skipped so the fence >> won't signal, thus CP will jump to the next DMAframe's pipeline-sync >> command, and it will make CP hang foever. >> >> after pipelin-sync moved into COND_EXEC the consistency can be >> guaranteed >> >> Change-Id: I3059535745e2e1215343299b17f27fe081fc01bc >> Signed-off-by: Monk Liu <Monk.Liu@amd.com> > > Reviewed-by: Christian König <christian.koenig@amd.com> > >> --- >> drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 5 +++-- >> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 7 +++++-- >> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 2 +- >> 3 files changed, 9 insertions(+), 5 deletions(-) >> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c >> index 025e03b..947bd34 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c >> @@ -130,6 +130,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, >> unsigned num_ibs, >> unsigned i; >> int r = 0; >> + bool need_pipe_sync = false; >> if (num_ibs == 0) >> return -EINVAL; >> @@ -165,7 +166,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, >> unsigned num_ibs, >> if (ring->funcs->emit_pipeline_sync && job && >> ((tmp = amdgpu_sync_get_fence(&job->sched_sync)) || >> amdgpu_vm_need_pipeline_sync(ring, job))) { >> - amdgpu_ring_emit_pipeline_sync(ring); >> + need_pipe_sync = true; >> fence_put(tmp); >> } >> @@ -173,7 +174,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring >> *ring, unsigned num_ibs, >> ring->funcs->insert_start(ring); >> if (vm) { >> - r = amdgpu_vm_flush(ring, job); >> + r = amdgpu_vm_flush(ring, job, need_pipe_sync); There is a potential case lost, if no-vm job needs pipeline_sync, then which will be lost. Regards, David Zhou >> if (r) { >> amdgpu_ring_undo(ring); >> return r; >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c >> index b065f8b..6d62e34 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c >> @@ -747,7 +747,7 @@ static bool amdgpu_vm_is_large_bar(struct >> amdgpu_device *adev) >> * >> * Emit a VM flush when it is necessary. >> */ >> -int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job) >> +int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job >> *job, bool need_pipe_sync) >> { >> struct amdgpu_device *adev = ring->adev; >> unsigned vmhub = ring->funcs->vmhub; >> @@ -770,12 +770,15 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, >> struct amdgpu_job *job) >> vm_flush_needed = true; >> } >> - if (!vm_flush_needed && !gds_switch_needed) >> + if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync) >> return 0; >> if (ring->funcs->init_cond_exec) >> patch_offset = amdgpu_ring_init_cond_exec(ring); >> + if (need_pipe_sync) >> + amdgpu_ring_emit_pipeline_sync(ring); >> + >> if (ring->funcs->emit_vm_flush && vm_flush_needed) { >> struct fence *fence; >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h >> index 9aa00d9..cc0009f 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h >> @@ -222,7 +222,7 @@ int amdgpu_vm_alloc_pts(struct amdgpu_device *adev, >> int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, >> struct amdgpu_sync *sync, struct fence *fence, >> struct amdgpu_job *job); >> -int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job); >> +int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job >> *job, bool need_pipe_sync); >> void amdgpu_vm_reset_id(struct amdgpu_device *adev, unsigned vmhub, >> unsigned vmid); >> void amdgpu_vm_reset_all_ids(struct amdgpu_device *adev); > > > _______________________________________________ > amd-gfx mailing list > amd-gfx@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/amd-gfx _______________________________________________ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx ^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2017-06-07 1:52 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2017-06-06 9:27 [PATCH] drm/amdgpu:fix world switch hang Monk Liu
[not found] ` <1496741253-24964-1-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
2017-06-06 12:01 ` Christian König
[not found] ` <f4d58615-318c-a093-e31a-2fa8995a6a85-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
2017-06-07 1:52 ` zhoucm1
This is an external index of several public inboxes, see mirroring instructions on how to clone and mirror all data and code used by this external index.