From mboxrd@z Thu Jan 1 00:00:00 1970 From: "j.glisse" Subject: Re: [PATCH 03/10] drm/radeon: rework ring syncing code Date: Thu, 24 May 2012 11:57:36 -0400 Message-ID: <20120524155735.GD3467@gmail.com> References: <1337845754-3718-1-git-send-email-deathsimple@vodafone.de> <1337845754-3718-3-git-send-email-deathsimple@vodafone.de> Mime-Version: 1.0 Content-Type: text/plain; charset="iso-8859-1" Content-Transfer-Encoding: quoted-printable Return-path: Received: from mail-vb0-f49.google.com (mail-vb0-f49.google.com [209.85.212.49]) by gabe.freedesktop.org (Postfix) with ESMTP id 816349F031 for ; Thu, 24 May 2012 08:57:54 -0700 (PDT) Received: by vbbfo1 with SMTP id fo1so7087640vbb.36 for ; Thu, 24 May 2012 08:57:53 -0700 (PDT) Content-Disposition: inline In-Reply-To: <1337845754-3718-3-git-send-email-deathsimple@vodafone.de> List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Sender: dri-devel-bounces+sf-dri-devel=m.gmane.org@lists.freedesktop.org Errors-To: dri-devel-bounces+sf-dri-devel=m.gmane.org@lists.freedesktop.org To: Christian =?iso-8859-1?Q?K=F6nig?= Cc: dri-devel@lists.freedesktop.org List-Id: dri-devel@lists.freedesktop.org On Thu, May 24, 2012 at 09:49:07AM +0200, Christian K=F6nig wrote: > Move inter ring syncing with semaphores into the > existing ring allocations, with that we need to > lock the ring mutex only once. > = > Signed-off-by: Christian K=F6nig Reviewed-by: Jerome Glisse > --- > drivers/gpu/drm/radeon/evergreen_blit_kms.c | 3 +- > drivers/gpu/drm/radeon/r600.c | 5 +- > drivers/gpu/drm/radeon/r600_blit_kms.c | 24 +++++++-- > drivers/gpu/drm/radeon/radeon.h | 6 +-- > drivers/gpu/drm/radeon/radeon_asic.h | 5 +- > drivers/gpu/drm/radeon/radeon_cs.c | 38 +++----------- > drivers/gpu/drm/radeon/radeon_ring.c | 30 +++++++++-- > drivers/gpu/drm/radeon/radeon_semaphore.c | 71 ++++++++++-----------= ------ > drivers/gpu/drm/radeon/radeon_test.c | 6 +-- > drivers/gpu/drm/radeon/radeon_ttm.c | 20 -------- > 10 files changed, 92 insertions(+), 116 deletions(-) > = > diff --git a/drivers/gpu/drm/radeon/evergreen_blit_kms.c b/drivers/gpu/dr= m/radeon/evergreen_blit_kms.c > index 1e96bd4..e512560 100644 > --- a/drivers/gpu/drm/radeon/evergreen_blit_kms.c > +++ b/drivers/gpu/drm/radeon/evergreen_blit_kms.c > @@ -622,7 +622,8 @@ int evergreen_blit_init(struct radeon_device *rdev) > rdev->r600_blit.primitives.draw_auto =3D draw_auto; > rdev->r600_blit.primitives.set_default_state =3D set_default_state; > = > - rdev->r600_blit.ring_size_common =3D 55; /* shaders + def state */ > + rdev->r600_blit.ring_size_common =3D 8; /* sync semaphore */ > + rdev->r600_blit.ring_size_common +=3D 55; /* shaders + def state */ > rdev->r600_blit.ring_size_common +=3D 16; /* fence emit for VB IB */ > rdev->r600_blit.ring_size_common +=3D 5; /* done copy */ > rdev->r600_blit.ring_size_common +=3D 16; /* fence emit for done copy */ > diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c > index e5279f9..a8d8c44 100644 > --- a/drivers/gpu/drm/radeon/r600.c > +++ b/drivers/gpu/drm/radeon/r600.c > @@ -2371,15 +2371,16 @@ int r600_copy_blit(struct radeon_device *rdev, > unsigned num_gpu_pages, > struct radeon_fence **fence) > { > + struct radeon_semaphore *sem =3D NULL; > struct radeon_sa_bo *vb =3D NULL; > int r; > = > - r =3D r600_blit_prepare_copy(rdev, num_gpu_pages, &vb); > + r =3D r600_blit_prepare_copy(rdev, num_gpu_pages, fence, &vb, &sem); > if (r) { > return r; > } > r600_kms_blit_copy(rdev, src_offset, dst_offset, num_gpu_pages, vb); > - r600_blit_done_copy(rdev, fence, vb); > + r600_blit_done_copy(rdev, fence, vb, sem); > return 0; > } > = > diff --git a/drivers/gpu/drm/radeon/r600_blit_kms.c b/drivers/gpu/drm/rad= eon/r600_blit_kms.c > index 02f4eeb..2b8d641 100644 > --- a/drivers/gpu/drm/radeon/r600_blit_kms.c > +++ b/drivers/gpu/drm/radeon/r600_blit_kms.c > @@ -512,7 +512,8 @@ int r600_blit_init(struct radeon_device *rdev) > rdev->r600_blit.primitives.draw_auto =3D draw_auto; > rdev->r600_blit.primitives.set_default_state =3D set_default_state; > = > - rdev->r600_blit.ring_size_common =3D 40; /* shaders + def state */ > + rdev->r600_blit.ring_size_common =3D 8; /* sync semaphore */ > + rdev->r600_blit.ring_size_common +=3D 40; /* shaders + def state */ > rdev->r600_blit.ring_size_common +=3D 5; /* done copy */ > rdev->r600_blit.ring_size_common +=3D 16; /* fence emit for done copy */ > = > @@ -666,7 +667,8 @@ static unsigned r600_blit_create_rect(unsigned num_gp= u_pages, > = > = > int r600_blit_prepare_copy(struct radeon_device *rdev, unsigned num_gpu_= pages, > - struct radeon_sa_bo **vb) > + struct radeon_fence **fence, struct radeon_sa_bo **vb, > + struct radeon_semaphore **sem) > { > struct radeon_ring *ring =3D &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; > int r; > @@ -689,22 +691,37 @@ int r600_blit_prepare_copy(struct radeon_device *rd= ev, unsigned num_gpu_pages, > return r; > } > = > + r =3D radeon_semaphore_create(rdev, sem); > + if (r) { > + radeon_sa_bo_free(rdev, vb, NULL); > + return r; > + } > + > /* calculate number of loops correctly */ > ring_size =3D num_loops * dwords_per_loop; > ring_size +=3D rdev->r600_blit.ring_size_common; > r =3D radeon_ring_lock(rdev, ring, ring_size); > if (r) { > radeon_sa_bo_free(rdev, vb, NULL); > + radeon_semaphore_free(rdev, sem, NULL); > return r; > } > = > + if (radeon_fence_need_sync(*fence, RADEON_RING_TYPE_GFX_INDEX)) { > + radeon_semaphore_sync_rings(rdev, *sem, (*fence)->ring, > + RADEON_RING_TYPE_GFX_INDEX); > + radeon_fence_note_sync(*fence, RADEON_RING_TYPE_GFX_INDEX); > + } else { > + radeon_semaphore_free(rdev, sem, NULL); > + } > + > rdev->r600_blit.primitives.set_default_state(rdev); > rdev->r600_blit.primitives.set_shaders(rdev); > return 0; > } > = > void r600_blit_done_copy(struct radeon_device *rdev, struct radeon_fence= **fence, > - struct radeon_sa_bo *vb) > + struct radeon_sa_bo *vb, struct radeon_semaphore *sem) > { > struct radeon_ring *ring =3D &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; > int r; > @@ -717,6 +734,7 @@ void r600_blit_done_copy(struct radeon_device *rdev, = struct radeon_fence **fence > = > radeon_ring_unlock_commit(rdev, ring); > radeon_sa_bo_free(rdev, &vb, *fence); > + radeon_semaphore_free(rdev, &sem, *fence); > } > = > void r600_kms_blit_copy(struct radeon_device *rdev, > diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/rad= eon.h > index 4e232c3..aebaf28 100644 > --- a/drivers/gpu/drm/radeon/radeon.h > +++ b/drivers/gpu/drm/radeon/radeon.h > @@ -465,10 +465,9 @@ void radeon_semaphore_emit_wait(struct radeon_device= *rdev, int ring, > struct radeon_semaphore *semaphore); > int radeon_semaphore_sync_rings(struct radeon_device *rdev, > struct radeon_semaphore *semaphore, > - bool sync_to[RADEON_NUM_RINGS], > - int dst_ring); > + int signaler, int waiter); > void radeon_semaphore_free(struct radeon_device *rdev, > - struct radeon_semaphore *semaphore, > + struct radeon_semaphore **semaphore, > struct radeon_fence *fence); > = > /* > @@ -648,6 +647,7 @@ struct radeon_ib { > struct radeon_fence *fence; > unsigned vm_id; > bool is_const_ib; > + struct radeon_fence *sync_to[RADEON_NUM_RINGS]; > struct radeon_semaphore *semaphore; > }; > = > diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeo= n/radeon_asic.h > index 8cdf075..94c427a 100644 > --- a/drivers/gpu/drm/radeon/radeon_asic.h > +++ b/drivers/gpu/drm/radeon/radeon_asic.h > @@ -363,9 +363,10 @@ int r600_hdmi_buffer_status_changed(struct drm_encod= er *encoder); > void r600_hdmi_update_audio_settings(struct drm_encoder *encoder); > /* r600 blit */ > int r600_blit_prepare_copy(struct radeon_device *rdev, unsigned num_gpu_= pages, > - struct radeon_sa_bo **vb); > + struct radeon_fence **fence, struct radeon_sa_bo **vb, > + struct radeon_semaphore **sem); > void r600_blit_done_copy(struct radeon_device *rdev, struct radeon_fence= **fence, > - struct radeon_sa_bo *vb); > + struct radeon_sa_bo *vb, struct radeon_semaphore *sem); > void r600_kms_blit_copy(struct radeon_device *rdev, > u64 src_gpu_addr, u64 dst_gpu_addr, > unsigned num_gpu_pages, > diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/= radeon_cs.c > index c7d64a7..d295821 100644 > --- a/drivers/gpu/drm/radeon/radeon_cs.c > +++ b/drivers/gpu/drm/radeon/radeon_cs.c > @@ -115,36 +115,20 @@ static int radeon_cs_get_ring(struct radeon_cs_pars= er *p, u32 ring, s32 priority > return 0; > } > = > -static int radeon_cs_sync_rings(struct radeon_cs_parser *p) > +static void radeon_cs_sync_rings(struct radeon_cs_parser *p) > { > - bool sync_to_ring[RADEON_NUM_RINGS] =3D { }; > - bool need_sync =3D false; > - int i, r; > + int i; > = > for (i =3D 0; i < p->nrelocs; i++) { > - struct radeon_fence *fence; > + struct radeon_fence *a, *b; > = > if (!p->relocs[i].robj || !p->relocs[i].robj->tbo.sync_obj) > continue; > = > - fence =3D p->relocs[i].robj->tbo.sync_obj; > - if (fence->ring !=3D p->ring && !radeon_fence_signaled(fence)) { > - sync_to_ring[fence->ring] =3D true; > - need_sync =3D true; > - } > - } > - > - if (!need_sync) { > - return 0; > - } > - > - r =3D radeon_semaphore_create(p->rdev, &p->ib.semaphore); > - if (r) { > - return r; > + a =3D p->relocs[i].robj->tbo.sync_obj; > + b =3D p->ib.sync_to[a->ring]; > + p->ib.sync_to[a->ring] =3D radeon_fence_later(a, b); > } > - > - return radeon_semaphore_sync_rings(p->rdev, p->ib.semaphore, > - sync_to_ring, p->ring); > } > = > int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data) > @@ -365,10 +349,7 @@ static int radeon_cs_ib_chunk(struct radeon_device *= rdev, > DRM_ERROR("Invalid command stream !\n"); > return r; > } > - r =3D radeon_cs_sync_rings(parser); > - if (r) { > - DRM_ERROR("Failed to synchronize rings !\n"); > - } > + radeon_cs_sync_rings(parser); > parser->ib.vm_id =3D 0; > r =3D radeon_ib_schedule(rdev, &parser->ib); > if (r) { > @@ -465,10 +446,7 @@ static int radeon_cs_ib_vm_chunk(struct radeon_devic= e *rdev, > if (r) { > goto out; > } > - r =3D radeon_cs_sync_rings(parser); > - if (r) { > - DRM_ERROR("Failed to synchronize rings !\n"); > - } > + radeon_cs_sync_rings(parser); > = > if ((rdev->family >=3D CHIP_TAHITI) && > (parser->chunk_const_ib_idx !=3D -1)) { > diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeo= n/radeon_ring.c > index a0b9970..87f66c5 100644 > --- a/drivers/gpu/drm/radeon/radeon_ring.c > +++ b/drivers/gpu/drm/radeon/radeon_ring.c > @@ -67,7 +67,7 @@ u32 radeon_get_ib_value(struct radeon_cs_parser *p, int= idx) > int radeon_ib_get(struct radeon_device *rdev, int ring, > struct radeon_ib *ib, unsigned size) > { > - int r; > + int i, r; > = > r =3D radeon_sa_bo_new(rdev, &rdev->ring_tmp_bo, &ib->sa_bo, size, 256,= true); > if (r) { > @@ -75,20 +75,26 @@ int radeon_ib_get(struct radeon_device *rdev, int rin= g, > return r; > } > = > + r =3D radeon_semaphore_create(rdev, &ib->semaphore); > + if (r) { > + return r; > + } > + > ib->ring =3D ring; > ib->fence =3D NULL; > ib->ptr =3D radeon_sa_bo_cpu_addr(ib->sa_bo); > ib->gpu_addr =3D radeon_sa_bo_gpu_addr(ib->sa_bo); > ib->vm_id =3D 0; > ib->is_const_ib =3D false; > - ib->semaphore =3D NULL; > + for (i =3D 0; i < RADEON_NUM_RINGS; ++i) > + ib->sync_to[i] =3D NULL; > = > return 0; > } > = > void radeon_ib_free(struct radeon_device *rdev, struct radeon_ib *ib) > { > - radeon_semaphore_free(rdev, ib->semaphore, ib->fence); > + radeon_semaphore_free(rdev, &ib->semaphore, ib->fence); > radeon_sa_bo_free(rdev, &ib->sa_bo, ib->fence); > radeon_fence_unref(&ib->fence); > } > @@ -96,7 +102,8 @@ void radeon_ib_free(struct radeon_device *rdev, struct= radeon_ib *ib) > int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib) > { > struct radeon_ring *ring =3D &rdev->ring[ib->ring]; > - int r =3D 0; > + bool need_sync =3D false; > + int i, r =3D 0; > = > if (!ib->length_dw || !ring->ready) { > /* TODO: Nothings in the ib we should report. */ > @@ -105,11 +112,24 @@ int radeon_ib_schedule(struct radeon_device *rdev, = struct radeon_ib *ib) > } > = > /* 64 dwords should be enough for fence too */ > - r =3D radeon_ring_lock(rdev, ring, 64); > + r =3D radeon_ring_lock(rdev, ring, 64 + RADEON_NUM_RINGS * 8); > if (r) { > dev_err(rdev->dev, "scheduling IB failed (%d).\n", r); > return r; > } > + for (i =3D 0; i < RADEON_NUM_RINGS; ++i) { > + struct radeon_fence *fence =3D ib->sync_to[i]; > + if (radeon_fence_need_sync(fence, ib->ring)) { > + need_sync =3D true; > + radeon_semaphore_sync_rings(rdev, ib->semaphore, > + fence->ring, ib->ring); > + radeon_fence_note_sync(fence, ib->ring); > + } > + } > + /* immediately free semaphore when we don't need to sync */ > + if (!need_sync) { > + radeon_semaphore_free(rdev, &ib->semaphore, NULL); > + } > radeon_ring_ib_execute(rdev, ib->ring, ib); > r =3D radeon_fence_emit(rdev, &ib->fence, ib->ring); > if (r) { > diff --git a/drivers/gpu/drm/radeon/radeon_semaphore.c b/drivers/gpu/drm/= radeon/radeon_semaphore.c > index e2ace5d..7cc78de 100644 > --- a/drivers/gpu/drm/radeon/radeon_semaphore.c > +++ b/drivers/gpu/drm/radeon/radeon_semaphore.c > @@ -68,70 +68,49 @@ void radeon_semaphore_emit_wait(struct radeon_device = *rdev, int ring, > radeon_semaphore_ring_emit(rdev, ring, &rdev->ring[ring], semaphore, tr= ue); > } > = > +/* caller must hold ring lock */ > int radeon_semaphore_sync_rings(struct radeon_device *rdev, > struct radeon_semaphore *semaphore, > - bool sync_to[RADEON_NUM_RINGS], > - int dst_ring) > + int signaler, int waiter) > { > - int i =3D 0, r; > + int r; > = > - mutex_lock(&rdev->ring_lock); > - r =3D radeon_ring_alloc(rdev, &rdev->ring[dst_ring], RADEON_NUM_RINGS *= 8); > - if (r) { > - goto error; > + /* no need to signal and wait on the same ring */ > + if (signaler =3D=3D waiter) { > + return 0; > } > = > - for (i =3D 0; i < RADEON_NUM_RINGS; ++i) { > - /* no need to sync to our own or unused rings */ > - if (!sync_to[i] || i =3D=3D dst_ring) > - continue; > - > - /* prevent GPU deadlocks */ > - if (!rdev->ring[i].ready) { > - dev_err(rdev->dev, "Trying to sync to a disabled ring!"); > - r =3D -EINVAL; > - goto error; > - } > - > - r =3D radeon_ring_alloc(rdev, &rdev->ring[i], 8); > - if (r) { > - goto error; > - } > - > - radeon_semaphore_emit_signal(rdev, i, semaphore); > - radeon_semaphore_emit_wait(rdev, dst_ring, semaphore); > + /* prevent GPU deadlocks */ > + if (!rdev->ring[signaler].ready) { > + dev_err(rdev->dev, "Trying to sync to a disabled ring!"); > + return -EINVAL; > + } > = > - radeon_ring_commit(rdev, &rdev->ring[i]); > + r =3D radeon_ring_alloc(rdev, &rdev->ring[signaler], 8); > + if (r) { > + return r; > } > + radeon_semaphore_emit_signal(rdev, signaler, semaphore); > + radeon_ring_commit(rdev, &rdev->ring[signaler]); > = > - radeon_ring_commit(rdev, &rdev->ring[dst_ring]); > - mutex_unlock(&rdev->ring_lock); > + /* we assume caller has already allocated space on waiters ring */ > + radeon_semaphore_emit_wait(rdev, waiter, semaphore); > = > return 0; > - > -error: > - /* unlock all locks taken so far */ > - for (--i; i >=3D 0; --i) { > - if (sync_to[i] || i =3D=3D dst_ring) { > - radeon_ring_undo(&rdev->ring[i]); > - } > - } > - radeon_ring_undo(&rdev->ring[dst_ring]); > - mutex_unlock(&rdev->ring_lock); > - return r; > } > = > void radeon_semaphore_free(struct radeon_device *rdev, > - struct radeon_semaphore *semaphore, > + struct radeon_semaphore **semaphore, > struct radeon_fence *fence) > { > - if (semaphore =3D=3D NULL) { > + if (semaphore =3D=3D NULL || *semaphore =3D=3D NULL) { > return; > } > - if (semaphore->waiters > 0) { > + if ((*semaphore)->waiters > 0) { > dev_err(rdev->dev, "semaphore %p has more waiters than signalers," > - " hardware lockup imminent!\n", semaphore); > + " hardware lockup imminent!\n", *semaphore); > } > - radeon_sa_bo_free(rdev, &semaphore->sa_bo, fence); > - kfree(semaphore); > + radeon_sa_bo_free(rdev, &(*semaphore)->sa_bo, fence); > + kfree(*semaphore); > + *semaphore =3D NULL; > } > diff --git a/drivers/gpu/drm/radeon/radeon_test.c b/drivers/gpu/drm/radeo= n/radeon_test.c > index 47e1535..a94f66f 100644 > --- a/drivers/gpu/drm/radeon/radeon_test.c > +++ b/drivers/gpu/drm/radeon/radeon_test.c > @@ -303,8 +303,7 @@ void radeon_test_ring_sync(struct radeon_device *rdev, > } > = > out_cleanup: > - if (semaphore) > - radeon_semaphore_free(rdev, semaphore, NULL); > + radeon_semaphore_free(rdev, &semaphore, NULL); > = > if (fence1) > radeon_fence_unref(&fence1); > @@ -422,8 +421,7 @@ void radeon_test_ring_sync2(struct radeon_device *rde= v, > } > = > out_cleanup: > - if (semaphore) > - radeon_semaphore_free(rdev, semaphore, NULL); > + radeon_semaphore_free(rdev, &semaphore, NULL); > = > if (fenceA) > radeon_fence_unref(&fenceA); > diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon= /radeon_ttm.c > index 2d36bdd..c43035c 100644 > --- a/drivers/gpu/drm/radeon/radeon_ttm.c > +++ b/drivers/gpu/drm/radeon/radeon_ttm.c > @@ -223,7 +223,6 @@ static int radeon_move_blit(struct ttm_buffer_object = *bo, > struct radeon_device *rdev; > uint64_t old_start, new_start; > struct radeon_fence *fence; > - struct radeon_semaphore *sem =3D NULL; > int r, ridx; > = > rdev =3D radeon_get_rdev(bo->bdev); > @@ -262,31 +261,12 @@ static int radeon_move_blit(struct ttm_buffer_objec= t *bo, > = > /* sync other rings */ > fence =3D bo->sync_obj; > - if (fence && fence->ring !=3D ridx > - && !radeon_fence_signaled(fence)) { > - bool sync_to_ring[RADEON_NUM_RINGS] =3D { }; > - sync_to_ring[fence->ring] =3D true; > - > - r =3D radeon_semaphore_create(rdev, &sem); > - if (r) { > - return r; > - } > - > - r =3D radeon_semaphore_sync_rings(rdev, sem, sync_to_ring, ridx); > - if (r) { > - radeon_semaphore_free(rdev, sem, NULL); > - return r; > - } > - } > - > - fence =3D NULL; > r =3D radeon_copy(rdev, old_start, new_start, > new_mem->num_pages * (PAGE_SIZE / RADEON_GPU_PAGE_SIZE), /* GPU pages= */ > &fence); > /* FIXME: handle copy error */ > r =3D ttm_bo_move_accel_cleanup(bo, (void *)fence, NULL, > evict, no_wait_reserve, no_wait_gpu, new_mem); > - radeon_semaphore_free(rdev, sem, fence); > radeon_fence_unref(&fence); > return r; > } > -- = > 1.7.9.5 > =