From mboxrd@z Thu Jan 1 00:00:00 1970 From: Jerome Glisse Subject: Re: [PATCH 02/10] drm/radeon: UVD bringup v7 Date: Wed, 3 Apr 2013 10:53:24 -0400 Message-ID: <20130403145323.GC2010@gmail.com> References: <1364944719-5175-1-git-send-email-deathsimple@vodafone.de> <1364944719-5175-3-git-send-email-deathsimple@vodafone.de> Mime-Version: 1.0 Content-Type: text/plain; charset="iso-8859-1" Content-Transfer-Encoding: quoted-printable Return-path: Received: from mail-pb0-f43.google.com (mail-pb0-f43.google.com [209.85.160.43]) by gabe.freedesktop.org (Postfix) with ESMTP id 9C7A0E5CE5 for ; Wed, 3 Apr 2013 07:56:45 -0700 (PDT) Received: by mail-pb0-f43.google.com with SMTP id md4so899268pbc.30 for ; Wed, 03 Apr 2013 07:56:45 -0700 (PDT) Content-Disposition: inline In-Reply-To: <1364944719-5175-3-git-send-email-deathsimple@vodafone.de> List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Sender: dri-devel-bounces+sf-dri-devel=m.gmane.org@lists.freedesktop.org Errors-To: dri-devel-bounces+sf-dri-devel=m.gmane.org@lists.freedesktop.org To: Christian =?iso-8859-1?Q?K=F6nig?= Cc: dri-devel@lists.freedesktop.org List-Id: dri-devel@lists.freedesktop.org On Wed, Apr 03, 2013 at 01:18:31AM +0200, Christian K=F6nig wrote: > Just everything needed to decode videos using UVD. 
> = > v6: just all the bugfixes and support for R7xx-SI merged in one patch > v7: UVD_CGC_GATE is a write only register, lockup detection fix > = > Signed-off-by: Christian K=F6nig > --- > drivers/gpu/drm/radeon/Makefile | 2 +- > drivers/gpu/drm/radeon/evergreen.c | 40 ++- > drivers/gpu/drm/radeon/evergreend.h | 7 + > drivers/gpu/drm/radeon/ni.c | 49 +++ > drivers/gpu/drm/radeon/nid.h | 9 + > drivers/gpu/drm/radeon/r600.c | 291 ++++++++++++++++++ > drivers/gpu/drm/radeon/r600d.h | 61 ++++ > drivers/gpu/drm/radeon/radeon.h | 47 ++- > drivers/gpu/drm/radeon/radeon_asic.c | 63 ++++ > drivers/gpu/drm/radeon/radeon_asic.h | 19 ++ > drivers/gpu/drm/radeon/radeon_cs.c | 27 +- > drivers/gpu/drm/radeon/radeon_fence.c | 23 +- > drivers/gpu/drm/radeon/radeon_kms.c | 1 + > drivers/gpu/drm/radeon/radeon_object.c | 12 +- > drivers/gpu/drm/radeon/radeon_object.h | 2 +- > drivers/gpu/drm/radeon/radeon_ring.c | 24 +- > drivers/gpu/drm/radeon/radeon_test.c | 72 +++-- > drivers/gpu/drm/radeon/radeon_uvd.c | 521 ++++++++++++++++++++++++++= ++++++ > drivers/gpu/drm/radeon/rv770.c | 134 ++++++++ > drivers/gpu/drm/radeon/rv770d.h | 14 + > drivers/gpu/drm/radeon/si.c | 32 ++ > drivers/gpu/drm/radeon/sid.h | 6 + > include/uapi/drm/radeon_drm.h | 1 + > 23 files changed, 1400 insertions(+), 57 deletions(-) > create mode 100644 drivers/gpu/drm/radeon/radeon_uvd.c > = > diff --git a/drivers/gpu/drm/radeon/Makefile b/drivers/gpu/drm/radeon/Mak= efile > index bf17252..86c5e36 100644 > --- a/drivers/gpu/drm/radeon/Makefile > +++ b/drivers/gpu/drm/radeon/Makefile > @@ -76,7 +76,7 @@ radeon-y +=3D radeon_device.o radeon_asic.o radeon_kms.= o \ > evergreen.o evergreen_cs.o evergreen_blit_shaders.o evergreen_blit_kms.= o \ > evergreen_hdmi.o radeon_trace_points.o ni.o cayman_blit_shaders.o \ > atombios_encoders.o radeon_semaphore.o radeon_sa.o atombios_i2c.o si.o \ > - si_blit_shaders.o radeon_prime.o > + si_blit_shaders.o radeon_prime.o radeon_uvd.o > = > radeon-$(CONFIG_COMPAT) +=3D radeon_ioc32.o 
> radeon-$(CONFIG_VGA_SWITCHEROO) +=3D radeon_atpx_handler.o > diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/= evergreen.c > index 305a657..18b66ff 100644 > --- a/drivers/gpu/drm/radeon/evergreen.c > +++ b/drivers/gpu/drm/radeon/evergreen.c > @@ -3360,6 +3360,9 @@ restart_ih: > DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data); > break; > } > + case 124: /* UVD */ > + DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data); > + radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX); > break; > case 146: > case 147: > @@ -3571,7 +3574,7 @@ int evergreen_copy_dma(struct radeon_device *rdev, > = > static int evergreen_startup(struct radeon_device *rdev) > { > - struct radeon_ring *ring =3D &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; > + struct radeon_ring *ring; > int r; > = > /* enable pcie gen2 link */ > @@ -3638,6 +3641,17 @@ static int evergreen_startup(struct radeon_device = *rdev) > return r; > } > = > + r =3D rv770_uvd_resume(rdev); > + if (!r) { > + r =3D radeon_fence_driver_start_ring(rdev, > + R600_RING_TYPE_UVD_INDEX); > + if (r) > + dev_err(rdev->dev, "UVD fences init error (%d).\n", r); > + } > + > + if (r) > + rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size =3D 0; > + > /* Enable IRQ */ > r =3D r600_irq_init(rdev); > if (r) { > @@ -3647,6 +3661,7 @@ static int evergreen_startup(struct radeon_device *= rdev) > } > evergreen_irq_set(rdev); > = > + ring =3D &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; > r =3D radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_O= FFSET, > R600_CP_RB_RPTR, R600_CP_RB_WPTR, > 0, 0xfffff, RADEON_CP_PACKET2); > @@ -3670,6 +3685,19 @@ static int evergreen_startup(struct radeon_device = *rdev) > if (r) > return r; > = > + ring =3D &rdev->ring[R600_RING_TYPE_UVD_INDEX]; > + if (ring->ring_size) { > + r =3D radeon_ring_init(rdev, ring, ring->ring_size, > + R600_WB_UVD_RPTR_OFFSET, > + UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR, > + 0, 0xfffff, RADEON_CP_PACKET2); > + if (!r) > + r =3D r600_uvd_init(rdev); > + > + 
if (r) > + DRM_ERROR("radeon: error initializing UVD (%d).\n", r); > + } > + > r =3D radeon_ib_pool_init(rdev); > if (r) { > dev_err(rdev->dev, "IB initialization failed (%d).\n", r); > @@ -3716,8 +3744,10 @@ int evergreen_resume(struct radeon_device *rdev) > int evergreen_suspend(struct radeon_device *rdev) > { > r600_audio_fini(rdev); > + radeon_uvd_suspend(rdev); > r700_cp_stop(rdev); > r600_dma_stop(rdev); > + r600_uvd_rbc_stop(rdev); > evergreen_irq_suspend(rdev); > radeon_wb_disable(rdev); > evergreen_pcie_gart_disable(rdev); > @@ -3797,6 +3827,13 @@ int evergreen_init(struct radeon_device *rdev) > rdev->ring[R600_RING_TYPE_DMA_INDEX].ring_obj =3D NULL; > r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX], 64 * 1024); > = > + r =3D radeon_uvd_init(rdev); > + if (!r) { > + rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_obj =3D NULL; > + r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX], > + 4096); > + } > + > rdev->ih.ring_obj =3D NULL; > r600_ih_ring_init(rdev, 64 * 1024); > = > @@ -3843,6 +3880,7 @@ void evergreen_fini(struct radeon_device *rdev) > radeon_ib_pool_fini(rdev); > radeon_irq_kms_fini(rdev); > evergreen_pcie_gart_fini(rdev); > + radeon_uvd_fini(rdev); > r600_vram_scratch_fini(rdev); > radeon_gem_fini(rdev); > radeon_fence_driver_fini(rdev); > diff --git a/drivers/gpu/drm/radeon/evergreend.h b/drivers/gpu/drm/radeon= /evergreend.h > index 982d25a..c5d873e 100644 > --- a/drivers/gpu/drm/radeon/evergreend.h > +++ b/drivers/gpu/drm/radeon/evergreend.h > @@ -992,6 +992,13 @@ > # define TARGET_LINK_SPEED_MASK (0xf << 0) > # define SELECTABLE_DEEMPHASIS (1 << 6) > = > + > +/* > + * UVD > + */ > +#define UVD_RBC_RB_RPTR 0xf690 > +#define UVD_RBC_RB_WPTR 0xf694 > + > /* > * PM4 > */ > diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c > index 27769e7..ac944f5 100644 > --- a/drivers/gpu/drm/radeon/ni.c > +++ b/drivers/gpu/drm/radeon/ni.c > @@ -931,6 +931,23 @@ void cayman_ring_ib_execute(struct radeon_device *rd= ev, struct 
radeon_ib *ib) > radeon_ring_write(ring, 10); /* poll interval */ > } > = > +void cayman_uvd_semaphore_emit(struct radeon_device *rdev, > + struct radeon_ring *ring, > + struct radeon_semaphore *semaphore, > + bool emit_wait) > +{ > + uint64_t addr =3D semaphore->gpu_addr; > + > + radeon_ring_write(ring, PACKET0(UVD_SEMA_ADDR_LOW, 0)); > + radeon_ring_write(ring, (addr >> 3) & 0x000FFFFF); > + > + radeon_ring_write(ring, PACKET0(UVD_SEMA_ADDR_HIGH, 0)); > + radeon_ring_write(ring, (addr >> 23) & 0x000FFFFF); > + > + radeon_ring_write(ring, PACKET0(UVD_SEMA_CMD, 0)); > + radeon_ring_write(ring, 0x80 | (emit_wait ? 1 : 0)); > +} > + > static void cayman_cp_enable(struct radeon_device *rdev, bool enable) > { > if (enable) > @@ -1682,6 +1699,16 @@ static int cayman_startup(struct radeon_device *rd= ev) > return r; > } > = > + r =3D rv770_uvd_resume(rdev); > + if (!r) { > + r =3D radeon_fence_driver_start_ring(rdev, > + R600_RING_TYPE_UVD_INDEX); > + if (r) > + dev_err(rdev->dev, "UVD fences init error (%d).\n", r); > + } > + if (r) > + rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size =3D 0; > + > r =3D radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX); > if (r) { > dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r); > @@ -1748,6 +1775,18 @@ static int cayman_startup(struct radeon_device *rd= ev) > if (r) > return r; > = > + ring =3D &rdev->ring[R600_RING_TYPE_UVD_INDEX]; > + if (ring->ring_size) { > + r =3D radeon_ring_init(rdev, ring, ring->ring_size, > + R600_WB_UVD_RPTR_OFFSET, > + UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR, > + 0, 0xfffff, RADEON_CP_PACKET2); > + if (!r) > + r =3D r600_uvd_init(rdev); > + if (r) > + DRM_ERROR("radeon: failed initializing UVD (%d).\n", r); > + } > + > r =3D radeon_ib_pool_init(rdev); > if (r) { > dev_err(rdev->dev, "IB initialization failed (%d).\n", r); > @@ -1794,6 +1833,8 @@ int cayman_suspend(struct radeon_device *rdev) > radeon_vm_manager_fini(rdev); > cayman_cp_enable(rdev, false); > cayman_dma_stop(rdev); > + 
r600_uvd_rbc_stop(rdev); > + radeon_uvd_suspend(rdev); > evergreen_irq_suspend(rdev); > radeon_wb_disable(rdev); > cayman_pcie_gart_disable(rdev); > @@ -1868,6 +1909,13 @@ int cayman_init(struct radeon_device *rdev) > ring->ring_obj =3D NULL; > r600_ring_init(rdev, ring, 64 * 1024); > = > + r =3D radeon_uvd_init(rdev); > + if (!r) { > + ring =3D &rdev->ring[R600_RING_TYPE_UVD_INDEX]; > + ring->ring_obj =3D NULL; > + r600_ring_init(rdev, ring, 4096); > + } > + > rdev->ih.ring_obj =3D NULL; > r600_ih_ring_init(rdev, 64 * 1024); > = > @@ -1919,6 +1967,7 @@ void cayman_fini(struct radeon_device *rdev) > radeon_vm_manager_fini(rdev); > radeon_ib_pool_fini(rdev); > radeon_irq_kms_fini(rdev); > + radeon_uvd_fini(rdev); > cayman_pcie_gart_fini(rdev); > r600_vram_scratch_fini(rdev); > radeon_gem_fini(rdev); > diff --git a/drivers/gpu/drm/radeon/nid.h b/drivers/gpu/drm/radeon/nid.h > index 079dee2..3731f6c 100644 > --- a/drivers/gpu/drm/radeon/nid.h > +++ b/drivers/gpu/drm/radeon/nid.h > @@ -486,6 +486,15 @@ > # define CACHE_FLUSH_AND_INV_EVENT (0x16 <<= 0) > = > /* > + * UVD > + */ > +#define UVD_SEMA_ADDR_LOW 0xEF00 > +#define UVD_SEMA_ADDR_HIGH 0xEF04 > +#define UVD_SEMA_CMD 0xEF08 > +#define UVD_RBC_RB_RPTR 0xF690 > +#define UVD_RBC_RB_WPTR 0xF694 > + > +/* > * PM4 > */ > #define PACKET0(reg, n) ((RADEON_PACKET_TYPE0 << 30) | \ > diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c > index 0740db3..ca6117d 100644 > --- a/drivers/gpu/drm/radeon/r600.c > +++ b/drivers/gpu/drm/radeon/r600.c > @@ -2552,6 +2552,185 @@ void r600_dma_fini(struct radeon_device *rdev) > } > = > /* > + * UVD > + */ > +int r600_uvd_rbc_start(struct radeon_device *rdev) > +{ > + struct radeon_ring *ring =3D &rdev->ring[R600_RING_TYPE_UVD_INDEX]; > + uint64_t rptr_addr; > + uint32_t rb_bufsz, tmp; > + int r; > + > + rptr_addr =3D rdev->wb.gpu_addr + R600_WB_UVD_RPTR_OFFSET; > + > + if (upper_32_bits(rptr_addr) !=3D upper_32_bits(ring->gpu_addr)) { > + DRM_ERROR("UVD ring and 
rptr not in the same 4GB segment!\n"); > + return -EINVAL; > + } > + > + /* force RBC into idle state */ > + WREG32(UVD_RBC_RB_CNTL, 0x11010101); > + > + /* Set the write pointer delay */ > + WREG32(UVD_RBC_RB_WPTR_CNTL, 0); > + > + /* set the wb address */ > + WREG32(UVD_RBC_RB_RPTR_ADDR, rptr_addr >> 2); > + > + /* programm the 4GB memory segment for rptr and ring buffer */ > + WREG32(UVD_LMI_EXT40_ADDR, upper_32_bits(rptr_addr) | > + (0x7 << 16) | (0x1 << 31)); > + > + /* Initialize the ring buffer's read and write pointers */ > + WREG32(UVD_RBC_RB_RPTR, 0x0); > + > + ring->wptr =3D ring->rptr =3D RREG32(UVD_RBC_RB_RPTR); > + WREG32(UVD_RBC_RB_WPTR, ring->wptr); > + > + /* set the ring address */ > + WREG32(UVD_RBC_RB_BASE, ring->gpu_addr); > + > + /* Set ring buffer size */ > + rb_bufsz =3D drm_order(ring->ring_size); > + rb_bufsz =3D (0x1 << 8) | rb_bufsz; > + WREG32(UVD_RBC_RB_CNTL, rb_bufsz); > + > + ring->ready =3D true; > + r =3D radeon_ring_test(rdev, R600_RING_TYPE_UVD_INDEX, ring); > + if (r) { > + ring->ready =3D false; > + return r; > + } > + > + r =3D radeon_ring_lock(rdev, ring, 10); > + if (r) { > + DRM_ERROR("radeon: ring failed to lock UVD ring (%d).\n", r); > + return r; > + } > + > + tmp =3D PACKET0(UVD_SEMA_WAIT_FAULT_TIMEOUT_CNTL, 0); > + radeon_ring_write(ring, tmp); > + radeon_ring_write(ring, 0xFFFFF); > + > + tmp =3D PACKET0(UVD_SEMA_WAIT_INCOMPLETE_TIMEOUT_CNTL, 0); > + radeon_ring_write(ring, tmp); > + radeon_ring_write(ring, 0xFFFFF); > + > + tmp =3D PACKET0(UVD_SEMA_SIGNAL_INCOMPLETE_TIMEOUT_CNTL, 0); > + radeon_ring_write(ring, tmp); > + radeon_ring_write(ring, 0xFFFFF); > + > + /* Clear timeout status bits */ > + radeon_ring_write(ring, PACKET0(UVD_SEMA_TIMEOUT_STATUS, 0)); > + radeon_ring_write(ring, 0x8); > + > + radeon_ring_write(ring, PACKET0(UVD_SEMA_CNTL, 0)); > + radeon_ring_write(ring, 1); > + > + radeon_ring_unlock_commit(rdev, ring); > + > + return 0; > +} > + > +void r600_uvd_rbc_stop(struct radeon_device *rdev) > +{ > + 
struct radeon_ring *ring =3D &rdev->ring[R600_RING_TYPE_UVD_INDEX]; > + > + /* force RBC into idle state */ > + WREG32(UVD_RBC_RB_CNTL, 0x11010101); > + ring->ready =3D false; > +} > + > +int r600_uvd_init(struct radeon_device *rdev) > +{ > + int i, j, r; > + > + /* disable clock gating */ > + WREG32(UVD_CGC_GATE, 0); > + > + /* disable interupt */ > + WREG32_P(UVD_MASTINT_EN, 0, ~(1 << 1)); > + > + /* put LMI, VCPU, RBC etc... into reset */ > + WREG32(UVD_SOFT_RESET, LMI_SOFT_RESET | VCPU_SOFT_RESET | > + LBSI_SOFT_RESET | RBC_SOFT_RESET | CSM_SOFT_RESET | > + CXW_SOFT_RESET | TAP_SOFT_RESET | LMI_UMC_SOFT_RESET); > + mdelay(5); > + > + /* take UVD block out of reset */ > + WREG32_P(SRBM_SOFT_RESET, 0, ~SOFT_RESET_UVD); > + mdelay(5); > + > + /* initialize UVD memory controller */ > + WREG32(UVD_LMI_CTRL, 0x40 | (1 << 8) | (1 << 13) | > + (1 << 21) | (1 << 9) | (1 << 20)); > + > + /* disable byte swapping */ > + WREG32(UVD_LMI_SWAP_CNTL, 0); > + WREG32(UVD_MP_SWAP_CNTL, 0); > + > + WREG32(UVD_MPC_SET_MUXA0, 0x40c2040); > + WREG32(UVD_MPC_SET_MUXA1, 0x0); > + WREG32(UVD_MPC_SET_MUXB0, 0x40c2040); > + WREG32(UVD_MPC_SET_MUXB1, 0x0); > + WREG32(UVD_MPC_SET_ALU, 0); > + WREG32(UVD_MPC_SET_MUX, 0x88); > + > + /* Stall UMC */ > + WREG32_P(UVD_LMI_CTRL2, 1 << 8, ~(1 << 8)); > + WREG32_P(UVD_RB_ARB_CTRL, 1 << 3, ~(1 << 3)); > + > + /* take all subblocks out of reset, except VCPU */ > + WREG32(UVD_SOFT_RESET, VCPU_SOFT_RESET); > + mdelay(5); > + > + /* enable VCPU clock */ > + WREG32(UVD_VCPU_CNTL, 1 << 9); > + > + /* enable UMC */ > + WREG32_P(UVD_LMI_CTRL2, 0, ~(1 << 8)); > + > + /* boot up the VCPU */ > + WREG32(UVD_SOFT_RESET, 0); > + mdelay(10); > + > + WREG32_P(UVD_RB_ARB_CTRL, 0, ~(1 << 3)); > + > + for (i =3D 0; i < 10; ++i) { > + uint32_t status; > + for (j =3D 0; j < 100; ++j) { > + status =3D RREG32(UVD_STATUS); > + if (status & 2) > + break; > + mdelay(10); > + } > + r =3D 0; > + if (status & 2) > + break; > + > + DRM_ERROR("UVD not responding, trying to reset 
the VCPU!!!\n"); > + WREG32_P(UVD_SOFT_RESET, VCPU_SOFT_RESET, ~VCPU_SOFT_RESET); > + mdelay(10); > + WREG32_P(UVD_SOFT_RESET, 0, ~VCPU_SOFT_RESET); > + mdelay(10); > + r =3D -1; > + } > + if (r) { > + DRM_ERROR("UVD not responding, giving up!!!\n"); > + return r; > + } > + /* enable interupt */ > + WREG32_P(UVD_MASTINT_EN, 3<<1, ~(3 << 1)); > + > + r =3D r600_uvd_rbc_start(rdev); > + if (r) > + return r; > + > + DRM_INFO("UVD initialized successfully.\n"); > + return 0; > +} > + > +/* > * GPU scratch registers helpers function. > */ > void r600_scratch_init(struct radeon_device *rdev) > @@ -2660,6 +2839,40 @@ int r600_dma_ring_test(struct radeon_device *rdev, > return r; > } > = > +int r600_uvd_ring_test(struct radeon_device *rdev, struct radeon_ring *r= ing) > +{ > + uint32_t tmp =3D 0; > + unsigned i; > + int r; > + > + WREG32(UVD_CONTEXT_ID, 0xCAFEDEAD); > + r =3D radeon_ring_lock(rdev, ring, 3); > + if (r) { > + DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", > + ring->idx, r); > + return r; > + } > + radeon_ring_write(ring, PACKET0(UVD_CONTEXT_ID, 0)); > + radeon_ring_write(ring, 0xDEADBEEF); > + radeon_ring_unlock_commit(rdev, ring); > + for (i =3D 0; i < rdev->usec_timeout; i++) { > + tmp =3D RREG32(UVD_CONTEXT_ID); > + if (tmp =3D=3D 0xDEADBEEF) > + break; > + DRM_UDELAY(1); > + } > + > + if (i < rdev->usec_timeout) { > + DRM_INFO("ring test on %d succeeded in %d usecs\n", > + ring->idx, i); > + } else { > + DRM_ERROR("radeon: ring %d test failed (0x%08X)\n", > + ring->idx, tmp); > + r =3D -EINVAL; > + } > + return r; > +} > + > /* > * CP fences/semaphores > */ > @@ -2711,6 +2924,30 @@ void r600_fence_ring_emit(struct radeon_device *rd= ev, > } > } > = > +void r600_uvd_fence_emit(struct radeon_device *rdev, > + struct radeon_fence *fence) > +{ > + struct radeon_ring *ring =3D &rdev->ring[fence->ring]; > + uint32_t addr =3D rdev->fence_drv[fence->ring].gpu_addr; > + > + radeon_ring_write(ring, PACKET0(UVD_CONTEXT_ID, 0)); > + radeon_ring_write(ring, 
fence->seq); > + radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_DATA0, 0)); > + radeon_ring_write(ring, addr & 0xffffffff); > + radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_DATA1, 0)); > + radeon_ring_write(ring, upper_32_bits(addr) & 0xff); > + radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_CMD, 0)); > + radeon_ring_write(ring, 0); > + > + radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_DATA0, 0)); > + radeon_ring_write(ring, 0); > + radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_DATA1, 0)); > + radeon_ring_write(ring, 0); > + radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_CMD, 0)); > + radeon_ring_write(ring, 2); > + return; > +} > + > void r600_semaphore_ring_emit(struct radeon_device *rdev, > struct radeon_ring *ring, > struct radeon_semaphore *semaphore, > @@ -2780,6 +3017,23 @@ void r600_dma_semaphore_ring_emit(struct radeon_de= vice *rdev, > radeon_ring_write(ring, upper_32_bits(addr) & 0xff); > } > = > +void r600_uvd_semaphore_emit(struct radeon_device *rdev, > + struct radeon_ring *ring, > + struct radeon_semaphore *semaphore, > + bool emit_wait) > +{ > + uint64_t addr =3D semaphore->gpu_addr; > + > + radeon_ring_write(ring, PACKET0(UVD_SEMA_ADDR_LOW, 0)); > + radeon_ring_write(ring, (addr >> 3) & 0x000FFFFF); > + > + radeon_ring_write(ring, PACKET0(UVD_SEMA_ADDR_HIGH, 0)); > + radeon_ring_write(ring, (addr >> 23) & 0x000FFFFF); > + > + radeon_ring_write(ring, PACKET0(UVD_SEMA_CMD, 0)); > + radeon_ring_write(ring, emit_wait ? 
1 : 0); > +} > + > int r600_copy_blit(struct radeon_device *rdev, > uint64_t src_offset, > uint64_t dst_offset, > @@ -3183,6 +3437,16 @@ void r600_ring_ib_execute(struct radeon_device *rd= ev, struct radeon_ib *ib) > radeon_ring_write(ring, ib->length_dw); > } > = > +void r600_uvd_ib_execute(struct radeon_device *rdev, struct radeon_ib *i= b) > +{ > + struct radeon_ring *ring =3D &rdev->ring[ib->ring]; > + > + radeon_ring_write(ring, PACKET0(UVD_RBC_IB_BASE, 0)); > + radeon_ring_write(ring, ib->gpu_addr); > + radeon_ring_write(ring, PACKET0(UVD_RBC_IB_SIZE, 0)); > + radeon_ring_write(ring, ib->length_dw); > +} > + > int r600_ib_test(struct radeon_device *rdev, struct radeon_ring *ring) > { > struct radeon_ib ib; > @@ -3300,6 +3564,33 @@ int r600_dma_ib_test(struct radeon_device *rdev, s= truct radeon_ring *ring) > return r; > } > = > +int r600_uvd_ib_test(struct radeon_device *rdev, struct radeon_ring *rin= g) > +{ > + struct radeon_fence *fence; > + int r; > + > + r =3D radeon_uvd_get_create_msg(rdev, ring->idx, 1, NULL); > + if (r) { > + DRM_ERROR("radeon: failed to get create msg (%d).\n", r); > + return r; > + } > + > + r =3D radeon_uvd_get_destroy_msg(rdev, ring->idx, 1, &fence); > + if (r) { > + DRM_ERROR("radeon: failed to get destroy ib (%d).\n", r); > + return r; > + } > + > + r =3D radeon_fence_wait(fence, false); > + if (r) { > + DRM_ERROR("radeon: fence wait failed (%d).\n", r); > + return r; > + } > + DRM_INFO("ib test on ring %d succeeded\n", ring->idx); > + radeon_fence_unref(&fence); > + return r; > +} > + > /** > * r600_dma_ring_ib_execute - Schedule an IB on the DMA engine > * > diff --git a/drivers/gpu/drm/radeon/r600d.h b/drivers/gpu/drm/radeon/r600= d.h > index a42ba11..441bdb8 100644 > --- a/drivers/gpu/drm/radeon/r600d.h > +++ b/drivers/gpu/drm/radeon/r600d.h > @@ -691,6 +691,7 @@ > #define SRBM_SOFT_RESET 0xe60 > # define SOFT_RESET_DMA (1 << 12) > # define SOFT_RESET_RLC (1 << 13) > +# define SOFT_RESET_UVD (1 << 18) > # define 
RV770_SOFT_RESET_DMA (1 << 20) > = > #define CP_INT_CNTL 0xc124 > @@ -1143,6 +1144,66 @@ > # define AFMT_AZ_AUDIO_ENABLE_CHG_ACK (1 << 30) > = > /* > + * UVD > + */ > +#define UVD_SEMA_ADDR_LOW 0xef00 > +#define UVD_SEMA_ADDR_HIGH 0xef04 > +#define UVD_SEMA_CMD 0xef08 > + > +#define UVD_GPCOM_VCPU_CMD 0xef0c > +#define UVD_GPCOM_VCPU_DATA0 0xef10 > +#define UVD_GPCOM_VCPU_DATA1 0xef14 > +#define UVD_ENGINE_CNTL 0xef18 > + > +#define UVD_SEMA_CNTL 0xf400 > +#define UVD_RB_ARB_CTRL 0xf480 > + > +#define UVD_LMI_EXT40_ADDR 0xf498 > +#define UVD_CGC_GATE 0xf4a8 > +#define UVD_LMI_CTRL2 0xf4f4 > +#define UVD_MASTINT_EN 0xf500 > +#define UVD_LMI_ADDR_EXT 0xf594 > +#define UVD_LMI_CTRL 0xf598 > +#define UVD_LMI_SWAP_CNTL 0xf5b4 > +#define UVD_MP_SWAP_CNTL 0xf5bC > +#define UVD_MPC_CNTL 0xf5dC > +#define UVD_MPC_SET_MUXA0 0xf5e4 > +#define UVD_MPC_SET_MUXA1 0xf5e8 > +#define UVD_MPC_SET_MUXB0 0xf5eC > +#define UVD_MPC_SET_MUXB1 0xf5f0 > +#define UVD_MPC_SET_MUX 0xf5f4 > +#define UVD_MPC_SET_ALU 0xf5f8 > + > +#define UVD_VCPU_CNTL 0xf660 > +#define UVD_SOFT_RESET 0xf680 > +#define RBC_SOFT_RESET (1<<0) > +#define LBSI_SOFT_RESET (1<<1) > +#define LMI_SOFT_RESET (1<<2) > +#define VCPU_SOFT_RESET (1<<3) > +#define CSM_SOFT_RESET (1<<5) > +#define CXW_SOFT_RESET (1<<6) > +#define TAP_SOFT_RESET (1<<7) > +#define LMI_UMC_SOFT_RESET (1<<13) > +#define UVD_RBC_IB_BASE 0xf684 > +#define UVD_RBC_IB_SIZE 0xf688 > +#define UVD_RBC_RB_BASE 0xf68c > +#define UVD_RBC_RB_RPTR 0xf690 > +#define UVD_RBC_RB_WPTR 0xf694 > +#define UVD_RBC_RB_WPTR_CNTL 0xf698 > + > +#define UVD_STATUS 0xf6bc > + > +#define UVD_SEMA_TIMEOUT_STATUS 0xf6c0 > +#define UVD_SEMA_WAIT_INCOMPLETE_TIMEOUT_CNTL 0xf6c4 > +#define UVD_SEMA_WAIT_FAULT_TIMEOUT_CNTL 0xf6c8 > +#define UVD_SEMA_SIGNAL_INCOMPLETE_TIMEOUT_CNTL 0xf6cc > + > +#define UVD_RBC_RB_CNTL 0xf6a4 > +#define UVD_RBC_RB_RPTR_ADDR 0xf6a8 > + > +#define UVD_CONTEXT_ID 0xf6f4 > + > +/* > * PM4 > */ > #define PACKET0(reg, n) ((RADEON_PACKET_TYPE0 << 30) | \ > 
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/rad= eon.h > index 8263af3..3f5572d 100644 > --- a/drivers/gpu/drm/radeon/radeon.h > +++ b/drivers/gpu/drm/radeon/radeon.h > @@ -109,24 +109,27 @@ extern int radeon_lockup_timeout; > #define RADEON_BIOS_NUM_SCRATCH 8 > = > /* max number of rings */ > -#define RADEON_NUM_RINGS 5 > +#define RADEON_NUM_RINGS 6 > = > /* fence seq are set to this number when signaled */ > #define RADEON_FENCE_SIGNALED_SEQ 0LL > = > /* internal ring indices */ > /* r1xx+ has gfx CP ring */ > -#define RADEON_RING_TYPE_GFX_INDEX 0 > +#define RADEON_RING_TYPE_GFX_INDEX 0 > = > /* cayman has 2 compute CP rings */ > -#define CAYMAN_RING_TYPE_CP1_INDEX 1 > -#define CAYMAN_RING_TYPE_CP2_INDEX 2 > +#define CAYMAN_RING_TYPE_CP1_INDEX 1 > +#define CAYMAN_RING_TYPE_CP2_INDEX 2 > = > /* R600+ has an async dma ring */ > #define R600_RING_TYPE_DMA_INDEX 3 > /* cayman add a second async dma ring */ > #define CAYMAN_RING_TYPE_DMA1_INDEX 4 > = > +/* R600+ */ > +#define R600_RING_TYPE_UVD_INDEX 5 > + > /* hardcode those limit for now */ > #define RADEON_VA_IB_OFFSET (1 << 20) > #define RADEON_VA_RESERVED_SIZE (8 << 20) > @@ -357,8 +360,9 @@ struct radeon_bo_list { > struct ttm_validate_buffer tv; > struct radeon_bo *bo; > uint64_t gpu_offset; > - unsigned rdomain; > - unsigned wdomain; > + bool written; > + unsigned domain; > + unsigned alt_domain; > u32 tiling_flags; > }; I think that the change to rdomain/wdomain should be in a patch of its own. I think the change is fine, but we have had issues with changes that touched this part previously; splitting it out would make bisecting and understanding the implications of the change easier. > = > @@ -826,7 +830,6 @@ struct radeon_cs_reloc { > struct radeon_bo *robj; > struct radeon_bo_list lobj; > uint32_t handle; > - uint32_t flags; > }; Why remove the flags? IIRC they are not really used right now, but I remember there being a plan to use them. 
> = > struct radeon_cs_chunk { > @@ -918,6 +921,7 @@ struct radeon_wb { > #define R600_WB_DMA_RPTR_OFFSET 1792 > #define R600_WB_IH_WPTR_OFFSET 2048 > #define CAYMAN_WB_DMA1_RPTR_OFFSET 2304 > +#define R600_WB_UVD_RPTR_OFFSET 2560 > #define R600_WB_EVENT_OFFSET 3072 > = > /** > @@ -1118,6 +1122,33 @@ struct radeon_pm { > int radeon_pm_get_type_index(struct radeon_device *rdev, > enum radeon_pm_state_type ps_type, > int instance); > +/* > + * UVD > + */ > +#define RADEON_MAX_UVD_HANDLES 10 > +#define RADEON_UVD_STACK_SIZE (1024*1024) > +#define RADEON_UVD_HEAP_SIZE (1024*1024) > + > +struct radeon_uvd { > + struct radeon_bo *vcpu_bo; > + void *cpu_addr; > + uint64_t gpu_addr; > + atomic_t handles[RADEON_MAX_UVD_HANDLES]; > + struct drm_file *filp[RADEON_MAX_UVD_HANDLES]; > +}; > + > +int radeon_uvd_init(struct radeon_device *rdev); > +void radeon_uvd_fini(struct radeon_device *rdev); > +int radeon_uvd_suspend(struct radeon_device *rdev); > +int radeon_uvd_resume(struct radeon_device *rdev); > +int radeon_uvd_get_create_msg(struct radeon_device *rdev, int ring, > + uint32_t handle, struct radeon_fence **fence); > +int radeon_uvd_get_destroy_msg(struct radeon_device *rdev, int ring, > + uint32_t handle, struct radeon_fence **fence); > +void radeon_uvd_force_into_uvd_segment(struct radeon_bo *rbo); > +void radeon_uvd_free_handles(struct radeon_device *rdev, > + struct drm_file *filp); > +int radeon_uvd_cs_parse(struct radeon_cs_parser *parser); > = > struct r600_audio { > int channels; > @@ -1608,6 +1639,7 @@ struct radeon_device { > struct radeon_asic *asic; > struct radeon_gem gem; > struct radeon_pm pm; > + struct radeon_uvd uvd; > uint32_t bios_scratch[RADEON_BIOS_NUM_SCRATCH]; > struct radeon_wb wb; > struct radeon_dummy_page dummy_page; > @@ -1621,6 +1653,7 @@ struct radeon_device { > const struct firmware *rlc_fw; /* r6/700 RLC firmware */ > const struct firmware *mc_fw; /* NI MC firmware */ > const struct firmware *ce_fw; /* SI CE firmware */ > + const struct 
firmware *uvd_fw; /* UVD firmware */ > struct r600_blit r600_blit; > struct r600_vram_scratch vram_scratch; > int msi_enabled; /* msi enabled */ > diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeo= n/radeon_asic.c > index aba0a89..a7a7b2b 100644 > --- a/drivers/gpu/drm/radeon/radeon_asic.c > +++ b/drivers/gpu/drm/radeon/radeon_asic.c > @@ -1130,6 +1130,15 @@ static struct radeon_asic rv770_asic =3D { > .ring_test =3D &r600_dma_ring_test, > .ib_test =3D &r600_dma_ib_test, > .is_lockup =3D &r600_dma_is_lockup, > + }, > + [R600_RING_TYPE_UVD_INDEX] =3D { > + .ib_execute =3D &r600_uvd_ib_execute, > + .emit_fence =3D &r600_uvd_fence_emit, > + .emit_semaphore =3D &r600_uvd_semaphore_emit, > + .cs_parse =3D &radeon_uvd_cs_parse, > + .ring_test =3D &r600_uvd_ring_test, > + .ib_test =3D &r600_uvd_ib_test, > + .is_lockup =3D &radeon_ring_test_lockup, > } > }, > .irq =3D { > @@ -1216,6 +1225,15 @@ static struct radeon_asic evergreen_asic =3D { > .ring_test =3D &r600_dma_ring_test, > .ib_test =3D &r600_dma_ib_test, > .is_lockup =3D &evergreen_dma_is_lockup, > + }, > + [R600_RING_TYPE_UVD_INDEX] =3D { > + .ib_execute =3D &r600_uvd_ib_execute, > + .emit_fence =3D &r600_uvd_fence_emit, > + .emit_semaphore =3D &r600_uvd_semaphore_emit, > + .cs_parse =3D &radeon_uvd_cs_parse, > + .ring_test =3D &r600_uvd_ring_test, > + .ib_test =3D &r600_uvd_ib_test, > + .is_lockup =3D &radeon_ring_test_lockup, > } > }, > .irq =3D { > @@ -1302,6 +1320,15 @@ static struct radeon_asic sumo_asic =3D { > .ring_test =3D &r600_dma_ring_test, > .ib_test =3D &r600_dma_ib_test, > .is_lockup =3D &evergreen_dma_is_lockup, > + }, > + [R600_RING_TYPE_UVD_INDEX] =3D { > + .ib_execute =3D &r600_uvd_ib_execute, > + .emit_fence =3D &r600_uvd_fence_emit, > + .emit_semaphore =3D &r600_uvd_semaphore_emit, > + .cs_parse =3D &radeon_uvd_cs_parse, > + .ring_test =3D &r600_uvd_ring_test, > + .ib_test =3D &r600_uvd_ib_test, > + .is_lockup =3D &radeon_ring_test_lockup, > } > }, > .irq =3D { > @@ 
-1388,6 +1415,15 @@ static struct radeon_asic btc_asic =3D { > .ring_test =3D &r600_dma_ring_test, > .ib_test =3D &r600_dma_ib_test, > .is_lockup =3D &evergreen_dma_is_lockup, > + }, > + [R600_RING_TYPE_UVD_INDEX] =3D { > + .ib_execute =3D &r600_uvd_ib_execute, > + .emit_fence =3D &r600_uvd_fence_emit, > + .emit_semaphore =3D &r600_uvd_semaphore_emit, > + .cs_parse =3D &radeon_uvd_cs_parse, > + .ring_test =3D &r600_uvd_ring_test, > + .ib_test =3D &r600_uvd_ib_test, > + .is_lockup =3D &radeon_ring_test_lockup, > } > }, > .irq =3D { > @@ -1517,6 +1553,15 @@ static struct radeon_asic cayman_asic =3D { > .ib_test =3D &r600_dma_ib_test, > .is_lockup =3D &cayman_dma_is_lockup, > .vm_flush =3D &cayman_dma_vm_flush, > + }, > + [R600_RING_TYPE_UVD_INDEX] =3D { > + .ib_execute =3D &r600_uvd_ib_execute, > + .emit_fence =3D &r600_uvd_fence_emit, > + .emit_semaphore =3D &cayman_uvd_semaphore_emit, > + .cs_parse =3D &radeon_uvd_cs_parse, > + .ring_test =3D &r600_uvd_ring_test, > + .ib_test =3D &r600_uvd_ib_test, > + .is_lockup =3D &radeon_ring_test_lockup, > } > }, > .irq =3D { > @@ -1646,6 +1691,15 @@ static struct radeon_asic trinity_asic =3D { > .ib_test =3D &r600_dma_ib_test, > .is_lockup =3D &cayman_dma_is_lockup, > .vm_flush =3D &cayman_dma_vm_flush, > + }, > + [R600_RING_TYPE_UVD_INDEX] =3D { > + .ib_execute =3D &r600_uvd_ib_execute, > + .emit_fence =3D &r600_uvd_fence_emit, > + .emit_semaphore =3D &cayman_uvd_semaphore_emit, > + .cs_parse =3D &radeon_uvd_cs_parse, > + .ring_test =3D &r600_uvd_ring_test, > + .ib_test =3D &r600_uvd_ib_test, > + .is_lockup =3D &radeon_ring_test_lockup, > } > }, > .irq =3D { > @@ -1775,6 +1829,15 @@ static struct radeon_asic si_asic =3D { > .ib_test =3D &r600_dma_ib_test, > .is_lockup =3D &si_dma_is_lockup, > .vm_flush =3D &si_dma_vm_flush, > + }, > + [R600_RING_TYPE_UVD_INDEX] =3D { > + .ib_execute =3D &r600_uvd_ib_execute, > + .emit_fence =3D &r600_uvd_fence_emit, > + .emit_semaphore =3D &cayman_uvd_semaphore_emit, > + .cs_parse =3D 
&radeon_uvd_cs_parse, > + .ring_test =3D &r600_uvd_ring_test, > + .ib_test =3D &r600_uvd_ib_test, > + .is_lockup =3D &radeon_ring_test_lockup, > } > }, > .irq =3D { > diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeo= n/radeon_asic.h > index 3535f73..515db96 100644 > --- a/drivers/gpu/drm/radeon/radeon_asic.h > +++ b/drivers/gpu/drm/radeon/radeon_asic.h > @@ -330,6 +330,7 @@ int r600_dma_ib_test(struct radeon_device *rdev, stru= ct radeon_ring *ring); > void r600_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *= ib); > int r600_ring_test(struct radeon_device *rdev, struct radeon_ring *cp); > int r600_dma_ring_test(struct radeon_device *rdev, struct radeon_ring *c= p); > +int r600_uvd_ring_test(struct radeon_device *rdev, struct radeon_ring *r= ing); > int r600_copy_blit(struct radeon_device *rdev, > uint64_t src_offset, uint64_t dst_offset, > unsigned num_gpu_pages, struct radeon_fence **fence); > @@ -392,6 +393,19 @@ int r600_mc_wait_for_idle(struct radeon_device *rdev= ); > u32 r600_get_xclk(struct radeon_device *rdev); > uint64_t r600_get_gpu_clock_counter(struct radeon_device *rdev); > = > +/* uvd */ > +int r600_uvd_init(struct radeon_device *rdev); > +int r600_uvd_rbc_start(struct radeon_device *rdev); > +void r600_uvd_rbc_stop(struct radeon_device *rdev); > +int r600_uvd_ib_test(struct radeon_device *rdev, struct radeon_ring *rin= g); > +void r600_uvd_fence_emit(struct radeon_device *rdev, > + struct radeon_fence *fence); > +void r600_uvd_semaphore_emit(struct radeon_device *rdev, > + struct radeon_ring *ring, > + struct radeon_semaphore *semaphore, > + bool emit_wait); > +void r600_uvd_ib_execute(struct radeon_device *rdev, struct radeon_ib *i= b); > + > /* > * rv770,rv730,rv710,rv740 > */ > @@ -409,6 +423,7 @@ int rv770_copy_dma(struct radeon_device *rdev, > unsigned num_gpu_pages, > struct radeon_fence **fence); > u32 rv770_get_xclk(struct radeon_device *rdev); > +int rv770_uvd_resume(struct radeon_device *rdev); > = > 
/* > * evergreen > @@ -465,6 +480,10 @@ int evergreen_copy_dma(struct radeon_device *rdev, > */ > void cayman_fence_ring_emit(struct radeon_device *rdev, > struct radeon_fence *fence); > +void cayman_uvd_semaphore_emit(struct radeon_device *rdev, > + struct radeon_ring *ring, > + struct radeon_semaphore *semaphore, > + bool emit_wait); > void cayman_pcie_gart_tlb_flush(struct radeon_device *rdev); > int cayman_init(struct radeon_device *rdev); > void cayman_fini(struct radeon_device *rdev); > diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/= radeon_cs.c > index 7d66e01..532ff68 100644 > --- a/drivers/gpu/drm/radeon/radeon_cs.c > +++ b/drivers/gpu/drm/radeon/radeon_cs.c > @@ -75,18 +75,34 @@ static int radeon_cs_parser_relocs(struct radeon_cs_p= arser *p) > p->relocs_ptr[i] =3D &p->relocs[i]; > p->relocs[i].robj =3D gem_to_radeon_bo(p->relocs[i].gobj); > p->relocs[i].lobj.bo =3D p->relocs[i].robj; > - p->relocs[i].lobj.wdomain =3D r->write_domain; > - p->relocs[i].lobj.rdomain =3D r->read_domains; > + p->relocs[i].lobj.written =3D !!r->write_domain; > + > + /* the first reloc of an UVD job is the > + msg and that must be in VRAM */ > + if (p->ring =3D=3D R600_RING_TYPE_UVD_INDEX && i =3D=3D 0) { > + > + p->relocs[i].lobj.domain =3D > + RADEON_GEM_DOMAIN_VRAM; > + > + p->relocs[i].lobj.alt_domain =3D > + RADEON_GEM_DOMAIN_VRAM; > + } else { > + uint32_t domain =3D r->write_domain ? 
> + r->write_domain : r->read_domains; > + p->relocs[i].lobj.domain =3D domain; > + if (domain =3D=3D RADEON_GEM_DOMAIN_VRAM) > + domain |=3D RADEON_GEM_DOMAIN_GTT; > + p->relocs[i].lobj.alt_domain =3D domain; > + } > p->relocs[i].lobj.tv.bo =3D &p->relocs[i].robj->tbo; > p->relocs[i].handle =3D r->handle; > - p->relocs[i].flags =3D r->flags; > radeon_bo_list_add_object(&p->relocs[i].lobj, > &p->validated); > = > } else > p->relocs[i].handle =3D 0; > } > - return radeon_bo_list_validate(&p->validated); > + return radeon_bo_list_validate(&p->validated, p->ring); > } > = > static int radeon_cs_get_ring(struct radeon_cs_parser *p, u32 ring, s32 = priority) > @@ -121,6 +137,9 @@ static int radeon_cs_get_ring(struct radeon_cs_parser= *p, u32 ring, s32 priority > return -EINVAL; > } > break; > + case RADEON_CS_RING_UVD: > + p->ring =3D R600_RING_TYPE_UVD_INDEX; > + break; > } > return 0; > } > diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/rade= on/radeon_fence.c > index 3435625..82fe183 100644 > --- a/drivers/gpu/drm/radeon/radeon_fence.c > +++ b/drivers/gpu/drm/radeon/radeon_fence.c > @@ -31,9 +31,9 @@ > #include > #include > #include > -#include > #include > #include > +#include > #include > #include "radeon_reg.h" > #include "radeon.h" > @@ -767,8 +767,21 @@ int radeon_fence_driver_start_ring(struct radeon_dev= ice *rdev, int ring) > = > radeon_scratch_free(rdev, rdev->fence_drv[ring].scratch_reg); > if (rdev->wb.use_event || !radeon_ring_supports_scratch_reg(rdev, &rdev= ->ring[ring])) { > - rdev->fence_drv[ring].scratch_reg =3D 0; > - index =3D R600_WB_EVENT_OFFSET + ring * 4; > + if (ring !=3D R600_RING_TYPE_UVD_INDEX) { > + rdev->fence_drv[ring].scratch_reg =3D 0; > + index =3D R600_WB_EVENT_OFFSET + ring * 4; > + rdev->fence_drv[ring].cpu_addr =3D &rdev->wb.wb[index/4]; > + rdev->fence_drv[ring].gpu_addr =3D rdev->wb.gpu_addr + > + index; > + > + } else { > + /* put fence directly behind firmware */ > + rdev->fence_drv[ring].cpu_addr =3D 
rdev->uvd.cpu_addr + > + rdev->uvd_fw->size; > + rdev->fence_drv[ring].gpu_addr =3D rdev->uvd.gpu_addr + > + rdev->uvd_fw->size; > + } > + > } else { > r =3D radeon_scratch_get(rdev, &rdev->fence_drv[ring].scratch_reg); > if (r) { > @@ -778,9 +791,9 @@ int radeon_fence_driver_start_ring(struct radeon_devi= ce *rdev, int ring) > index =3D RADEON_WB_SCRATCH_OFFSET + > rdev->fence_drv[ring].scratch_reg - > rdev->scratch.reg_base; > + rdev->fence_drv[ring].cpu_addr =3D &rdev->wb.wb[index/4]; > + rdev->fence_drv[ring].gpu_addr =3D rdev->wb.gpu_addr + index; > } > - rdev->fence_drv[ring].cpu_addr =3D &rdev->wb.wb[index/4]; > - rdev->fence_drv[ring].gpu_addr =3D rdev->wb.gpu_addr + index; > radeon_fence_write(rdev, atomic64_read(&rdev->fence_drv[ring].last_seq)= , ring); > rdev->fence_drv[ring].initialized =3D true; > dev_info(rdev->dev, "fence driver on ring %d use gpu addr 0x%016llx and= cpu addr 0x%p\n", > diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon= /radeon_kms.c > index c75cb2c..3019759 100644 > --- a/drivers/gpu/drm/radeon/radeon_kms.c > +++ b/drivers/gpu/drm/radeon/radeon_kms.c > @@ -513,6 +513,7 @@ void radeon_driver_preclose_kms(struct drm_device *de= v, > rdev->hyperz_filp =3D NULL; > if (rdev->cmask_filp =3D=3D file_priv) > rdev->cmask_filp =3D NULL; > + radeon_uvd_free_handles(rdev, file_priv); > } > = > /* > diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/rad= eon/radeon_object.c > index d3aface..0e34446 100644 > --- a/drivers/gpu/drm/radeon/radeon_object.c > +++ b/drivers/gpu/drm/radeon/radeon_object.c > @@ -339,14 +339,14 @@ void radeon_bo_fini(struct radeon_device *rdev) > void radeon_bo_list_add_object(struct radeon_bo_list *lobj, > struct list_head *head) > { > - if (lobj->wdomain) { > + if (lobj->written) { > list_add(&lobj->tv.head, head); > } else { > list_add_tail(&lobj->tv.head, head); > } > } > = > -int radeon_bo_list_validate(struct list_head *head) > +int radeon_bo_list_validate(struct list_head 
*head, int ring) > { > struct radeon_bo_list *lobj; > struct radeon_bo *bo; > @@ -360,15 +360,17 @@ int radeon_bo_list_validate(struct list_head *head) > list_for_each_entry(lobj, head, tv.head) { > bo =3D lobj->bo; > if (!bo->pin_count) { > - domain =3D lobj->wdomain ? lobj->wdomain : lobj->rdomain; > + domain =3D lobj->domain; > = > retry: > radeon_ttm_placement_from_domain(bo, domain); > + if (ring =3D=3D R600_RING_TYPE_UVD_INDEX) > + radeon_uvd_force_into_uvd_segment(bo); > r =3D ttm_bo_validate(&bo->tbo, &bo->placement, > true, false); > if (unlikely(r)) { > - if (r !=3D -ERESTARTSYS && domain =3D=3D RADEON_GEM_DOMAIN_VRAM) { > - domain |=3D RADEON_GEM_DOMAIN_GTT; > + if (r !=3D -ERESTARTSYS && domain !=3D lobj->alt_domain) { > + domain =3D lobj->alt_domain; > goto retry; > } > return r; > diff --git a/drivers/gpu/drm/radeon/radeon_object.h b/drivers/gpu/drm/rad= eon/radeon_object.h > index 5fc86b0..e2cb80a 100644 > --- a/drivers/gpu/drm/radeon/radeon_object.h > +++ b/drivers/gpu/drm/radeon/radeon_object.h > @@ -128,7 +128,7 @@ extern int radeon_bo_init(struct radeon_device *rdev); > extern void radeon_bo_fini(struct radeon_device *rdev); > extern void radeon_bo_list_add_object(struct radeon_bo_list *lobj, > struct list_head *head); > -extern int radeon_bo_list_validate(struct list_head *head); > +extern int radeon_bo_list_validate(struct list_head *head, int ring); > extern int radeon_bo_fbdev_mmap(struct radeon_bo *bo, > struct vm_area_struct *vma); > extern int radeon_bo_set_tiling_flags(struct radeon_bo *bo, > diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeo= n/radeon_ring.c > index 8d58e26..31e47d8 100644 > --- a/drivers/gpu/drm/radeon/radeon_ring.c > +++ b/drivers/gpu/drm/radeon/radeon_ring.c > @@ -368,7 +368,7 @@ void radeon_ring_free_size(struct radeon_device *rdev= , struct radeon_ring *ring) > { > u32 rptr; > = > - if (rdev->wb.enabled) > + if (rdev->wb.enabled && ring !=3D &rdev->ring[R600_RING_TYPE_UVD_INDEX]) > rptr =3D 
le32_to_cpu(rdev->wb.wb[ring->rptr_offs/4]); > else > rptr =3D RREG32(ring->rptr_reg); > @@ -821,18 +821,20 @@ static int radeon_debugfs_ring_info(struct seq_file= *m, void *data) > return 0; > } > = > -static int radeon_ring_type_gfx_index =3D RADEON_RING_TYPE_GFX_INDEX; > -static int cayman_ring_type_cp1_index =3D CAYMAN_RING_TYPE_CP1_INDEX; > -static int cayman_ring_type_cp2_index =3D CAYMAN_RING_TYPE_CP2_INDEX; > -static int radeon_ring_type_dma1_index =3D R600_RING_TYPE_DMA_INDEX; > -static int radeon_ring_type_dma2_index =3D CAYMAN_RING_TYPE_DMA1_INDEX; > +static int radeon_gfx_index =3D RADEON_RING_TYPE_GFX_INDEX; > +static int cayman_cp1_index =3D CAYMAN_RING_TYPE_CP1_INDEX; > +static int cayman_cp2_index =3D CAYMAN_RING_TYPE_CP2_INDEX; > +static int radeon_dma1_index =3D R600_RING_TYPE_DMA_INDEX; > +static int radeon_dma2_index =3D CAYMAN_RING_TYPE_DMA1_INDEX; > +static int r600_uvd_index =3D R600_RING_TYPE_UVD_INDEX; > = > static struct drm_info_list radeon_debugfs_ring_info_list[] =3D { > - {"radeon_ring_gfx", radeon_debugfs_ring_info, 0, &radeon_ring_type_gfx_= index}, > - {"radeon_ring_cp1", radeon_debugfs_ring_info, 0, &cayman_ring_type_cp1_= index}, > - {"radeon_ring_cp2", radeon_debugfs_ring_info, 0, &cayman_ring_type_cp2_= index}, > - {"radeon_ring_dma1", radeon_debugfs_ring_info, 0, &radeon_ring_type_dma= 1_index}, > - {"radeon_ring_dma2", radeon_debugfs_ring_info, 0, &radeon_ring_type_dma= 2_index}, > + {"radeon_ring_gfx", radeon_debugfs_ring_info, 0, &radeon_gfx_index}, > + {"radeon_ring_cp1", radeon_debugfs_ring_info, 0, &cayman_cp1_index}, > + {"radeon_ring_cp2", radeon_debugfs_ring_info, 0, &cayman_cp2_index}, > + {"radeon_ring_dma1", radeon_debugfs_ring_info, 0, &radeon_dma1_index}, > + {"radeon_ring_dma2", radeon_debugfs_ring_info, 0, &radeon_dma2_index}, > + {"radeon_ring_uvd", radeon_debugfs_ring_info, 0, &r600_uvd_index}, > }; > = > static int radeon_debugfs_sa_info(struct seq_file *m, void *data) > diff --git 
a/drivers/gpu/drm/radeon/radeon_test.c b/drivers/gpu/drm/radeo= n/radeon_test.c > index fda09c9..bbed4af 100644 > --- a/drivers/gpu/drm/radeon/radeon_test.c > +++ b/drivers/gpu/drm/radeon/radeon_test.c > @@ -252,6 +252,36 @@ void radeon_test_moves(struct radeon_device *rdev) > radeon_do_test_moves(rdev, RADEON_TEST_COPY_BLIT); > } > = > +static int radeon_test_create_and_emit_fence(struct radeon_device *rdev, > + struct radeon_ring *ring, > + struct radeon_fence **fence) > +{ > + int r; > + > + if (ring->idx =3D=3D R600_RING_TYPE_UVD_INDEX) { > + r =3D radeon_uvd_get_create_msg(rdev, ring->idx, 1, NULL); > + if (r) { > + DRM_ERROR("Failed to get dummy create msg\n"); > + return r; > + } > + > + r =3D radeon_uvd_get_destroy_msg(rdev, ring->idx, 1, fence); > + if (r) { > + DRM_ERROR("Failed to get dummy destroy msg\n"); > + return r; > + } > + } else { > + r =3D radeon_ring_lock(rdev, ring, 64); > + if (r) { > + DRM_ERROR("Failed to lock ring A %d\n", ring->idx); > + return r; > + } > + radeon_fence_emit(rdev, fence, ring->idx); > + radeon_ring_unlock_commit(rdev, ring); > + } > + return 0; > +} > + > void radeon_test_ring_sync(struct radeon_device *rdev, > struct radeon_ring *ringA, > struct radeon_ring *ringB) > @@ -272,21 +302,24 @@ void radeon_test_ring_sync(struct radeon_device *rd= ev, > goto out_cleanup; > } > radeon_semaphore_emit_wait(rdev, ringA->idx, semaphore); > - r =3D radeon_fence_emit(rdev, &fence1, ringA->idx); > - if (r) { > - DRM_ERROR("Failed to emit fence 1\n"); > - radeon_ring_unlock_undo(rdev, ringA); > + radeon_ring_unlock_commit(rdev, ringA); > + > + r =3D radeon_test_create_and_emit_fence(rdev, ringA, &fence1); > + if (r) > goto out_cleanup; > - } > - radeon_semaphore_emit_wait(rdev, ringA->idx, semaphore); > - r =3D radeon_fence_emit(rdev, &fence2, ringA->idx); > + > + r =3D radeon_ring_lock(rdev, ringA, 64); > if (r) { > - DRM_ERROR("Failed to emit fence 2\n"); > - radeon_ring_unlock_undo(rdev, ringA); > + DRM_ERROR("Failed to lock ring A 
%d\n", ringA->idx); > goto out_cleanup; > } > + radeon_semaphore_emit_wait(rdev, ringA->idx, semaphore); > radeon_ring_unlock_commit(rdev, ringA); > = > + r =3D radeon_test_create_and_emit_fence(rdev, ringA, &fence2); > + if (r) > + goto out_cleanup; > + > mdelay(1000); > = > if (radeon_fence_signaled(fence1)) { > @@ -364,27 +397,22 @@ static void radeon_test_ring_sync2(struct radeon_de= vice *rdev, > goto out_cleanup; > } > radeon_semaphore_emit_wait(rdev, ringA->idx, semaphore); > - r =3D radeon_fence_emit(rdev, &fenceA, ringA->idx); > - if (r) { > - DRM_ERROR("Failed to emit sync fence 1\n"); > - radeon_ring_unlock_undo(rdev, ringA); > - goto out_cleanup; > - } > radeon_ring_unlock_commit(rdev, ringA); > = > + r =3D radeon_test_create_and_emit_fence(rdev, ringA, &fenceA); > + if (r) > + goto out_cleanup; > + > r =3D radeon_ring_lock(rdev, ringB, 64); > if (r) { > DRM_ERROR("Failed to lock ring B %d\n", ringB->idx); > goto out_cleanup; > } > radeon_semaphore_emit_wait(rdev, ringB->idx, semaphore); > - r =3D radeon_fence_emit(rdev, &fenceB, ringB->idx); > - if (r) { > - DRM_ERROR("Failed to create sync fence 2\n"); > - radeon_ring_unlock_undo(rdev, ringB); > - goto out_cleanup; > - } > radeon_ring_unlock_commit(rdev, ringB); > + r =3D radeon_test_create_and_emit_fence(rdev, ringB, &fenceB); > + if (r) > + goto out_cleanup; > = > mdelay(1000); > = > @@ -393,7 +421,7 @@ static void radeon_test_ring_sync2(struct radeon_devi= ce *rdev, > goto out_cleanup; > } > if (radeon_fence_signaled(fenceB)) { > - DRM_ERROR("Fence A signaled without waiting for semaphore.\n"); > + DRM_ERROR("Fence B signaled without waiting for semaphore.\n"); > goto out_cleanup; > } > = > diff --git a/drivers/gpu/drm/radeon/radeon_uvd.c b/drivers/gpu/drm/radeon= /radeon_uvd.c > new file mode 100644 > index 0000000..8ab7bb9 > --- /dev/null > +++ b/drivers/gpu/drm/radeon/radeon_uvd.c > @@ -0,0 +1,521 @@ > +/* > + * Copyright 2011 Advanced Micro Devices, Inc. > + * All Rights Reserved. 
> + * > + * Permission is hereby granted, free of charge, to any person obtaining= a > + * copy of this software and associated documentation files (the > + * "Software"), to deal in the Software without restriction, including > + * without limitation the rights to use, copy, modify, merge, publish, > + * distribute, sub license, and/or sell copies of the Software, and to > + * permit persons to whom the Software is furnished to do so, subject to > + * the following conditions: > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRE= SS OR > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILI= TY, > + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SH= ALL > + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY= CLAIM, > + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR > + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR= THE > + * USE OR OTHER DEALINGS IN THE SOFTWARE. > + * > + * The above copyright notice and this permission notice (including the > + * next paragraph) shall be included in all copies or substantial portio= ns > + * of the Software. 
> + * > + */ > +/* > + * Authors: > + * Christian K=F6nig > + */ > + > +#include > +#include > +#include > +#include > + > +#include "radeon.h" > +#include "r600d.h" > + > +/* Firmware Names */ > +#define FIRMWARE_RV770 "radeon/RV770_uvd.bin" > +#define FIRMWARE_RV710 "radeon/RV710_uvd.bin" > +#define FIRMWARE_CYPRESS "radeon/CYPRESS_uvd.bin" > +#define FIRMWARE_SUMO "radeon/SUMO_uvd.bin" > +#define FIRMWARE_TAHITI "radeon/TAHITI_uvd.bin" > + > +MODULE_FIRMWARE(FIRMWARE_RV770); > +MODULE_FIRMWARE(FIRMWARE_RV710); > +MODULE_FIRMWARE(FIRMWARE_CYPRESS); > +MODULE_FIRMWARE(FIRMWARE_SUMO); > +MODULE_FIRMWARE(FIRMWARE_TAHITI); > + > +int radeon_uvd_init(struct radeon_device *rdev) > +{ > + struct platform_device *pdev; > + unsigned long bo_size; > + const char *fw_name; > + int i, r; > + > + pdev =3D platform_device_register_simple("radeon_uvd", 0, NULL, 0); > + r =3D IS_ERR(pdev); > + if (r) { > + dev_err(rdev->dev, "radeon_uvd: Failed to register firmware\n"); > + return -EINVAL; > + } > + > + switch (rdev->family) { > + case CHIP_RV770: > + fw_name =3D FIRMWARE_RV770; > + break; > + > + case CHIP_RV710: > + case CHIP_RV730: > + case CHIP_RV740: > + fw_name =3D FIRMWARE_RV710; > + break; > + > + case CHIP_CYPRESS: > + case CHIP_JUNIPER: > + case CHIP_REDWOOD: > + case CHIP_CEDAR: > + fw_name =3D FIRMWARE_CYPRESS; > + break; > + > + case CHIP_SUMO: > + case CHIP_SUMO2: > + case CHIP_PALM: > + case CHIP_CAYMAN: > + case CHIP_BARTS: > + case CHIP_TURKS: > + case CHIP_CAICOS: > + fw_name =3D FIRMWARE_SUMO; > + break; > + > + case CHIP_TAHITI: > + case CHIP_VERDE: > + case CHIP_PITCAIRN: > + case CHIP_ARUBA: > + fw_name =3D FIRMWARE_TAHITI; > + break; > + > + default: > + return -EINVAL; > + } > + > + r =3D request_firmware(&rdev->uvd_fw, fw_name, &pdev->dev); > + if (r) { > + dev_err(rdev->dev, "radeon_uvd: Can't load firmware \"%s\"\n", > + fw_name); > + platform_device_unregister(pdev); > + return r; > + } > + > + platform_device_unregister(pdev); > + > + bo_size =3D 
RADEON_GPU_PAGE_ALIGN(rdev->uvd_fw->size + 4) + > + RADEON_UVD_STACK_SIZE + RADEON_UVD_HEAP_SIZE; > + r =3D radeon_bo_create(rdev, bo_size, PAGE_SIZE, true, > + RADEON_GEM_DOMAIN_VRAM, NULL, &rdev->uvd.vcpu_bo); > + if (r) { > + dev_err(rdev->dev, "(%d) failed to allocate UVD bo\n", r); > + return r; > + } > + > + r =3D radeon_uvd_resume(rdev); > + if (r) > + return r; > + > + memset(rdev->uvd.cpu_addr, 0, bo_size); > + memcpy(rdev->uvd.cpu_addr, rdev->uvd_fw->data, rdev->uvd_fw->size); > + > + r =3D radeon_uvd_suspend(rdev); > + if (r) > + return r; > + > + for (i =3D 0; i < RADEON_MAX_UVD_HANDLES; ++i) { > + atomic_set(&rdev->uvd.handles[i], 0); > + rdev->uvd.filp[i] =3D NULL; > + } > + > + return 0; > +} > + > +void radeon_uvd_fini(struct radeon_device *rdev) > +{ > + radeon_uvd_suspend(rdev); > + radeon_bo_unref(&rdev->uvd.vcpu_bo); > +} > + > +int radeon_uvd_suspend(struct radeon_device *rdev) > +{ > + int r; > + > + if (rdev->uvd.vcpu_bo =3D=3D NULL) > + return 0; > + > + r =3D radeon_bo_reserve(rdev->uvd.vcpu_bo, false); > + if (!r) { > + radeon_bo_kunmap(rdev->uvd.vcpu_bo); > + radeon_bo_unpin(rdev->uvd.vcpu_bo); > + radeon_bo_unreserve(rdev->uvd.vcpu_bo); > + } > + return r; > +} > + > +int radeon_uvd_resume(struct radeon_device *rdev) > +{ > + int r; > + > + if (rdev->uvd.vcpu_bo =3D=3D NULL) > + return -EINVAL; > + > + r =3D radeon_bo_reserve(rdev->uvd.vcpu_bo, false); > + if (r) { > + radeon_bo_unref(&rdev->uvd.vcpu_bo); > + dev_err(rdev->dev, "(%d) failed to reserve UVD bo\n", r); > + return r; > + } > + > + r =3D radeon_bo_pin(rdev->uvd.vcpu_bo, RADEON_GEM_DOMAIN_VRAM, > + &rdev->uvd.gpu_addr); > + if (r) { > + radeon_bo_unreserve(rdev->uvd.vcpu_bo); > + radeon_bo_unref(&rdev->uvd.vcpu_bo); > + dev_err(rdev->dev, "(%d) UVD bo pin failed\n", r); > + return r; > + } > + > + r =3D radeon_bo_kmap(rdev->uvd.vcpu_bo, &rdev->uvd.cpu_addr); > + if (r) { > + dev_err(rdev->dev, "(%d) UVD map failed\n", r); > + return r; > + } > + > + 
radeon_bo_unreserve(rdev->uvd.vcpu_bo); > + > + return 0; > +} > + > +void radeon_uvd_force_into_uvd_segment(struct radeon_bo *rbo) > +{ > + rbo->placement.fpfn =3D 0 >> PAGE_SHIFT; > + rbo->placement.lpfn =3D (256 * 1024 * 1024) >> PAGE_SHIFT; > +} > + > +void radeon_uvd_free_handles(struct radeon_device *rdev, struct drm_file= *filp) > +{ > + int i, r; > + for (i =3D 0; i < RADEON_MAX_UVD_HANDLES; ++i) { > + if (rdev->uvd.filp[i] =3D=3D filp) { > + uint32_t handle =3D atomic_read(&rdev->uvd.handles[i]); > + struct radeon_fence *fence; > + > + r =3D radeon_uvd_get_destroy_msg(rdev, > + R600_RING_TYPE_UVD_INDEX, handle, &fence); > + if (r) { > + DRM_ERROR("Error destroying UVD (%d)!\n", r); > + continue; > + } > + > + radeon_fence_wait(fence, false); > + radeon_fence_unref(&fence); > + > + rdev->uvd.filp[i] =3D NULL; > + atomic_set(&rdev->uvd.handles[i], 0); > + } > + } > +} > + > +static int radeon_uvd_cs_msg(struct radeon_cs_parser *p, struct radeon_b= o *msg) > +{ > + uint32_t *map, msg_type, handle; > + int i, r; > + > + r =3D radeon_bo_kmap(msg, (void **)&map); > + if (r) > + return r; > + > + msg_type =3D map[1]; > + handle =3D map[2]; > + > + radeon_bo_kunmap(msg); > + > + if (handle =3D=3D 0) { > + DRM_ERROR("Invalid UVD handle!\n"); > + return -EINVAL; > + } > + > + if (msg_type =3D=3D 2) { > + /* it's a destroy msg, free the handle */ > + for (i =3D 0; i < RADEON_MAX_UVD_HANDLES; ++i) > + atomic_cmpxchg(&p->rdev->uvd.handles[i], handle, 0); > + return 0; > + } > + > + /* create or decode, validate the handle */ > + for (i =3D 0; i < RADEON_MAX_UVD_HANDLES; ++i) { > + if (atomic_read(&p->rdev->uvd.handles[i]) =3D=3D handle) > + return 0; > + } > + /* handle not found try to alloc a new one */ > + for (i =3D 0; i < RADEON_MAX_UVD_HANDLES; ++i) { > + if (!atomic_cmpxchg(&p->rdev->uvd.handles[i], 0, handle)) { > + p->rdev->uvd.filp[i] =3D p->filp; > + return 0; > + } > + } > + DRM_ERROR("No more free UVD handles!\n"); > + return -EINVAL; > +} > + > +static 
int radeon_uvd_cs_reloc(struct radeon_cs_parser *p, int data0, in= t data1) > +{ > + struct radeon_cs_chunk *relocs_chunk; > + struct radeon_cs_reloc *reloc; > + unsigned idx, cmd; > + uint64_t start, end; > + > + relocs_chunk =3D &p->chunks[p->chunk_relocs_idx]; > + idx =3D radeon_get_ib_value(p, data1); > + if (idx >=3D relocs_chunk->length_dw) { > + DRM_ERROR("Relocs at %d after relocations chunk end %d !\n", > + idx, relocs_chunk->length_dw); > + return -EINVAL; > + } > + > + reloc =3D p->relocs_ptr[(idx / 4)]; > + start =3D reloc->lobj.gpu_offset; > + end =3D start + radeon_bo_size(reloc->robj); > + start +=3D radeon_get_ib_value(p, data0); I am assuming there is no way for you to know the size that the UVD engine will write to? You are not checking anything to prevent UVD from possibly overwriting memory past the end of the bo. > + > + p->ib.ptr[data0] =3D start & 0xFFFFFFFF; > + p->ib.ptr[data1] =3D start >> 32; > + > + > + cmd =3D radeon_get_ib_value(p, p->idx); > + if (cmd =3D=3D 0) { > + if (end & 0xFFFFFFFFF0000000) { > + DRM_ERROR("msg buffer %LX-%LX out of 256MB segment!\n", > + start, end); > + return -EINVAL; > + } > + > + return radeon_uvd_cs_msg(p, reloc->robj); > + > + } > + > + if ((start & 0xFFFFFFFFF0000000) !=3D (end & 0xFFFFFFFFF0000000)) { > + DRM_ERROR("reloc %LX-%LX crossing 256MB boundary!\n", > + start, end); > + return -EINVAL; > + } > + return 0; > +} > + > +int radeon_uvd_cs_parse(struct radeon_cs_parser *p) > +{ > + struct radeon_cs_packet pkt; > + int i, r, data0 =3D 0, data1 =3D 0; > + > + if (p->chunks[p->chunk_ib_idx].length_dw % 16) { > + DRM_ERROR("UVD IB length (%d) not 16 dwords aligned!\n", > + p->chunks[p->chunk_ib_idx].length_dw); > + return -EINVAL; > + } > + > + if (p->chunk_relocs_idx =3D=3D -1) { > + DRM_ERROR("No relocation chunk !\n"); > + return -EINVAL; > + } > + > + > + do { > + r =3D radeon_cs_packet_parse(p, &pkt, p->idx); > + if (r) > + return r; > + switch (pkt.type) { > + case RADEON_PACKET_TYPE0: > + p->idx++; > + for (i =3D 0; i 
<=3D pkt.count; ++i) { > + switch (pkt.reg + i*4) { > + case UVD_GPCOM_VCPU_DATA0: > + data0 =3D p->idx; > + break; > + case UVD_GPCOM_VCPU_DATA1: > + data1 =3D p->idx; > + break; > + case UVD_GPCOM_VCPU_CMD: > + r =3D radeon_uvd_cs_reloc(p, data0, > + data1); > + if (r) > + return r; > + break; > + case UVD_ENGINE_CNTL: > + break; > + default: > + DRM_ERROR("Invalid reg 0x%X!\n", > + pkt.reg + i*4); > + return -EINVAL; > + } > + p->idx++; > + } > + break; > + case RADEON_PACKET_TYPE2: > + p->idx +=3D pkt.count + 2; > + break; > + default: > + DRM_ERROR("Unknown packet type %d !\n", pkt.type); > + return -EINVAL; > + } > + } while (p->idx < p->chunks[p->chunk_ib_idx].length_dw); > + return 0; > +} > + > +static int radeon_uvd_send_msg(struct radeon_device *rdev, > + int ring, struct radeon_bo *bo, > + struct radeon_fence **fence) > +{ > + struct ttm_validate_buffer tv; > + struct list_head head; > + struct radeon_ib ib; > + uint64_t addr; > + int i, r; > + > + memset(&tv, 0, sizeof(tv)); > + tv.bo =3D &bo->tbo; > + > + INIT_LIST_HEAD(&head); > + list_add(&tv.head, &head); > + > + r =3D ttm_eu_reserve_buffers(&head); > + if (r) > + return r; > + > + radeon_ttm_placement_from_domain(bo, RADEON_GEM_DOMAIN_VRAM); > + radeon_uvd_force_into_uvd_segment(bo); > + > + r =3D ttm_bo_validate(&bo->tbo, &bo->placement, true, false); > + if (r) { > + ttm_eu_backoff_reservation(&head); > + return r; > + } > + > + r =3D radeon_ib_get(rdev, ring, &ib, NULL, 16); > + if (r) { > + ttm_eu_backoff_reservation(&head); > + return r; > + } > + > + addr =3D radeon_bo_gpu_offset(bo); > + ib.ptr[0] =3D PACKET0(UVD_GPCOM_VCPU_DATA0, 0); > + ib.ptr[1] =3D addr; > + ib.ptr[2] =3D PACKET0(UVD_GPCOM_VCPU_DATA1, 0); > + ib.ptr[3] =3D addr >> 32; > + ib.ptr[4] =3D PACKET0(UVD_GPCOM_VCPU_CMD, 0); > + ib.ptr[5] =3D 0; > + for (i =3D 6; i < 16; ++i) > + ib.ptr[i] =3D PACKET2(0); > + ib.length_dw =3D 16; > + > + r =3D radeon_ib_schedule(rdev, &ib, NULL); > + if (r) { > + 
ttm_eu_backoff_reservation(&head); > + return r; > + } > + ttm_eu_fence_buffer_objects(&head, ib.fence); > + > + if (fence) > + *fence =3D radeon_fence_ref(ib.fence); > + > + radeon_ib_free(rdev, &ib); > + radeon_bo_unref(&bo); > + return 0; > +} > + > +/* multiple fence commands without any stream commands in between can > + crash the vcpu so just try to emmit a dummy create/destroy msg to > + avoid this */ > +int radeon_uvd_get_create_msg(struct radeon_device *rdev, int ring, > + uint32_t handle, struct radeon_fence **fence) > +{ > + struct radeon_bo *bo; > + uint32_t *msg; > + int r, i; > + > + r =3D radeon_bo_create(rdev, 1024, PAGE_SIZE, true, > + RADEON_GEM_DOMAIN_VRAM, NULL, &bo); > + if (r) > + return r; > + > + r =3D radeon_bo_reserve(bo, false); > + if (r) { > + radeon_bo_unref(&bo); > + return r; > + } > + > + r =3D radeon_bo_kmap(bo, (void **)&msg); > + if (r) { > + radeon_bo_unreserve(bo); > + radeon_bo_unref(&bo); > + return r; > + } > + > + /* stitch together an UVD create msg */ > + msg[0] =3D 0x00000de4; > + msg[1] =3D 0x00000000; > + msg[2] =3D handle; > + msg[3] =3D 0x00000000; > + msg[4] =3D 0x00000000; > + msg[5] =3D 0x00000000; > + msg[6] =3D 0x00000000; > + msg[7] =3D 0x00000780; > + msg[8] =3D 0x00000440; > + msg[9] =3D 0x00000000; > + msg[10] =3D 0x01b37000; > + for (i =3D 11; i < 1024; ++i) > + msg[i] =3D 0x0; > + > + radeon_bo_kunmap(bo); > + radeon_bo_unreserve(bo); > + > + return radeon_uvd_send_msg(rdev, ring, bo, fence); > +} > + > +int radeon_uvd_get_destroy_msg(struct radeon_device *rdev, int ring, > + uint32_t handle, struct radeon_fence **fence) > +{ > + struct radeon_bo *bo; > + uint32_t *msg; > + int r, i; > + > + r =3D radeon_bo_create(rdev, 1024, PAGE_SIZE, true, > + RADEON_GEM_DOMAIN_VRAM, NULL, &bo); > + if (r) > + return r; > + > + r =3D radeon_bo_reserve(bo, false); > + if (r) { > + radeon_bo_unref(&bo); > + return r; > + } > + > + r =3D radeon_bo_kmap(bo, (void **)&msg); > + if (r) { > + radeon_bo_unreserve(bo); > + 
radeon_bo_unref(&bo); > + return r; > + } > + > + /* stitch together an UVD destroy msg */ > + msg[0] =3D 0x00000de4; > + msg[1] =3D 0x00000002; > + msg[2] =3D handle; > + msg[3] =3D 0x00000000; > + for (i =3D 4; i < 1024; ++i) > + msg[i] =3D 0x0; > + > + radeon_bo_kunmap(bo); > + radeon_bo_unreserve(bo); > + > + return radeon_uvd_send_msg(rdev, ring, bo, fence); > +} > diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv77= 0.c > index d63fe1d..5a78cce 100644 > --- a/drivers/gpu/drm/radeon/rv770.c > +++ b/drivers/gpu/drm/radeon/rv770.c > @@ -68,6 +68,107 @@ u32 rv770_get_xclk(struct radeon_device *rdev) > return reference_clock; > } > = > +int rv770_uvd_resume(struct radeon_device *rdev) > +{ > + uint64_t addr; > + uint32_t chip_id, size; > + int r; > + > + r =3D radeon_uvd_resume(rdev); > + if (r) > + return r; > + > + /* programm the VCPU memory controller bits 0-27 */ > + addr =3D rdev->uvd.gpu_addr >> 3; > + size =3D RADEON_GPU_PAGE_ALIGN(rdev->uvd_fw->size + 4) >> 3; > + WREG32(UVD_VCPU_CACHE_OFFSET0, addr); > + WREG32(UVD_VCPU_CACHE_SIZE0, size); > + > + addr +=3D size; > + size =3D RADEON_UVD_STACK_SIZE >> 3; > + WREG32(UVD_VCPU_CACHE_OFFSET1, addr); > + WREG32(UVD_VCPU_CACHE_SIZE1, size); > + > + addr +=3D size; > + size =3D RADEON_UVD_HEAP_SIZE >> 3; > + WREG32(UVD_VCPU_CACHE_OFFSET2, addr); > + WREG32(UVD_VCPU_CACHE_SIZE2, size); > + > + /* bits 28-31 */ > + addr =3D (rdev->uvd.gpu_addr >> 28) & 0xF; > + WREG32(UVD_LMI_ADDR_EXT, (addr << 12) | (addr << 0)); > + > + /* bits 32-39 */ > + addr =3D (rdev->uvd.gpu_addr >> 32) & 0xFF; > + WREG32(UVD_LMI_EXT40_ADDR, addr | (0x9 << 16) | (0x1 << 31)); > + > + /* tell firmware which hardware it is running on */ > + switch (rdev->family) { > + default: > + return -EINVAL; > + case CHIP_RV770: > + chip_id =3D 0x01000004; > + break; > + case CHIP_RV710: > + chip_id =3D 0x01000005; > + break; > + case CHIP_RV730: > + chip_id =3D 0x01000006; > + break; > + case CHIP_RV740: > + chip_id =3D 
0x01000007; > + break; > + case CHIP_CYPRESS: > + chip_id =3D 0x01000008; > + break; > + case CHIP_JUNIPER: > + chip_id =3D 0x01000009; > + break; > + case CHIP_REDWOOD: > + chip_id =3D 0x0100000a; > + break; > + case CHIP_CEDAR: > + chip_id =3D 0x0100000b; > + break; > + case CHIP_SUMO: > + chip_id =3D 0x0100000c; > + break; > + case CHIP_SUMO2: > + chip_id =3D 0x0100000d; > + break; > + case CHIP_PALM: > + chip_id =3D 0x0100000e; > + break; > + case CHIP_CAYMAN: > + chip_id =3D 0x0100000f; > + break; > + case CHIP_BARTS: > + chip_id =3D 0x01000010; > + break; > + case CHIP_TURKS: > + chip_id =3D 0x01000011; > + break; > + case CHIP_CAICOS: > + chip_id =3D 0x01000012; > + break; > + case CHIP_TAHITI: > + chip_id =3D 0x01000014; > + break; > + case CHIP_VERDE: > + chip_id =3D 0x01000015; > + break; > + case CHIP_PITCAIRN: > + chip_id =3D 0x01000016; > + break; > + case CHIP_ARUBA: > + chip_id =3D 0x01000017; > + break; > + } > + WREG32(UVD_VCPU_CHIP_ID, chip_id); > + > + return 0; > +} > + > u32 rv770_page_flip(struct radeon_device *rdev, int crtc_id, u64 crtc_ba= se) > { > struct radeon_crtc *radeon_crtc =3D rdev->mode_info.crtcs[crtc_id]; > @@ -1040,6 +1141,17 @@ static int rv770_startup(struct radeon_device *rde= v) > return r; > } > = > + r =3D rv770_uvd_resume(rdev); > + if (!r) { > + r =3D radeon_fence_driver_start_ring(rdev, > + R600_RING_TYPE_UVD_INDEX); > + if (r) > + dev_err(rdev->dev, "UVD fences init error (%d).\n", r); > + } > + > + if (r) > + rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size =3D 0; > + > /* Enable IRQ */ > r =3D r600_irq_init(rdev); > if (r) { > @@ -1074,6 +1186,19 @@ static int rv770_startup(struct radeon_device *rde= v) > if (r) > return r; > = > + ring =3D &rdev->ring[R600_RING_TYPE_UVD_INDEX]; > + if (ring->ring_size) { > + r =3D radeon_ring_init(rdev, ring, ring->ring_size, > + R600_WB_UVD_RPTR_OFFSET, > + UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR, > + 0, 0xfffff, RADEON_CP_PACKET2); > + if (!r) > + r =3D r600_uvd_init(rdev); > + > + if (r) 
> + DRM_ERROR("radeon: failed initializing UVD (%d).\n", r); > + } > + > r =3D radeon_ib_pool_init(rdev); > if (r) { > dev_err(rdev->dev, "IB initialization failed (%d).\n", r); > @@ -1115,6 +1240,7 @@ int rv770_resume(struct radeon_device *rdev) > int rv770_suspend(struct radeon_device *rdev) > { > r600_audio_fini(rdev); > + radeon_uvd_suspend(rdev); > r700_cp_stop(rdev); > r600_dma_stop(rdev); > r600_irq_suspend(rdev); > @@ -1190,6 +1316,13 @@ int rv770_init(struct radeon_device *rdev) > rdev->ring[R600_RING_TYPE_DMA_INDEX].ring_obj =3D NULL; > r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX], 64 * 1024); > = > + r =3D radeon_uvd_init(rdev); > + if (!r) { > + rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_obj =3D NULL; > + r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX], > + 4096); > + } > + > rdev->ih.ring_obj =3D NULL; > r600_ih_ring_init(rdev, 64 * 1024); > = > @@ -1224,6 +1357,7 @@ void rv770_fini(struct radeon_device *rdev) > radeon_ib_pool_fini(rdev); > radeon_irq_kms_fini(rdev); > rv770_pcie_gart_fini(rdev); > + radeon_uvd_fini(rdev); > r600_vram_scratch_fini(rdev); > radeon_gem_fini(rdev); > radeon_fence_driver_fini(rdev); > diff --git a/drivers/gpu/drm/radeon/rv770d.h b/drivers/gpu/drm/radeon/rv7= 70d.h > index c55f950..da158b54 100644 > --- a/drivers/gpu/drm/radeon/rv770d.h > +++ b/drivers/gpu/drm/radeon/rv770d.h > @@ -671,4 +671,18 @@ > # define TARGET_LINK_SPEED_MASK (0xf << 0) > # define SELECTABLE_DEEMPHASIS (1 << 6) > = > +/* UVD */ > +#define UVD_LMI_EXT40_ADDR 0xf498 > +#define UVD_VCPU_CHIP_ID 0xf4d4 > +#define UVD_VCPU_CACHE_OFFSET0 0xf4d8 > +#define UVD_VCPU_CACHE_SIZE0 0xf4dc > +#define UVD_VCPU_CACHE_OFFSET1 0xf4e0 > +#define UVD_VCPU_CACHE_SIZE1 0xf4e4 > +#define UVD_VCPU_CACHE_OFFSET2 0xf4e8 > +#define UVD_VCPU_CACHE_SIZE2 0xf4ec > +#define UVD_LMI_ADDR_EXT 0xf594 > + > +#define UVD_RBC_RB_RPTR 0xf690 > +#define UVD_RBC_RB_WPTR 0xf694 > + > #endif > diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c > 
index bafbe32..cc9fe39 100644 > --- a/drivers/gpu/drm/radeon/si.c > +++ b/drivers/gpu/drm/radeon/si.c > @@ -4372,6 +4372,16 @@ static int si_startup(struct radeon_device *rdev) > return r; > } > = > + r =3D rv770_uvd_resume(rdev); > + if (!r) { > + r =3D radeon_fence_driver_start_ring(rdev, > + R600_RING_TYPE_UVD_INDEX); > + if (r) > + dev_err(rdev->dev, "UVD fences init error (%d).\n", r); > + } > + if (r) > + rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size =3D 0; > + > /* Enable IRQ */ > r =3D si_irq_init(rdev); > if (r) { > @@ -4429,6 +4439,18 @@ static int si_startup(struct radeon_device *rdev) > if (r) > return r; > = > + ring =3D &rdev->ring[R600_RING_TYPE_UVD_INDEX]; > + if (ring->ring_size) { > + r =3D radeon_ring_init(rdev, ring, ring->ring_size, > + R600_WB_UVD_RPTR_OFFSET, > + UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR, > + 0, 0xfffff, RADEON_CP_PACKET2); > + if (!r) > + r =3D r600_uvd_init(rdev); > + if (r) > + DRM_ERROR("radeon: failed initializing UVD (%d).\n", r); > + } > + > r =3D radeon_ib_pool_init(rdev); > if (r) { > dev_err(rdev->dev, "IB initialization failed (%d).\n", r); > @@ -4472,6 +4494,8 @@ int si_suspend(struct radeon_device *rdev) > radeon_vm_manager_fini(rdev); > si_cp_enable(rdev, false); > cayman_dma_stop(rdev); > + r600_uvd_rbc_stop(rdev); > + radeon_uvd_suspend(rdev); > si_irq_suspend(rdev); > radeon_wb_disable(rdev); > si_pcie_gart_disable(rdev); > @@ -4557,6 +4581,13 @@ int si_init(struct radeon_device *rdev) > ring->ring_obj =3D NULL; > r600_ring_init(rdev, ring, 64 * 1024); > = > + r =3D radeon_uvd_init(rdev); > + if (!r) { > + ring =3D &rdev->ring[R600_RING_TYPE_UVD_INDEX]; > + ring->ring_obj =3D NULL; > + r600_ring_init(rdev, ring, 4096); > + } > + > rdev->ih.ring_obj =3D NULL; > r600_ih_ring_init(rdev, 64 * 1024); > = > @@ -4605,6 +4636,7 @@ void si_fini(struct radeon_device *rdev) > radeon_vm_manager_fini(rdev); > radeon_ib_pool_fini(rdev); > radeon_irq_kms_fini(rdev); > + radeon_uvd_fini(rdev); > si_pcie_gart_fini(rdev); > 
r600_vram_scratch_fini(rdev); > radeon_gem_fini(rdev); > diff --git a/drivers/gpu/drm/radeon/sid.h b/drivers/gpu/drm/radeon/sid.h > index 23fc08f..759f682 100644 > --- a/drivers/gpu/drm/radeon/sid.h > +++ b/drivers/gpu/drm/radeon/sid.h > @@ -798,6 +798,12 @@ > # define THREAD_TRACE_FINISH (55 << 0) > = > /* > + * UVD > + */ > +#define UVD_RBC_RB_RPTR 0xF690 > +#define UVD_RBC_RB_WPTR 0xF694 > + > +/* > * PM4 > */ > #define PACKET0(reg, n) ((RADEON_PACKET_TYPE0 << 30) | \ > diff --git a/include/uapi/drm/radeon_drm.h b/include/uapi/drm/radeon_drm.h > index eeda917..cd085d1 100644 > --- a/include/uapi/drm/radeon_drm.h > +++ b/include/uapi/drm/radeon_drm.h > @@ -918,6 +918,7 @@ struct drm_radeon_gem_va { > #define RADEON_CS_RING_GFX 0 > #define RADEON_CS_RING_COMPUTE 1 > #define RADEON_CS_RING_DMA 2 > +#define RADEON_CS_RING_UVD 3 > /* The third dword of RADEON_CHUNK_ID_FLAGS is a sint32 that sets the priority */ > /* 0 = normal, + = higher priority, - = lower priority */ > = > -- = > 1.7.9.5 > = Cheers, Jerome