* [PATCH 2/4] drm/radeon/kms: add evergreen/cayman CS parser for async DMA
2012-12-13 17:57 [PATCH 1/4] drm/radeon/kms: add 6xx/7xx CS parser for async DMA alexdeucher
@ 2012-12-13 17:57 ` alexdeucher
2012-12-13 17:57 ` [PATCH 3/4] drm/radeon: add VM CS parser support for async DMA on cayman/TN/SI alexdeucher
` (3 subsequent siblings)
4 siblings, 0 replies; 9+ messages in thread
From: alexdeucher @ 2012-12-13 17:57 UTC
To: airlied, dri-devel; +Cc: Alex Deucher
From: Alex Deucher <alexander.deucher@amd.com>
Allows us to use the DMA ring from userspace.
DMA doesn't have a good NOP packet in which to embed the
reloc idx, so userspace has to add a reloc for each
buffer used and order them to match the command stream.
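For illustration, a minimal userspace-side sketch of that ordering contract, using the old-style L2L dword copy that the parser below handles in 5 dwords; the emit_reloc()/emit_dw() helpers and the cs struct are hypothetical, only the one-reloc-per-buffer-use, command-stream-order rule comes from this patch:

/* With no reloc index embedded in a NOP, the kernel consumes relocs
 * strictly in submission order, so emit one reloc per buffer use, in
 * packet order.  The parser fetches src before dst for COPY. */
static void emit_dma_copy_l2l_dw(struct cs *cs, uint32_t src_handle,
                                 uint32_t dst_handle, uint32_t count_dw)
{
    emit_reloc(cs, src_handle);               /* consumed first */
    emit_reloc(cs, dst_handle);               /* consumed second */
    emit_dw(cs, (DMA_PACKET_COPY << 28) | (count_dw & 0xfffff));
    emit_dw(cs, 0);  /* dst address lo; kernel adds the GPU offset */
    emit_dw(cs, 0);  /* src address lo */
    emit_dw(cs, 0);  /* dst address bits 39:32 */
    emit_dw(cs, 0);  /* src address bits 39:32 */
}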
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
drivers/gpu/drm/radeon/evergreen_cs.c | 449 +++++++++++++++++++++++++++++++++
drivers/gpu/drm/radeon/radeon_asic.c | 14 +-
drivers/gpu/drm/radeon/radeon_asic.h | 1 +
3 files changed, 457 insertions(+), 7 deletions(-)
diff --git a/drivers/gpu/drm/radeon/evergreen_cs.c b/drivers/gpu/drm/radeon/evergreen_cs.c
index 62c2271..adfc66f 100644
--- a/drivers/gpu/drm/radeon/evergreen_cs.c
+++ b/drivers/gpu/drm/radeon/evergreen_cs.c
@@ -34,6 +34,8 @@
#define MAX(a,b) (((a)>(b))?(a):(b))
#define MIN(a,b) (((a)<(b))?(a):(b))
+int r600_dma_cs_next_reloc(struct radeon_cs_parser *p,
+ struct radeon_cs_reloc **cs_reloc);
static int evergreen_cs_packet_next_reloc(struct radeon_cs_parser *p,
struct radeon_cs_reloc **cs_reloc);
@@ -2804,6 +2806,453 @@ int evergreen_cs_parse(struct radeon_cs_parser *p)
return 0;
}
+/*
+ * DMA
+ */
+
+#define GET_DMA_CMD(h) (((h) & 0xf0000000) >> 28)
+#define GET_DMA_COUNT(h) ((h) & 0x000fffff)
+#define GET_DMA_T(h) (((h) & 0x00800000) >> 23)
+#define GET_DMA_NEW(h) (((h) & 0x04000000) >> 26)
+#define GET_DMA_MISC(h) (((h) & 0x0700000) >> 20)
+
+/**
+ * evergreen_dma_cs_parse() - parse the DMA IB
+ * @p: parser structure holding parsing context.
+ *
+ * Parses the DMA IB from the CS ioctl and updates
+ * the GPU addresses based on the reloc information and
+ * checks for errors. (Evergreen-Cayman)
+ * Returns 0 for success and an error on failure.
+ **/
+int evergreen_dma_cs_parse(struct radeon_cs_parser *p)
+{
+ struct radeon_cs_chunk *ib_chunk = &p->chunks[p->chunk_ib_idx];
+ struct radeon_cs_reloc *src_reloc, *dst_reloc, *dst2_reloc;
+ u32 header, cmd, count, tiled, new_cmd, misc;
+ volatile u32 *ib = p->ib.ptr;
+ u32 idx, idx_value;
+ u64 src_offset, dst_offset, dst2_offset;
+ int r;
+
+ do {
+ if (p->idx >= ib_chunk->length_dw) {
+ DRM_ERROR("Can not parse packet at %d after CS end %d !\n",
+ p->idx, ib_chunk->length_dw);
+ return -EINVAL;
+ }
+ idx = p->idx;
+ header = radeon_get_ib_value(p, idx);
+ cmd = GET_DMA_CMD(header);
+ count = GET_DMA_COUNT(header);
+ tiled = GET_DMA_T(header);
+ new_cmd = GET_DMA_NEW(header);
+ misc = GET_DMA_MISC(header);
+
+ switch (cmd) {
+ case DMA_PACKET_WRITE:
+ r = r600_dma_cs_next_reloc(p, &dst_reloc);
+ if (r) {
+ DRM_ERROR("bad DMA_PACKET_WRITE\n");
+ return -EINVAL;
+ }
+ if (tiled) {
+ ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
+ dst_offset = ib[idx+1];
+ dst_offset <<= 8;
+ p->idx += count + 7;
+ } else {
+ ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
+ ib[idx+2] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
+ dst_offset = ib[idx+1];
+ dst_offset |= ((u64)(ib[idx+2] & 0xff)) << 32;
+ p->idx += count + 3;
+ }
+ if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
+ dev_warn(p->dev, "DMA write buffer too small (%llu %lu)\n",
+ dst_offset, radeon_bo_size(dst_reloc->robj));
+ return -EINVAL;
+ }
+ break;
+ case DMA_PACKET_COPY:
+ r = r600_dma_cs_next_reloc(p, &src_reloc);
+ if (r) {
+ DRM_ERROR("bad DMA_PACKET_COPY\n");
+ return -EINVAL;
+ }
+ r = r600_dma_cs_next_reloc(p, &dst_reloc);
+ if (r) {
+ DRM_ERROR("bad DMA_PACKET_COPY\n");
+ return -EINVAL;
+ }
+ if (tiled) {
+ idx_value = radeon_get_ib_value(p, idx + 2);
+ if (new_cmd) {
+ switch (misc) {
+ case 0:
+ /* L2T, frame to fields */
+ if (idx_value & (1 << 31)) {
+ DRM_ERROR("bad L2T, frame to fields DMA_PACKET_COPY\n");
+ return -EINVAL;
+ }
+ r = r600_dma_cs_next_reloc(p, &dst2_reloc);
+ if (r) {
+ DRM_ERROR("bad L2T, frame to fields DMA_PACKET_COPY\n");
+ return -EINVAL;
+ }
+ ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
+ ib[idx+2] += (u32)(dst2_reloc->lobj.gpu_offset >> 8);
+ ib[idx+8] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
+ ib[idx+9] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+ dst_offset = ib[idx+1];
+ dst_offset <<= 8;
+ dst2_offset = ib[idx+2];
+ dst2_offset <<= 8;
+ src_offset = ib[idx+8];
+ src_offset |= ((u64)(ib[idx+9] & 0xff)) << 32;
+ if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
+ dev_warn(p->dev, "DMA L2T, frame to fields src buffer too small (%llu %lu)\n",
+ src_offset, radeon_bo_size(src_reloc->robj));
+ return -EINVAL;
+ }
+ if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
+ dev_warn(p->dev, "DMA L2T, frame to fields buffer too small (%llu %lu)\n",
+ dst_offset, radeon_bo_size(dst_reloc->robj));
+ return -EINVAL;
+ }
+ if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) {
+ dev_warn(p->dev, "DMA L2T, frame to fields buffer too small (%llu %lu)\n",
+ dst2_offset, radeon_bo_size(dst2_reloc->robj));
+ return -EINVAL;
+ }
+ p->idx += 10;
+ break;
+ case 1:
+ /* L2T, T2L partial */
+ if (p->family < CHIP_CAYMAN) {
+ DRM_ERROR("L2T, T2L Partial is cayman only !\n");
+ return -EINVAL;
+ }
+ /* detile bit */
+ if (idx_value & (1 << 31)) {
+ /* tiled src, linear dst */
+ ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8);
+
+ ib[idx+7] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
+ ib[idx+8] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
+ } else {
+ /* linear src, tiled dst */
+ ib[idx+7] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
+ ib[idx+8] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+
+ ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
+ }
+ p->idx += 12;
+ break;
+ case 3:
+ /* L2T, broadcast */
+ if (idx_value & (1 << 31)) {
+ DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
+ return -EINVAL;
+ }
+ r = r600_dma_cs_next_reloc(p, &dst2_reloc);
+ if (r) {
+ DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
+ return -EINVAL;
+ }
+ ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
+ ib[idx+2] += (u32)(dst2_reloc->lobj.gpu_offset >> 8);
+ ib[idx+8] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
+ ib[idx+9] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+ dst_offset = ib[idx+1];
+ dst_offset <<= 8;
+ dst2_offset = ib[idx+2];
+ dst2_offset <<= 8;
+ src_offset = ib[idx+8];
+ src_offset |= ((u64)(ib[idx+9] & 0xff)) << 32;
+ if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
+ dev_warn(p->dev, "DMA L2T, broadcast src buffer too small (%llu %lu)\n",
+ src_offset, radeon_bo_size(src_reloc->robj));
+ return -EINVAL;
+ }
+ if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
+ dev_warn(p->dev, "DMA L2T, broadcast dst buffer too small (%llu %lu)\n",
+ dst_offset, radeon_bo_size(dst_reloc->robj));
+ return -EINVAL;
+ }
+ if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) {
+ dev_warn(p->dev, "DMA L2T, broadcast dst2 buffer too small (%llu %lu)\n",
+ dst2_offset, radeon_bo_size(dst2_reloc->robj));
+ return -EINVAL;
+ }
+ p->idx += 10;
+ break;
+ case 4:
+ /* L2T, T2L */
+ /* detile bit */
+ if (idx_value & (1 << 31)) {
+ /* tiled src, linear dst */
+ ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8);
+ src_offset = ib[idx+1];
+ src_offset <<= 8;
+
+ ib[idx+7] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
+ ib[idx+8] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
+ dst_offset = ib[idx+7];
+ dst_offset |= ((u64)(ib[idx+8] & 0xff)) << 32;
+ } else {
+ /* linear src, tiled dst */
+ ib[idx+7] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
+ ib[idx+8] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+ src_offset = ib[idx+7];
+ src_offset |= ((u64)(ib[idx+8] & 0xff)) << 32;
+
+ ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
+ dst_offset = ib[idx+1];
+ dst_offset <<= 8;
+ }
+ if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
+ dev_warn(p->dev, "DMA L2T, T2L src buffer too small (%llu %lu)\n",
+ src_offset, radeon_bo_size(src_reloc->robj));
+ return -EINVAL;
+ }
+ if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
+ dev_warn(p->dev, "DMA L2T, T2L dst buffer too small (%llu %lu)\n",
+ dst_offset, radeon_bo_size(dst_reloc->robj));
+ return -EINVAL;
+ }
+ p->idx += 9;
+ break;
+ case 5:
+ /* T2T partial */
+ if (p->family < CHIP_CAYMAN) {
+ DRM_ERROR("L2T, T2L Partial is cayman only !\n");
+ return -EINVAL;
+ }
+ ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8);
+ ib[idx+4] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
+ p->idx += 13;
+ break;
+ case 7:
+ /* L2T, broadcast */
+ if (idx_value & (1 << 31)) {
+ DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
+ return -EINVAL;
+ }
+ r = r600_dma_cs_next_reloc(p, &dst2_reloc);
+ if (r) {
+ DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
+ return -EINVAL;
+ }
+ ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
+ ib[idx+2] += (u32)(dst2_reloc->lobj.gpu_offset >> 8);
+ ib[idx+8] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
+ ib[idx+9] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+ dst_offset = ib[idx+1];
+ dst_offset <<= 8;
+ dst2_offset = ib[idx+2];
+ dst2_offset <<= 8;
+ src_offset = ib[idx+8];
+ src_offset |= ((u64)(ib[idx+9] & 0xff)) << 32;
+ if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
+ dev_warn(p->dev, "DMA L2T, broadcast src buffer too small (%llu %lu)\n",
+ src_offset, radeon_bo_size(src_reloc->robj));
+ return -EINVAL;
+ }
+ if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
+ dev_warn(p->dev, "DMA L2T, broadcast dst buffer too small (%llu %lu)\n",
+ dst_offset, radeon_bo_size(dst_reloc->robj));
+ return -EINVAL;
+ }
+ if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) {
+ dev_warn(p->dev, "DMA L2T, broadcast dst2 buffer too small (%llu %lu)\n",
+ dst2_offset, radeon_bo_size(dst2_reloc->robj));
+ return -EINVAL;
+ }
+ p->idx += 10;
+ break;
+ default:
+ DRM_ERROR("bad DMA_PACKET_COPY misc %u\n", misc);
+ return -EINVAL;
+ }
+ } else {
+ switch (misc) {
+ case 0:
+ /* detile bit */
+ if (idx_value & (1 << 31)) {
+ /* tiled src, linear dst */
+ ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8);
+ src_offset = ib[idx+1];
+ src_offset <<= 8;
+
+ ib[idx+7] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
+ ib[idx+8] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
+ dst_offset = ib[idx+7];
+ dst_offset |= ((u64)(ib[idx+8] & 0xff)) << 32;
+ } else {
+ /* linear src, tiled dst */
+ ib[idx+7] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
+ ib[idx+8] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+ src_offset = ib[idx+7];
+ src_offset |= ((u64)(ib[idx+8] & 0xff)) << 32;
+
+ ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
+ dst_offset = ib[idx+1];
+ dst_offset <<= 8;
+ }
+ if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
+ dev_warn(p->dev, "DMA L2T, broadcast src buffer too small (%llu %lu)\n",
+ src_offset, radeon_bo_size(src_reloc->robj));
+ return -EINVAL;
+ }
+ if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
+ dev_warn(p->dev, "DMA L2T, broadcast dst buffer too small (%llu %lu)\n",
+ dst_offset, radeon_bo_size(dst_reloc->robj));
+ return -EINVAL;
+ }
+ p->idx += 9;
+ break;
+ default:
+ DRM_ERROR("bad DMA_PACKET_COPY misc %u\n", misc);
+ return -EINVAL;
+ }
+ }
+ } else {
+ if (new_cmd) {
+ switch (misc) {
+ case 0:
+ /* L2L, byte */
+ ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xffffffff);
+ ib[idx+2] += (u32)(src_reloc->lobj.gpu_offset & 0xffffffff);
+ ib[idx+3] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
+ ib[idx+4] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+ src_offset = ib[idx+2];
+ src_offset |= ((u64)(ib[idx+4] & 0xff)) << 32;
+ dst_offset = ib[idx+1];
+ dst_offset |= ((u64)(ib[idx+3] & 0xff)) << 32;
+ if ((src_offset + count) > radeon_bo_size(src_reloc->robj)) {
+ dev_warn(p->dev, "DMA L2L, byte src buffer too small (%llu %lu)\n",
+ src_offset, radeon_bo_size(src_reloc->robj));
+ return -EINVAL;
+ }
+ if ((dst_offset + count) > radeon_bo_size(dst_reloc->robj)) {
+ dev_warn(p->dev, "DMA L2L, byte dst buffer too small (%llu %lu)\n",
+ dst_offset, radeon_bo_size(dst_reloc->robj));
+ return -EINVAL;
+ }
+ p->idx += 5;
+ break;
+ case 1:
+ /* L2L, partial */
+ if (p->family < CHIP_CAYMAN) {
+ DRM_ERROR("L2L Partial is cayman only !\n");
+ return -EINVAL;
+ }
+ ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset & 0xffffffff);
+ ib[idx+2] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+ ib[idx+4] += (u32)(dst_reloc->lobj.gpu_offset & 0xffffffff);
+ ib[idx+5] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
+
+ p->idx += 9;
+ break;
+ case 4:
+ /* L2L, dw, broadcast */
+ r = r600_dma_cs_next_reloc(p, &dst2_reloc);
+ if (r) {
+ DRM_ERROR("bad L2L, dw, broadcast DMA_PACKET_COPY\n");
+ return -EINVAL;
+ }
+ ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
+ ib[idx+2] += (u32)(dst2_reloc->lobj.gpu_offset & 0xfffffffc);
+ ib[idx+3] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
+ ib[idx+4] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
+ ib[idx+5] += upper_32_bits(dst2_reloc->lobj.gpu_offset) & 0xff;
+ ib[idx+6] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+ dst_offset = ib[idx+1];
+ dst_offset |= ((u64)(ib[idx+4] & 0xff)) << 32;
+ dst2_offset = ib[idx+2];
+ dst2_offset |= ((u64)(ib[idx+5] & 0xff)) << 32;
+ src_offset = ib[idx+3];
+ src_offset |= ((u64)(ib[idx+6] & 0xff)) << 32;
+ if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
+ dev_warn(p->dev, "DMA L2L, dw, broadcast src buffer too small (%llu %lu)\n",
+ src_offset, radeon_bo_size(src_reloc->robj));
+ return -EINVAL;
+ }
+ if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
+ dev_warn(p->dev, "DMA L2L, dw, broadcast dst buffer too small (%llu %lu)\n",
+ dst_offset, radeon_bo_size(dst_reloc->robj));
+ return -EINVAL;
+ }
+ if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) {
+ dev_warn(p->dev, "DMA L2L, dw, broadcast dst2 buffer too small (%llu %lu)\n",
+ dst2_offset, radeon_bo_size(dst2_reloc->robj));
+ return -EINVAL;
+ }
+ p->idx += 7;
+ break;
+ default:
+ DRM_ERROR("bad DMA_PACKET_COPY misc %u\n", misc);
+ return -EINVAL;
+ }
+ } else {
+ /* L2L, dw */
+ ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
+ ib[idx+2] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
+ ib[idx+3] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
+ ib[idx+4] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+ src_offset = ib[idx+2];
+ src_offset |= ((u64)(ib[idx+4] & 0xff)) << 32;
+ dst_offset = ib[idx+1];
+ dst_offset |= ((u64)(ib[idx+3] & 0xff)) << 32;
+ if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
+ dev_warn(p->dev, "DMA L2L, dw src buffer too small (%llu %lu)\n",
+ src_offset, radeon_bo_size(src_reloc->robj));
+ return -EINVAL;
+ }
+ if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
+ dev_warn(p->dev, "DMA L2L, dw dst buffer too small (%llu %lu)\n",
+ dst_offset, radeon_bo_size(dst_reloc->robj));
+ return -EINVAL;
+ }
+ p->idx += 5;
+ }
+ }
+ break;
+ case DMA_PACKET_CONSTANT_FILL:
+ r = r600_dma_cs_next_reloc(p, &dst_reloc);
+ if (r) {
+ DRM_ERROR("bad DMA_PACKET_CONSTANT_FILL\n");
+ return -EINVAL;
+ }
+ ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
+ ib[idx+3] += (upper_32_bits(dst_reloc->lobj.gpu_offset) << 16) & 0x00ff0000;
+ dst_offset = ib[idx+1];
+ dst_offset |= ((u64)(ib[idx+3] & 0x00ff0000)) << 16;
+ if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
+ dev_warn(p->dev, "DMA constant fill buffer too small (%llu %lu)\n",
+ dst_offset, radeon_bo_size(dst_reloc->robj));
+ return -EINVAL;
+ }
+ p->idx += 4;
+ break;
+ case DMA_PACKET_NOP:
+ p->idx += 1;
+ break;
+ default:
+ DRM_ERROR("Unknown packet type %d at %d !\n", cmd, idx);
+ return -EINVAL;
+ }
+ } while (p->idx < p->chunks[p->chunk_ib_idx].length_dw);
+#if 0
+ for (r = 0; r < p->ib->length_dw; r++) {
+ printk(KERN_INFO "%05d 0x%08X\n", r, p->ib.ptr[r]);
+ mdelay(1);
+ }
+#endif
+ return 0;
+}
+
/* vm parser */
static bool evergreen_vm_reg_valid(u32 reg)
{
diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c
index d360341..ac1d570 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.c
+++ b/drivers/gpu/drm/radeon/radeon_asic.c
@@ -1204,7 +1204,7 @@ static struct radeon_asic evergreen_asic = {
.ib_execute = &evergreen_dma_ring_ib_execute,
.emit_fence = &evergreen_dma_fence_ring_emit,
.emit_semaphore = &r600_dma_semaphore_ring_emit,
- .cs_parse = NULL,
+ .cs_parse = &evergreen_dma_cs_parse,
.ring_test = &r600_dma_ring_test,
.ib_test = &r600_dma_ib_test,
.is_lockup = &r600_dma_is_lockup,
@@ -1288,7 +1288,7 @@ static struct radeon_asic sumo_asic = {
.ib_execute = &evergreen_dma_ring_ib_execute,
.emit_fence = &evergreen_dma_fence_ring_emit,
.emit_semaphore = &r600_dma_semaphore_ring_emit,
- .cs_parse = NULL,
+ .cs_parse = &evergreen_dma_cs_parse,
.ring_test = &r600_dma_ring_test,
.ib_test = &r600_dma_ib_test,
.is_lockup = &r600_dma_is_lockup,
@@ -1372,7 +1372,7 @@ static struct radeon_asic btc_asic = {
.ib_execute = &evergreen_dma_ring_ib_execute,
.emit_fence = &evergreen_dma_fence_ring_emit,
.emit_semaphore = &r600_dma_semaphore_ring_emit,
- .cs_parse = NULL,
+ .cs_parse = &evergreen_dma_cs_parse,
.ring_test = &r600_dma_ring_test,
.ib_test = &r600_dma_ib_test,
.is_lockup = &r600_dma_is_lockup,
@@ -1486,7 +1486,7 @@ static struct radeon_asic cayman_asic = {
.ib_execute = &cayman_dma_ring_ib_execute,
.emit_fence = &evergreen_dma_fence_ring_emit,
.emit_semaphore = &r600_dma_semaphore_ring_emit,
- .cs_parse = NULL,
+ .cs_parse = &evergreen_dma_cs_parse,
.ring_test = &r600_dma_ring_test,
.ib_test = &r600_dma_ib_test,
.is_lockup = &cayman_dma_is_lockup,
@@ -1496,7 +1496,7 @@ static struct radeon_asic cayman_asic = {
.ib_execute = &cayman_dma_ring_ib_execute,
.emit_fence = &evergreen_dma_fence_ring_emit,
.emit_semaphore = &r600_dma_semaphore_ring_emit,
- .cs_parse = NULL,
+ .cs_parse = &evergreen_dma_cs_parse,
.ring_test = &r600_dma_ring_test,
.ib_test = &r600_dma_ib_test,
.is_lockup = &cayman_dma_is_lockup,
@@ -1611,7 +1611,7 @@ static struct radeon_asic trinity_asic = {
.ib_execute = &cayman_dma_ring_ib_execute,
.emit_fence = &evergreen_dma_fence_ring_emit,
.emit_semaphore = &r600_dma_semaphore_ring_emit,
- .cs_parse = NULL,
+ .cs_parse = &evergreen_dma_cs_parse,
.ring_test = &r600_dma_ring_test,
.ib_test = &r600_dma_ib_test,
.is_lockup = &cayman_dma_is_lockup,
@@ -1621,7 +1621,7 @@ static struct radeon_asic trinity_asic = {
.ib_execute = &cayman_dma_ring_ib_execute,
.emit_fence = &evergreen_dma_fence_ring_emit,
.emit_semaphore = &r600_dma_semaphore_ring_emit,
- .cs_parse = NULL,
+ .cs_parse = &evergreen_dma_cs_parse,
.ring_test = &r600_dma_ring_test,
.ib_test = &r600_dma_ib_test,
.is_lockup = &cayman_dma_is_lockup,
diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h
index b311c0a..d2ac646 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.h
+++ b/drivers/gpu/drm/radeon/radeon_asic.h
@@ -431,6 +431,7 @@ u32 evergreen_get_vblank_counter(struct radeon_device *rdev, int crtc);
int evergreen_irq_set(struct radeon_device *rdev);
int evergreen_irq_process(struct radeon_device *rdev);
extern int evergreen_cs_parse(struct radeon_cs_parser *p);
+extern int evergreen_dma_cs_parse(struct radeon_cs_parser *p);
extern void evergreen_pm_misc(struct radeon_device *rdev);
extern void evergreen_pm_prepare(struct radeon_device *rdev);
extern void evergreen_pm_finish(struct radeon_device *rdev);
--
1.7.7.5
* [PATCH 3/4] drm/radeon: add VM CS parser support for async DMA on cayman/TN/SI
2012-12-13 17:57 [PATCH 1/4] drm/radeon/kms: add 6xx/7xx CS parser for async DMA alexdeucher
2012-12-13 17:57 ` [PATCH 2/4] drm/radeon/kms: add evergreen/cayman " alexdeucher
@ 2012-12-13 17:57 ` alexdeucher
2012-12-13 17:57 ` [PATCH 4/4] drm/radeon: enable the async DMA rings in the CS ioctl alexdeucher
` (2 subsequent siblings)
4 siblings, 0 replies; 9+ messages in thread
From: alexdeucher @ 2012-12-13 17:57 UTC
To: airlied, dri-devel; +Cc: Alex Deucher
From: Alex Deucher <alexander.deucher@amd.com>
Allows us to use async DMA from userspace.
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
drivers/gpu/drm/radeon/evergreen_cs.c | 111 +++++++++++++++++++++++++++++++++
drivers/gpu/drm/radeon/radeon_asic.c | 6 ++
drivers/gpu/drm/radeon/radeon_asic.h | 1 +
3 files changed, 118 insertions(+), 0 deletions(-)
diff --git a/drivers/gpu/drm/radeon/evergreen_cs.c b/drivers/gpu/drm/radeon/evergreen_cs.c
index adfc66f..fefb2cc 100644
--- a/drivers/gpu/drm/radeon/evergreen_cs.c
+++ b/drivers/gpu/drm/radeon/evergreen_cs.c
@@ -3543,3 +3543,114 @@ int evergreen_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
return ret;
}
+
+/**
+ * evergreen_dma_ib_parse() - parse the DMA IB for VM
+ * @rdev: radeon_device pointer
+ * @ib: radeon_ib pointer
+ *
+ * Parses the DMA IB from the VM CS ioctl
+ * checks for errors. (Cayman-SI)
+ * Returns 0 for success and an error on failure.
+ **/
+int evergreen_dma_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
+{
+ u32 idx = 0;
+ u32 header, cmd, count, tiled, new_cmd, misc;
+
+ do {
+ header = ib->ptr[idx];
+ cmd = GET_DMA_CMD(header);
+ count = GET_DMA_COUNT(header);
+ tiled = GET_DMA_T(header);
+ new_cmd = GET_DMA_NEW(header);
+ misc = GET_DMA_MISC(header);
+
+ switch (cmd) {
+ case DMA_PACKET_WRITE:
+ if (tiled)
+ idx += count + 7;
+ else
+ idx += count + 3;
+ break;
+ case DMA_PACKET_COPY:
+ if (tiled) {
+ if (new_cmd) {
+ switch (misc) {
+ case 0:
+ /* L2T, frame to fields */
+ idx += 10;
+ break;
+ case 1:
+ /* L2T, T2L partial */
+ idx += 12;
+ break;
+ case 3:
+ /* L2T, broadcast */
+ idx += 10;
+ break;
+ case 4:
+ /* L2T, T2L */
+ idx += 9;
+ break;
+ case 5:
+ /* T2T partial */
+ idx += 13;
+ break;
+ case 7:
+ /* L2T, broadcast */
+ idx += 10;
+ break;
+ default:
+ DRM_ERROR("bad DMA_PACKET_COPY misc %u\n", misc);
+ return -EINVAL;
+ }
+ } else {
+ switch (misc) {
+ case 0:
+ idx += 9;
+ break;
+ default:
+ DRM_ERROR("bad DMA_PACKET_COPY misc %u\n", misc);
+ return -EINVAL;
+ }
+ }
+ } else {
+ if (new_cmd) {
+ switch (misc) {
+ case 0:
+ /* L2L, byte */
+ idx += 5;
+ break;
+ case 1:
+ /* L2L, partial */
+ idx += 9;
+ break;
+ case 4:
+ /* L2L, dw, broadcast */
+ idx += 7;
+ break;
+ default:
+ DRM_ERROR("bad DMA_PACKET_COPY misc %u\n", misc);
+ return -EINVAL;
+ }
+ } else {
+ /* L2L, dw */
+ idx += 5;
+ }
+ }
+ break;
+ case DMA_PACKET_CONSTANT_FILL:
+ idx += 4;
+ break;
+ case DMA_PACKET_NOP:
+ idx += 1;
+ break;
+ default:
+ DRM_ERROR("Unknown packet type %d at %d !\n", cmd, idx);
+ return -EINVAL;
+ }
+ } while (idx < ib->length_dw);
+
+ return 0;
+}
diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c
index ac1d570..596bcbe 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.c
+++ b/drivers/gpu/drm/radeon/radeon_asic.c
@@ -1484,6 +1484,7 @@ static struct radeon_asic cayman_asic = {
},
[R600_RING_TYPE_DMA_INDEX] = {
.ib_execute = &cayman_dma_ring_ib_execute,
+ .ib_parse = &evergreen_dma_ib_parse,
.emit_fence = &evergreen_dma_fence_ring_emit,
.emit_semaphore = &r600_dma_semaphore_ring_emit,
.cs_parse = &evergreen_dma_cs_parse,
@@ -1494,6 +1495,7 @@ static struct radeon_asic cayman_asic = {
},
[CAYMAN_RING_TYPE_DMA1_INDEX] = {
.ib_execute = &cayman_dma_ring_ib_execute,
+ .ib_parse = &evergreen_dma_ib_parse,
.emit_fence = &evergreen_dma_fence_ring_emit,
.emit_semaphore = &r600_dma_semaphore_ring_emit,
.cs_parse = &evergreen_dma_cs_parse,
@@ -1609,6 +1611,7 @@ static struct radeon_asic trinity_asic = {
},
[R600_RING_TYPE_DMA_INDEX] = {
.ib_execute = &cayman_dma_ring_ib_execute,
+ .ib_parse = &evergreen_dma_ib_parse,
.emit_fence = &evergreen_dma_fence_ring_emit,
.emit_semaphore = &r600_dma_semaphore_ring_emit,
.cs_parse = &evergreen_dma_cs_parse,
@@ -1619,6 +1622,7 @@ static struct radeon_asic trinity_asic = {
},
[CAYMAN_RING_TYPE_DMA1_INDEX] = {
.ib_execute = &cayman_dma_ring_ib_execute,
+ .ib_parse = &evergreen_dma_ib_parse,
.emit_fence = &evergreen_dma_fence_ring_emit,
.emit_semaphore = &r600_dma_semaphore_ring_emit,
.cs_parse = &evergreen_dma_cs_parse,
@@ -1734,6 +1738,7 @@ static struct radeon_asic si_asic = {
},
[R600_RING_TYPE_DMA_INDEX] = {
.ib_execute = &cayman_dma_ring_ib_execute,
+ .ib_parse = &evergreen_dma_ib_parse,
.emit_fence = &evergreen_dma_fence_ring_emit,
.emit_semaphore = &r600_dma_semaphore_ring_emit,
.cs_parse = NULL,
@@ -1744,6 +1749,7 @@ static struct radeon_asic si_asic = {
},
[CAYMAN_RING_TYPE_DMA1_INDEX] = {
.ib_execute = &cayman_dma_ring_ib_execute,
+ .ib_parse = &evergreen_dma_ib_parse,
.emit_fence = &evergreen_dma_fence_ring_emit,
.emit_semaphore = &r600_dma_semaphore_ring_emit,
.cs_parse = NULL,
diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h
index d2ac646..5f4882c 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.h
+++ b/drivers/gpu/drm/radeon/radeon_asic.h
@@ -473,6 +473,7 @@ void cayman_vm_set_page(struct radeon_device *rdev, uint64_t pe,
uint64_t addr, unsigned count,
uint32_t incr, uint32_t flags);
int evergreen_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib);
+int evergreen_dma_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib);
void cayman_dma_ring_ib_execute(struct radeon_device *rdev,
struct radeon_ib *ib);
bool cayman_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring);
--
1.7.7.5
* [PATCH 4/4] drm/radeon: enable the async DMA rings in the CS ioctl
2012-12-13 17:57 [PATCH 1/4] drm/radeon/kms: add 6xx/7xx CS parser for async DMA alexdeucher
2012-12-13 17:57 ` [PATCH 2/4] drm/radeon/kms: add evergreen/cayman " alexdeucher
2012-12-13 17:57 ` [PATCH 3/4] drm/radeon: add VM CS parser support for async DMA on cayman/TN/SI alexdeucher
@ 2012-12-13 17:57 ` alexdeucher
2012-12-13 18:12 ` [PATCH 1/4] drm/radeon/kms: add 6xx/7xx CS parser for async DMA Christian König
2012-12-13 23:49 ` [PATCH 1/4] drm/radeon/kms: add 6xx/7xx CS parser for async DMA (v2) alexdeucher
4 siblings, 0 replies; 9+ messages in thread
From: alexdeucher @ 2012-12-13 17:57 UTC
To: airlied, dri-devel; +Cc: Alex Deucher
From: Alex Deucher <alexander.deucher@amd.com>
This enables the functionality added in the previous
patches. Userspace acceleration drivers can use the
CS ioctl to submit command buffers to the async DMA
rings.
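As a rough userspace-side sketch, assuming the three-dword RADEON_CHUNK_ID_FLAGS layout documented in radeon_drm.h below (a real driver would also supply IB and reloc chunks and check for -EINVAL on pre-R600 parts):

#include <stdint.h>
#include <drm/radeon_drm.h>

/* Flags chunk payload: dword 0 = flags, dword 1 = ring type,
 * dword 2 = priority (0 = normal). */
static void cs_flags_select_dma(uint32_t payload[3])
{
    payload[0] = 0;                   /* no RADEON_CS_* flags */
    payload[1] = RADEON_CS_RING_DMA;  /* the ring added by this patch */
    payload[2] = 0;                   /* normal priority */
}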
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
drivers/gpu/drm/radeon/radeon_cs.c | 12 ++++++++++++
include/uapi/drm/radeon_drm.h | 1 +
2 files changed, 13 insertions(+), 0 deletions(-)
diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c
index 1b32a5a..396baba 100644
--- a/drivers/gpu/drm/radeon/radeon_cs.c
+++ b/drivers/gpu/drm/radeon/radeon_cs.c
@@ -112,6 +112,18 @@ static int radeon_cs_get_ring(struct radeon_cs_parser *p, u32 ring, s32 priority
} else
p->ring = RADEON_RING_TYPE_GFX_INDEX;
break;
+ case RADEON_CS_RING_DMA:
+ if (p->rdev->family >= CHIP_CAYMAN) {
+ if (p->priority > 0)
+ p->ring = R600_RING_TYPE_DMA_INDEX;
+ else
+ p->ring = CAYMAN_RING_TYPE_DMA1_INDEX;
+ } else if (p->rdev->family >= CHIP_R600) {
+ p->ring = R600_RING_TYPE_DMA_INDEX;
+ } else {
+ return -EINVAL;
+ }
+ break;
}
return 0;
}
diff --git a/include/uapi/drm/radeon_drm.h b/include/uapi/drm/radeon_drm.h
index 5645a87..eeda917 100644
--- a/include/uapi/drm/radeon_drm.h
+++ b/include/uapi/drm/radeon_drm.h
@@ -917,6 +917,7 @@ struct drm_radeon_gem_va {
/* The second dword of RADEON_CHUNK_ID_FLAGS is a uint32 that sets the ring type */
#define RADEON_CS_RING_GFX 0
#define RADEON_CS_RING_COMPUTE 1
+#define RADEON_CS_RING_DMA 2
/* The third dword of RADEON_CHUNK_ID_FLAGS is a sint32 that sets the priority */
/* 0 = normal, + = higher priority, - = lower priority */
--
1.7.7.5
* Re: [PATCH 1/4] drm/radeon/kms: add 6xx/7xx CS parser for async DMA
2012-12-13 17:57 [PATCH 1/4] drm/radeon/kms: add 6xx/7xx CS parser for async DMA alexdeucher
` (2 preceding siblings ...)
2012-12-13 17:57 ` [PATCH 4/4] drm/radeon: enable the async DMA rings in the CS ioctl alexdeucher
@ 2012-12-13 18:12 ` Christian König
2012-12-13 23:49 ` [PATCH 1/4] drm/radeon/kms: add 6xx/7xx CS parser for async DMA (v2) alexdeucher
4 siblings, 0 replies; 9+ messages in thread
From: Christian König @ 2012-12-13 18:12 UTC
To: alexdeucher; +Cc: Alex Deucher, dri-devel
On 13.12.2012 18:57, alexdeucher@gmail.com wrote:
> From: Alex Deucher <alexander.deucher@amd.com>
>
> Allows us to use the DMA ring from userspace.
> DMA doesn't have a good NOP packet in which to embed the
> reloc idx, so userspace has to add a reloc for each
> buffer used and order them to match the command stream.
>
> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Looks good at first glance:
Reviewed-by: Christian König <christian.koenig@amd.com>
> ---
> drivers/gpu/drm/radeon/r600_cs.c | 193 ++++++++++++++++++++++++++++++++++
> drivers/gpu/drm/radeon/radeon.h | 1 +
> drivers/gpu/drm/radeon/radeon_asic.c | 6 +-
> drivers/gpu/drm/radeon/radeon_asic.h | 1 +
> drivers/gpu/drm/radeon/radeon_cs.c | 1 +
> 5 files changed, 199 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/radeon/r600_cs.c b/drivers/gpu/drm/radeon/r600_cs.c
> index 5d6e7f9..f23609ac 100644
> --- a/drivers/gpu/drm/radeon/r600_cs.c
> +++ b/drivers/gpu/drm/radeon/r600_cs.c
> @@ -2568,3 +2568,196 @@ void r600_cs_legacy_init(void)
> {
> r600_cs_packet_next_reloc = &r600_cs_packet_next_reloc_nomm;
> }
> +
> +/*
> + * DMA
> + */
> +/**
> + * r600_dma_cs_next_reloc() - parse next reloc
> + * @p: parser structure holding parsing context.
> + * @cs_reloc: reloc informations
> + *
> + * Return the next reloc, do bo validation and compute
> + * GPU offset using the provided start.
> + **/
> +int r600_dma_cs_next_reloc(struct radeon_cs_parser *p,
> + struct radeon_cs_reloc **cs_reloc)
> +{
> + struct radeon_cs_chunk *relocs_chunk;
> + unsigned idx;
> +
> + if (p->chunk_relocs_idx == -1) {
> + DRM_ERROR("No relocation chunk !\n");
> + return -EINVAL;
> + }
> + *cs_reloc = NULL;
> + relocs_chunk = &p->chunks[p->chunk_relocs_idx];
> + idx = p->dma_reloc_idx;
> + if (idx >= relocs_chunk->length_dw) {
> + DRM_ERROR("Relocs at %d after relocations chunk end %d !\n",
> + idx, relocs_chunk->length_dw);
> + return -EINVAL;
> + }
> + /* FIXME: we assume reloc size is 4 dwords */
> + *cs_reloc = p->relocs_ptr[(idx / 4)];
> + p->dma_reloc_idx++;
> + return 0;
> +}
> +
> +#define GET_DMA_CMD(h) (((h) & 0xf0000000) >> 28)
> +#define GET_DMA_COUNT(h) ((h) & 0x0000ffff)
> +#define GET_DMA_T(h) (((h) & 0x00800000) >> 23)
> +
> +/**
> + * r600_dma_cs_parse() - parse the DMA IB
> + * @p: parser structure holding parsing context.
> + *
> + * Parses the DMA IB from the CS ioctl and updates
> + * the GPU addresses based on the reloc information and
> + * checks for errors. (R6xx-R7xx)
> + * Returns 0 for success and an error on failure.
> + **/
> +int r600_dma_cs_parse(struct radeon_cs_parser *p)
> +{
> + struct radeon_cs_chunk *ib_chunk = &p->chunks[p->chunk_ib_idx];
> + struct radeon_cs_reloc *src_reloc, *dst_reloc;
> + u32 header, cmd, count, tiled;
> + volatile u32 *ib = p->ib.ptr;
> + u32 idx, idx_value;
> + u64 src_offset, dst_offset;
> + int r;
> +
> + do {
> + if (p->idx >= ib_chunk->length_dw) {
> + DRM_ERROR("Can not parse packet at %d after CS end %d !\n",
> + p->idx, ib_chunk->length_dw);
> + return -EINVAL;
> + }
> + idx = p->idx;
> + header = radeon_get_ib_value(p, idx);
> + cmd = GET_DMA_CMD(header);
> + count = GET_DMA_COUNT(header);
> + tiled = GET_DMA_T(header);
> +
> + switch (cmd) {
> + case DMA_PACKET_WRITE:
> + r = r600_dma_cs_next_reloc(p, &dst_reloc);
> + if (r) {
> + DRM_ERROR("bad DMA_PACKET_WRITE\n");
> + return -EINVAL;
> + }
> + if (tiled) {
> + ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
> + dst_offset = ib[idx+1];
> + dst_offset <<= 8;
> + p->idx += count + 5;
> + } else {
> + ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
> + ib[idx+2] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
> + dst_offset = ib[idx+1];
> + dst_offset |= ((u64)(ib[idx+2] & 0xff)) << 32;
> + p->idx += count + 3;
> + }
> + if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
> + dev_warn(p->dev, "DMA write buffer too small (%llu %lu)\n",
> + dst_offset, radeon_bo_size(dst_reloc->robj));
> + return -EINVAL;
> + }
> + break;
> + case DMA_PACKET_COPY:
> + r = r600_dma_cs_next_reloc(p, &src_reloc);
> + if (r) {
> + DRM_ERROR("bad DMA_PACKET_COPY\n");
> + return -EINVAL;
> + }
> + r = r600_dma_cs_next_reloc(p, &dst_reloc);
> + if (r) {
> + DRM_ERROR("bad DMA_PACKET_COPY\n");
> + return -EINVAL;
> + }
> + if (tiled) {
> + idx_value = radeon_get_ib_value(p, idx + 2);
> + /* detile bit */
> + if (idx_value & (1 << 31)) {
> + /* tiled src, linear dst */
> + ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8);
> + src_offset = ib[idx+1];
> + src_offset <<= 8;
> +
> + ib[idx+5] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
> + ib[idx+6] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
> + dst_offset = ib[idx+5];
> + dst_offset |= ((u64)(ib[idx+6] & 0xff)) << 32;
> + } else {
> + /* linear src, tiled dst */
> + ib[idx+5] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
> + ib[idx+6] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
> + src_offset = ib[idx+5];
> + src_offset |= ((u64)(ib[idx+6] & 0xff)) << 32;
> +
> + ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
> + dst_offset = ib[idx+1];
> + dst_offset <<= 8;
> + }
> + p->idx += 7;
> + } else {
> + ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
> + ib[idx+2] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
> + ib[idx+3] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
> + ib[idx+4] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
> +
> + src_offset = ib[idx+2];
> + src_offset |= ((u64)(ib[idx+4] & 0xff)) << 32;
> + dst_offset = ib[idx+1];
> + dst_offset |= ((u64)(ib[idx+3] & 0xff)) << 32;
> +
> + p->idx += 5;
> + }
> + if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
> + dev_warn(p->dev, "DMA copy src buffer too small (%llu %lu)\n",
> + src_offset, radeon_bo_size(src_reloc->robj));
> + return -EINVAL;
> + }
> + if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
> + dev_warn(p->dev, "DMA write dst buffer too small (%llu %lu)\n",
> + dst_offset, radeon_bo_size(dst_reloc->robj));
> + return -EINVAL;
> + }
> + break;
> + case DMA_PACKET_CONSTANT_FILL:
> + if (p->family < CHIP_RV770) {
> + DRM_ERROR("Constant Fill is 7xx only !\n");
> + return -EINVAL;
> + }
> + r = r600_dma_cs_next_reloc(p, &dst_reloc);
> + if (r) {
> + DRM_ERROR("bad DMA_PACKET_WRITE\n");
> + return -EINVAL;
> + }
> + ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
> + ib[idx+3] += (upper_32_bits(dst_reloc->lobj.gpu_offset) << 16) & 0x00ff0000;
> + dst_offset = ib[idx+1];
> + dst_offset |= ((u64)(ib[idx+3] & 0x00ff0000)) << 16;
> + if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
> + dev_warn(p->dev, "DMA constant fill buffer too small (%llu %lu)\n",
> + dst_offset, radeon_bo_size(dst_reloc->robj));
> + return -EINVAL;
> + }
> + p->idx += 4;
> + break;
> + case DMA_PACKET_NOP:
> + p->idx += 1;
> + break;
> + default:
> + DRM_ERROR("Unknown packet type %d at %d !\n", cmd, idx);
> + return -EINVAL;
> + }
> + } while (p->idx < p->chunks[p->chunk_ib_idx].length_dw);
> +#if 0
> + for (r = 0; r < p->ib->length_dw; r++) {
> + printk(KERN_INFO "%05d 0x%08X\n", r, p->ib.ptr[r]);
> + mdelay(1);
> + }
> +#endif
> + return 0;
> +}
> diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
> index 285fb3f..5dc744d 100644
> --- a/drivers/gpu/drm/radeon/radeon.h
> +++ b/drivers/gpu/drm/radeon/radeon.h
> @@ -839,6 +839,7 @@ struct radeon_cs_parser {
> struct radeon_cs_reloc *relocs;
> struct radeon_cs_reloc **relocs_ptr;
> struct list_head validated;
> + unsigned dma_reloc_idx;
> /* indices of various chunks */
> int chunk_ib_idx;
> int chunk_relocs_idx;
> diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c
> index 3ea0475..d360341 100644
> --- a/drivers/gpu/drm/radeon/radeon_asic.c
> +++ b/drivers/gpu/drm/radeon/radeon_asic.c
> @@ -952,7 +952,7 @@ static struct radeon_asic r600_asic = {
> .ib_execute = &r600_dma_ring_ib_execute,
> .emit_fence = &r600_dma_fence_ring_emit,
> .emit_semaphore = &r600_dma_semaphore_ring_emit,
> - .cs_parse = NULL,
> + .cs_parse = &r600_dma_cs_parse,
> .ring_test = &r600_dma_ring_test,
> .ib_test = &r600_dma_ib_test,
> .is_lockup = &r600_dma_is_lockup,
> @@ -1036,7 +1036,7 @@ static struct radeon_asic rs780_asic = {
> .ib_execute = &r600_dma_ring_ib_execute,
> .emit_fence = &r600_dma_fence_ring_emit,
> .emit_semaphore = &r600_dma_semaphore_ring_emit,
> - .cs_parse = NULL,
> + .cs_parse = &r600_dma_cs_parse,
> .ring_test = &r600_dma_ring_test,
> .ib_test = &r600_dma_ib_test,
> .is_lockup = &r600_dma_is_lockup,
> @@ -1120,7 +1120,7 @@ static struct radeon_asic rv770_asic = {
> .ib_execute = &r600_dma_ring_ib_execute,
> .emit_fence = &r600_dma_fence_ring_emit,
> .emit_semaphore = &r600_dma_semaphore_ring_emit,
> - .cs_parse = NULL,
> + .cs_parse = &r600_dma_cs_parse,
> .ring_test = &r600_dma_ring_test,
> .ib_test = &r600_dma_ib_test,
> .is_lockup = &r600_dma_is_lockup,
> diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h
> index c338931..b311c0a 100644
> --- a/drivers/gpu/drm/radeon/radeon_asic.h
> +++ b/drivers/gpu/drm/radeon/radeon_asic.h
> @@ -304,6 +304,7 @@ void r600_pcie_gart_tlb_flush(struct radeon_device *rdev);
> uint32_t r600_pciep_rreg(struct radeon_device *rdev, uint32_t reg);
> void r600_pciep_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v);
> int r600_cs_parse(struct radeon_cs_parser *p);
> +int r600_dma_cs_parse(struct radeon_cs_parser *p);
> void r600_fence_ring_emit(struct radeon_device *rdev,
> struct radeon_fence *fence);
> void r600_semaphore_ring_emit(struct radeon_device *rdev,
> diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c
> index 41672cc..1b32a5a 100644
> --- a/drivers/gpu/drm/radeon/radeon_cs.c
> +++ b/drivers/gpu/drm/radeon/radeon_cs.c
> @@ -43,6 +43,7 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
> return 0;
> }
> chunk = &p->chunks[p->chunk_relocs_idx];
> + p->dma_reloc_idx = 0;
> /* FIXME: we assume that each relocs use 4 dwords */
> p->nrelocs = chunk->length_dw / 4;
> p->relocs_ptr = kcalloc(p->nrelocs, sizeof(void *), GFP_KERNEL);
* [PATCH 1/4] drm/radeon/kms: add 6xx/7xx CS parser for async DMA (v2)
2012-12-13 17:57 [PATCH 1/4] drm/radeon/kms: add 6xx/7xx CS parser for async DMA alexdeucher
` (3 preceding siblings ...)
2012-12-13 18:12 ` [PATCH 1/4] drm/radeon/kms: add 6xx/7xx CS parser for async DMA Christian König
@ 2012-12-13 23:49 ` alexdeucher
2012-12-13 23:49 ` [PATCH 2/4] drm/radeon/kms: add evergreen/cayman " alexdeucher
` (2 more replies)
4 siblings, 3 replies; 9+ messages in thread
From: alexdeucher @ 2012-12-13 23:49 UTC
To: airlied, dri-devel; +Cc: Alex Deucher
From: Alex Deucher <alexander.deucher@amd.com>
Allows us to use the DMA ring from userspace.
DMA doesn't have a good NOP packet in which to embed the
reloc idx, so userspace has to add a reloc for each
buffer used and order them to match the command stream.
v2: fix address bounds checking, reloc indexing
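Both halves of the v2 fix can be read off the diffs in this thread; condensed for reference:

/* Bounds checking: v1 patched the GPU base into the IB first and then
 * compared the resulting GPU address against the BO size; v2 reads the
 * user-supplied offset before patching:
 *
 *   v1:  ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
 *        dst_offset = ib[idx+1];       // base + offset: check bogus
 *   v2:  dst_offset = ib[idx+1];       // raw offset: check correct
 *        ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
 *
 * Reloc indexing: dma_reloc_idx advances once per reloc, so v1's
 * divide-by-4 (the assumed reloc size in dwords) sent the first four
 * lookups all to reloc 0:
 *
 *   v1:  *cs_reloc = p->relocs_ptr[(idx / 4)];
 *   v2:  *cs_reloc = p->relocs_ptr[idx];
 */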
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
drivers/gpu/drm/radeon/r600_cs.c | 193 ++++++++++++++++++++++++++++++++++
drivers/gpu/drm/radeon/radeon.h | 1 +
drivers/gpu/drm/radeon/radeon_asic.c | 6 +-
drivers/gpu/drm/radeon/radeon_asic.h | 1 +
drivers/gpu/drm/radeon/radeon_cs.c | 1 +
5 files changed, 199 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/radeon/r600_cs.c b/drivers/gpu/drm/radeon/r600_cs.c
index 5d6e7f9..1e2935e 100644
--- a/drivers/gpu/drm/radeon/r600_cs.c
+++ b/drivers/gpu/drm/radeon/r600_cs.c
@@ -2568,3 +2568,196 @@ void r600_cs_legacy_init(void)
{
r600_cs_packet_next_reloc = &r600_cs_packet_next_reloc_nomm;
}
+
+/*
+ * DMA
+ */
+/**
+ * r600_dma_cs_next_reloc() - parse next reloc
+ * @p: parser structure holding parsing context.
+ * @cs_reloc: reloc information
+ *
+ * Return the next reloc, do bo validation and compute
+ * GPU offset using the provided start.
+ **/
+int r600_dma_cs_next_reloc(struct radeon_cs_parser *p,
+ struct radeon_cs_reloc **cs_reloc)
+{
+ struct radeon_cs_chunk *relocs_chunk;
+ unsigned idx;
+
+ if (p->chunk_relocs_idx == -1) {
+ DRM_ERROR("No relocation chunk !\n");
+ return -EINVAL;
+ }
+ *cs_reloc = NULL;
+ relocs_chunk = &p->chunks[p->chunk_relocs_idx];
+ idx = p->dma_reloc_idx;
+ if (idx >= relocs_chunk->length_dw) {
+ DRM_ERROR("Relocs at %d after relocations chunk end %d !\n",
+ idx, relocs_chunk->length_dw);
+ return -EINVAL;
+ }
+ *cs_reloc = p->relocs_ptr[idx];
+ p->dma_reloc_idx++;
+ return 0;
+}
+
+#define GET_DMA_CMD(h) (((h) & 0xf0000000) >> 28)
+#define GET_DMA_COUNT(h) ((h) & 0x0000ffff)
+#define GET_DMA_T(h) (((h) & 0x00800000) >> 23)
+
+/**
+ * r600_dma_cs_parse() - parse the DMA IB
+ * @p: parser structure holding parsing context.
+ *
+ * Parses the DMA IB from the CS ioctl and updates
+ * the GPU addresses based on the reloc information and
+ * checks for errors. (R6xx-R7xx)
+ * Returns 0 for success and an error on failure.
+ **/
+int r600_dma_cs_parse(struct radeon_cs_parser *p)
+{
+ struct radeon_cs_chunk *ib_chunk = &p->chunks[p->chunk_ib_idx];
+ struct radeon_cs_reloc *src_reloc, *dst_reloc;
+ u32 header, cmd, count, tiled;
+ volatile u32 *ib = p->ib.ptr;
+ u32 idx, idx_value;
+ u64 src_offset, dst_offset;
+ int r;
+
+ do {
+ if (p->idx >= ib_chunk->length_dw) {
+ DRM_ERROR("Can not parse packet at %d after CS end %d !\n",
+ p->idx, ib_chunk->length_dw);
+ return -EINVAL;
+ }
+ idx = p->idx;
+ header = radeon_get_ib_value(p, idx);
+ cmd = GET_DMA_CMD(header);
+ count = GET_DMA_COUNT(header);
+ tiled = GET_DMA_T(header);
+
+ switch (cmd) {
+ case DMA_PACKET_WRITE:
+ r = r600_dma_cs_next_reloc(p, &dst_reloc);
+ if (r) {
+ DRM_ERROR("bad DMA_PACKET_WRITE\n");
+ return -EINVAL;
+ }
+ if (tiled) {
+ dst_offset = ib[idx+1];
+ dst_offset <<= 8;
+
+ ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
+ p->idx += count + 5;
+ } else {
+ dst_offset = ib[idx+1];
+ dst_offset |= ((u64)(ib[idx+2] & 0xff)) << 32;
+
+ ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
+ ib[idx+2] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
+ p->idx += count + 3;
+ }
+ if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
+ dev_warn(p->dev, "DMA write buffer too small (%llu %lu)\n",
+ dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
+ return -EINVAL;
+ }
+ break;
+ case DMA_PACKET_COPY:
+ r = r600_dma_cs_next_reloc(p, &src_reloc);
+ if (r) {
+ DRM_ERROR("bad DMA_PACKET_COPY\n");
+ return -EINVAL;
+ }
+ r = r600_dma_cs_next_reloc(p, &dst_reloc);
+ if (r) {
+ DRM_ERROR("bad DMA_PACKET_COPY\n");
+ return -EINVAL;
+ }
+ if (tiled) {
+ idx_value = radeon_get_ib_value(p, idx + 2);
+ /* detile bit */
+ if (idx_value & (1 << 31)) {
+ /* tiled src, linear dst */
+ src_offset = ib[idx+1];
+ src_offset <<= 8;
+ ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8);
+
+ dst_offset = ib[idx+5];
+ dst_offset |= ((u64)(ib[idx+6] & 0xff)) << 32;
+ ib[idx+5] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
+ ib[idx+6] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
+ } else {
+ /* linear src, tiled dst */
+ src_offset = ib[idx+5];
+ src_offset |= ((u64)(ib[idx+6] & 0xff)) << 32;
+ ib[idx+5] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
+ ib[idx+6] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+
+ dst_offset = ib[idx+1];
+ dst_offset <<= 8;
+ ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
+ }
+ p->idx += 7;
+ } else {
+ src_offset = ib[idx+2];
+ src_offset |= ((u64)(ib[idx+4] & 0xff)) << 32;
+ dst_offset = ib[idx+1];
+ dst_offset |= ((u64)(ib[idx+3] & 0xff)) << 32;
+
+ ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
+ ib[idx+2] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
+ ib[idx+3] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
+ ib[idx+4] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+ p->idx += 5;
+ }
+ if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
+ dev_warn(p->dev, "DMA copy src buffer too small (%llu %lu)\n",
+ src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
+ return -EINVAL;
+ }
+ if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
+ dev_warn(p->dev, "DMA write dst buffer too small (%llu %lu)\n",
+ dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
+ return -EINVAL;
+ }
+ break;
+ case DMA_PACKET_CONSTANT_FILL:
+ if (p->family < CHIP_RV770) {
+ DRM_ERROR("Constant Fill is 7xx only !\n");
+ return -EINVAL;
+ }
+ r = r600_dma_cs_next_reloc(p, &dst_reloc);
+ if (r) {
+ DRM_ERROR("bad DMA_PACKET_WRITE\n");
+ return -EINVAL;
+ }
+ dst_offset = ib[idx+1];
+ dst_offset |= ((u64)(ib[idx+3] & 0x00ff0000)) << 16;
+ if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
+ dev_warn(p->dev, "DMA constant fill buffer too small (%llu %lu)\n",
+ dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
+ return -EINVAL;
+ }
+ ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
+ ib[idx+3] += (upper_32_bits(dst_reloc->lobj.gpu_offset) << 16) & 0x00ff0000;
+ p->idx += 4;
+ break;
+ case DMA_PACKET_NOP:
+ p->idx += 1;
+ break;
+ default:
+ DRM_ERROR("Unknown packet type %d at %d !\n", cmd, idx);
+ return -EINVAL;
+ }
+ } while (p->idx < p->chunks[p->chunk_ib_idx].length_dw);
+#if 0
+ for (r = 0; r < p->ib->length_dw; r++) {
+ printk(KERN_INFO "%05d 0x%08X\n", r, p->ib.ptr[r]);
+ mdelay(1);
+ }
+#endif
+ return 0;
+}
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 285fb3f..5dc744d 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -839,6 +839,7 @@ struct radeon_cs_parser {
struct radeon_cs_reloc *relocs;
struct radeon_cs_reloc **relocs_ptr;
struct list_head validated;
+ unsigned dma_reloc_idx;
/* indices of various chunks */
int chunk_ib_idx;
int chunk_relocs_idx;
diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c
index 3ea0475..d360341 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.c
+++ b/drivers/gpu/drm/radeon/radeon_asic.c
@@ -952,7 +952,7 @@ static struct radeon_asic r600_asic = {
.ib_execute = &r600_dma_ring_ib_execute,
.emit_fence = &r600_dma_fence_ring_emit,
.emit_semaphore = &r600_dma_semaphore_ring_emit,
- .cs_parse = NULL,
+ .cs_parse = &r600_dma_cs_parse,
.ring_test = &r600_dma_ring_test,
.ib_test = &r600_dma_ib_test,
.is_lockup = &r600_dma_is_lockup,
@@ -1036,7 +1036,7 @@ static struct radeon_asic rs780_asic = {
.ib_execute = &r600_dma_ring_ib_execute,
.emit_fence = &r600_dma_fence_ring_emit,
.emit_semaphore = &r600_dma_semaphore_ring_emit,
- .cs_parse = NULL,
+ .cs_parse = &r600_dma_cs_parse,
.ring_test = &r600_dma_ring_test,
.ib_test = &r600_dma_ib_test,
.is_lockup = &r600_dma_is_lockup,
@@ -1120,7 +1120,7 @@ static struct radeon_asic rv770_asic = {
.ib_execute = &r600_dma_ring_ib_execute,
.emit_fence = &r600_dma_fence_ring_emit,
.emit_semaphore = &r600_dma_semaphore_ring_emit,
- .cs_parse = NULL,
+ .cs_parse = &r600_dma_cs_parse,
.ring_test = &r600_dma_ring_test,
.ib_test = &r600_dma_ib_test,
.is_lockup = &r600_dma_is_lockup,
diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h
index c338931..b311c0a 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.h
+++ b/drivers/gpu/drm/radeon/radeon_asic.h
@@ -304,6 +304,7 @@ void r600_pcie_gart_tlb_flush(struct radeon_device *rdev);
uint32_t r600_pciep_rreg(struct radeon_device *rdev, uint32_t reg);
void r600_pciep_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v);
int r600_cs_parse(struct radeon_cs_parser *p);
+int r600_dma_cs_parse(struct radeon_cs_parser *p);
void r600_fence_ring_emit(struct radeon_device *rdev,
struct radeon_fence *fence);
void r600_semaphore_ring_emit(struct radeon_device *rdev,
diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c
index 41672cc..1b32a5a 100644
--- a/drivers/gpu/drm/radeon/radeon_cs.c
+++ b/drivers/gpu/drm/radeon/radeon_cs.c
@@ -43,6 +43,7 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
return 0;
}
chunk = &p->chunks[p->chunk_relocs_idx];
+ p->dma_reloc_idx = 0;
/* FIXME: we assume that each relocs use 4 dwords */
p->nrelocs = chunk->length_dw / 4;
p->relocs_ptr = kcalloc(p->nrelocs, sizeof(void *), GFP_KERNEL);
--
1.7.7.5
* [PATCH 2/4] drm/radeon/kms: add evergreen/cayman CS parser for async DMA (v2)
2012-12-13 23:49 ` [PATCH 1/4] drm/radeon/kms: add 6xx/7xx CS parser for async DMA (v2) alexdeucher
@ 2012-12-13 23:49 ` alexdeucher
2012-12-13 23:49 ` [PATCH 3/4] drm/radeon: add VM CS parser support for async DMA on cayman/TN/SI alexdeucher
2012-12-13 23:50 ` [PATCH 4/4] drm/radeon: enable the async DMA rings in the CS ioctl alexdeucher
2 siblings, 0 replies; 9+ messages in thread
From: alexdeucher @ 2012-12-13 23:49 UTC (permalink / raw)
To: airlied, dri-devel; +Cc: Alex Deucher
From: Alex Deucher <alexander.deucher@amd.com>
Allows us to use the DMA ring from userspace.
DMA doesn't have a good NOP packet in which to embed the
reloc idx, so userspace has to add a reloc for each
buffer used and order them to match the command stream.
v2: fix address bounds checking
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
drivers/gpu/drm/radeon/evergreen_cs.c | 451 +++++++++++++++++++++++++++++++++
drivers/gpu/drm/radeon/radeon_asic.c | 14 +-
drivers/gpu/drm/radeon/radeon_asic.h | 1 +
3 files changed, 459 insertions(+), 7 deletions(-)
diff --git a/drivers/gpu/drm/radeon/evergreen_cs.c b/drivers/gpu/drm/radeon/evergreen_cs.c
index 62c2271..1e8bd46 100644
--- a/drivers/gpu/drm/radeon/evergreen_cs.c
+++ b/drivers/gpu/drm/radeon/evergreen_cs.c
@@ -34,6 +34,8 @@
#define MAX(a,b) (((a)>(b))?(a):(b))
#define MIN(a,b) (((a)<(b))?(a):(b))
+int r600_dma_cs_next_reloc(struct radeon_cs_parser *p,
+ struct radeon_cs_reloc **cs_reloc);
static int evergreen_cs_packet_next_reloc(struct radeon_cs_parser *p,
struct radeon_cs_reloc **cs_reloc);
@@ -2804,6 +2806,455 @@ int evergreen_cs_parse(struct radeon_cs_parser *p)
return 0;
}
+/*
+ * DMA
+ */
+
+#define GET_DMA_CMD(h) (((h) & 0xf0000000) >> 28)
+#define GET_DMA_COUNT(h) ((h) & 0x000fffff)
+#define GET_DMA_T(h) (((h) & 0x00800000) >> 23)
+#define GET_DMA_NEW(h) (((h) & 0x04000000) >> 26)
+#define GET_DMA_MISC(h) (((h) & 0x0700000) >> 20)
+
+/**
+ * evergreen_dma_cs_parse() - parse the DMA IB
+ * @p: parser structure holding parsing context.
+ *
+ * Parses the DMA IB from the CS ioctl and updates
+ * the GPU addresses based on the reloc information and
+ * checks for errors. (Evergreen-Cayman)
+ * Returns 0 for success and an error on failure.
+ **/
+int evergreen_dma_cs_parse(struct radeon_cs_parser *p)
+{
+ struct radeon_cs_chunk *ib_chunk = &p->chunks[p->chunk_ib_idx];
+ struct radeon_cs_reloc *src_reloc, *dst_reloc, *dst2_reloc;
+ u32 header, cmd, count, tiled, new_cmd, misc;
+ volatile u32 *ib = p->ib.ptr;
+ u32 idx, idx_value;
+ u64 src_offset, dst_offset, dst2_offset;
+ int r;
+
+ do {
+ if (p->idx >= ib_chunk->length_dw) {
+ DRM_ERROR("Can not parse packet at %d after CS end %d !\n",
+ p->idx, ib_chunk->length_dw);
+ return -EINVAL;
+ }
+ idx = p->idx;
+ header = radeon_get_ib_value(p, idx);
+ cmd = GET_DMA_CMD(header);
+ count = GET_DMA_COUNT(header);
+ tiled = GET_DMA_T(header);
+ new_cmd = GET_DMA_NEW(header);
+ misc = GET_DMA_MISC(header);
+
+ switch (cmd) {
+ case DMA_PACKET_WRITE:
+ r = r600_dma_cs_next_reloc(p, &dst_reloc);
+ if (r) {
+ DRM_ERROR("bad DMA_PACKET_WRITE\n");
+ return -EINVAL;
+ }
+ if (tiled) {
+ dst_offset = ib[idx+1];
+ dst_offset <<= 8;
+
+ ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
+ p->idx += count + 7;
+ } else {
+ dst_offset = ib[idx+1];
+ dst_offset |= ((u64)(ib[idx+2] & 0xff)) << 32;
+
+ ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
+ ib[idx+2] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
+ p->idx += count + 3;
+ }
+ if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
+ dev_warn(p->dev, "DMA write buffer too small (%llu %lu)\n",
+ dst_offset, radeon_bo_size(dst_reloc->robj));
+ return -EINVAL;
+ }
+ break;
+ case DMA_PACKET_COPY:
+ r = r600_dma_cs_next_reloc(p, &src_reloc);
+ if (r) {
+ DRM_ERROR("bad DMA_PACKET_COPY\n");
+ return -EINVAL;
+ }
+ r = r600_dma_cs_next_reloc(p, &dst_reloc);
+ if (r) {
+ DRM_ERROR("bad DMA_PACKET_COPY\n");
+ return -EINVAL;
+ }
+ if (tiled) {
+ idx_value = radeon_get_ib_value(p, idx + 2);
+ if (new_cmd) {
+ switch (misc) {
+ case 0:
+ /* L2T, frame to fields */
+ if (idx_value & (1 << 31)) {
+ DRM_ERROR("bad L2T, frame to fields DMA_PACKET_COPY\n");
+ return -EINVAL;
+ }
+ r = r600_dma_cs_next_reloc(p, &dst2_reloc);
+ if (r) {
+ DRM_ERROR("bad L2T, frame to fields DMA_PACKET_COPY\n");
+ return -EINVAL;
+ }
+ dst_offset = ib[idx+1];
+ dst_offset <<= 8;
+ dst2_offset = ib[idx+2];
+ dst2_offset <<= 8;
+ src_offset = ib[idx+8];
+ src_offset |= ((u64)(ib[idx+9] & 0xff)) << 32;
+ if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
+ dev_warn(p->dev, "DMA L2T, frame to fields src buffer too small (%llu %lu)\n",
+ src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
+ return -EINVAL;
+ }
+ if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
+ dev_warn(p->dev, "DMA L2T, frame to fields buffer too small (%llu %lu)\n",
+ dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
+ return -EINVAL;
+ }
+ if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) {
+ dev_warn(p->dev, "DMA L2T, frame to fields buffer too small (%llu %lu)\n",
+ dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj));
+ return -EINVAL;
+ }
+ ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
+ ib[idx+2] += (u32)(dst2_reloc->lobj.gpu_offset >> 8);
+ ib[idx+8] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
+ ib[idx+9] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+ p->idx += 10;
+ break;
+ case 1:
+ /* L2T, T2L partial */
+ if (p->family < CHIP_CAYMAN) {
+ DRM_ERROR("L2T, T2L Partial is cayman only !\n");
+ return -EINVAL;
+ }
+ /* detile bit */
+ if (idx_value & (1 << 31)) {
+ /* tiled src, linear dst */
+ ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8);
+
+ ib[idx+7] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
+ ib[idx+8] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
+ } else {
+ /* linear src, tiled dst */
+ ib[idx+7] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
+ ib[idx+8] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+
+ ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
+ }
+ p->idx += 12;
+ break;
+ case 3:
+ /* L2T, broadcast */
+ if (idx_value & (1 << 31)) {
+ DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
+ return -EINVAL;
+ }
+ r = r600_dma_cs_next_reloc(p, &dst2_reloc);
+ if (r) {
+ DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
+ return -EINVAL;
+ }
+ dst_offset = ib[idx+1];
+ dst_offset <<= 8;
+ dst2_offset = ib[idx+2];
+ dst2_offset <<= 8;
+ src_offset = ib[idx+8];
+ src_offset |= ((u64)(ib[idx+9] & 0xff)) << 32;
+ if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
+ dev_warn(p->dev, "DMA L2T, broadcast src buffer too small (%llu %lu)\n",
+ src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
+ return -EINVAL;
+ }
+ if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
+ dev_warn(p->dev, "DMA L2T, broadcast dst buffer too small (%llu %lu)\n",
+ dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
+ return -EINVAL;
+ }
+ if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) {
+ dev_warn(p->dev, "DMA L2T, broadcast dst2 buffer too small (%llu %lu)\n",
+ dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj));
+ return -EINVAL;
+ }
+ ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
+ ib[idx+2] += (u32)(dst2_reloc->lobj.gpu_offset >> 8);
+ ib[idx+8] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
+ ib[idx+9] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+ p->idx += 10;
+ break;
+ case 4:
+ /* L2T, T2L */
+ /* detile bit */
+ if (idx_value & (1 << 31)) {
+ /* tiled src, linear dst */
+ src_offset = ib[idx+1];
+ src_offset <<= 8;
+ ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8);
+
+ dst_offset = ib[idx+7];
+ dst_offset |= ((u64)(ib[idx+8] & 0xff)) << 32;
+ ib[idx+7] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
+ ib[idx+8] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
+ } else {
+ /* linear src, tiled dst */
+ src_offset = ib[idx+7];
+ src_offset |= ((u64)(ib[idx+8] & 0xff)) << 32;
+ ib[idx+7] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
+ ib[idx+8] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+
+ dst_offset = ib[idx+1];
+ dst_offset <<= 8;
+ ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
+ }
+ if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
+ dev_warn(p->dev, "DMA L2T, T2L src buffer too small (%llu %lu)\n",
+ src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
+ return -EINVAL;
+ }
+ if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
+ dev_warn(p->dev, "DMA L2T, T2L dst buffer too small (%llu %lu)\n",
+ dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
+ return -EINVAL;
+ }
+ p->idx += 9;
+ break;
+ case 5:
+ /* T2T partial */
+ if (p->family < CHIP_CAYMAN) {
+ DRM_ERROR("L2T, T2L Partial is cayman only !\n");
+ return -EINVAL;
+ }
+ ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8);
+ ib[idx+4] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
+ p->idx += 13;
+ break;
+ case 7:
+ /* L2T, broadcast */
+ if (idx_value & (1 << 31)) {
+ DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
+ return -EINVAL;
+ }
+ r = r600_dma_cs_next_reloc(p, &dst2_reloc);
+ if (r) {
+ DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
+ return -EINVAL;
+ }
+ dst_offset = ib[idx+1];
+ dst_offset <<= 8;
+ dst2_offset = ib[idx+2];
+ dst2_offset <<= 8;
+ src_offset = ib[idx+8];
+ src_offset |= ((u64)(ib[idx+9] & 0xff)) << 32;
+ if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
+ dev_warn(p->dev, "DMA L2T, broadcast src buffer too small (%llu %lu)\n",
+ src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
+ return -EINVAL;
+ }
+ if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
+ dev_warn(p->dev, "DMA L2T, broadcast dst buffer too small (%llu %lu)\n",
+ dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
+ return -EINVAL;
+ }
+ if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) {
+ dev_warn(p->dev, "DMA L2T, broadcast dst2 buffer too small (%llu %lu)\n",
+ dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj));
+ return -EINVAL;
+ }
+ ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
+ ib[idx+2] += (u32)(dst2_reloc->lobj.gpu_offset >> 8);
+ ib[idx+8] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
+ ib[idx+9] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+ p->idx += 10;
+ break;
+ default:
+ DRM_ERROR("bad DMA_PACKET_COPY misc %u\n", misc);
+ return -EINVAL;
+ }
+ } else {
+ switch (misc) {
+ case 0:
+ /* detile bit */
+ if (idx_value & (1 << 31)) {
+ /* tiled src, linear dst */
+ src_offset = ib[idx+1];
+ src_offset <<= 8;
+ ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8);
+
+ dst_offset = ib[idx+7];
+ dst_offset |= ((u64)(ib[idx+8] & 0xff)) << 32;
+ ib[idx+7] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
+ ib[idx+8] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
+ } else {
+ /* linear src, tiled dst */
+ src_offset = ib[idx+7];
+ src_offset |= ((u64)(ib[idx+8] & 0xff)) << 32;
+ ib[idx+7] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
+ ib[idx+8] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+
+ dst_offset = ib[idx+1];
+ dst_offset <<= 8;
+ ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
+ }
+ if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
+ dev_warn(p->dev, "DMA L2T, broadcast src buffer too small (%llu %lu)\n",
+ src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
+ return -EINVAL;
+ }
+ if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
+ dev_warn(p->dev, "DMA L2T, broadcast dst buffer too small (%llu %lu)\n",
+ dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
+ return -EINVAL;
+ }
+ p->idx += 9;
+ break;
+ default:
+ DRM_ERROR("bad DMA_PACKET_COPY misc %u\n", misc);
+ return -EINVAL;
+ }
+ }
+ } else {
+ if (new_cmd) {
+ switch (misc) {
+ case 0:
+ /* L2L, byte */
+ src_offset = ib[idx+2];
+ src_offset |= ((u64)(ib[idx+4] & 0xff)) << 32;
+ dst_offset = ib[idx+1];
+ dst_offset |= ((u64)(ib[idx+3] & 0xff)) << 32;
+ if ((src_offset + count) > radeon_bo_size(src_reloc->robj)) {
+ dev_warn(p->dev, "DMA L2L, byte src buffer too small (%llu %lu)\n",
+ src_offset + count, radeon_bo_size(src_reloc->robj));
+ return -EINVAL;
+ }
+ if ((dst_offset + count) > radeon_bo_size(dst_reloc->robj)) {
+ dev_warn(p->dev, "DMA L2L, byte dst buffer too small (%llu %lu)\n",
+ dst_offset + count, radeon_bo_size(dst_reloc->robj));
+ return -EINVAL;
+ }
+ ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xffffffff);
+ ib[idx+2] += (u32)(src_reloc->lobj.gpu_offset & 0xffffffff);
+ ib[idx+3] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
+ ib[idx+4] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+ p->idx += 5;
+ break;
+ case 1:
+ /* L2L, partial */
+ if (p->family < CHIP_CAYMAN) {
+ DRM_ERROR("L2L Partial is cayman only !\n");
+ return -EINVAL;
+ }
+ ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset & 0xffffffff);
+ ib[idx+2] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+ ib[idx+4] += (u32)(dst_reloc->lobj.gpu_offset & 0xffffffff);
+ ib[idx+5] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
+
+ p->idx += 9;
+ break;
+ case 4:
+ /* L2L, dw, broadcast */
+ r = r600_dma_cs_next_reloc(p, &dst2_reloc);
+ if (r) {
+ DRM_ERROR("bad L2L, dw, broadcast DMA_PACKET_COPY\n");
+ return -EINVAL;
+ }
+ dst_offset = ib[idx+1];
+ dst_offset |= ((u64)(ib[idx+4] & 0xff)) << 32;
+ dst2_offset = ib[idx+2];
+ dst2_offset |= ((u64)(ib[idx+5] & 0xff)) << 32;
+ src_offset = ib[idx+3];
+ src_offset |= ((u64)(ib[idx+6] & 0xff)) << 32;
+ if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
+ dev_warn(p->dev, "DMA L2L, dw, broadcast src buffer too small (%llu %lu)\n",
+ src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
+ return -EINVAL;
+ }
+ if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
+ dev_warn(p->dev, "DMA L2L, dw, broadcast dst buffer too small (%llu %lu)\n",
+ dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
+ return -EINVAL;
+ }
+ if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) {
+ dev_warn(p->dev, "DMA L2L, dw, broadcast dst2 buffer too small (%llu %lu)\n",
+ dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj));
+ return -EINVAL;
+ }
+ ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
+ ib[idx+2] += (u32)(dst2_reloc->lobj.gpu_offset & 0xfffffffc);
+ ib[idx+3] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
+ ib[idx+4] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
+ ib[idx+5] += upper_32_bits(dst2_reloc->lobj.gpu_offset) & 0xff;
+ ib[idx+6] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+ p->idx += 7;
+ break;
+ default:
+ DRM_ERROR("bad DMA_PACKET_COPY misc %u\n", misc);
+ return -EINVAL;
+ }
+ } else {
+ /* L2L, dw */
+ src_offset = ib[idx+2];
+ src_offset |= ((u64)(ib[idx+4] & 0xff)) << 32;
+ dst_offset = ib[idx+1];
+ dst_offset |= ((u64)(ib[idx+3] & 0xff)) << 32;
+ if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
+ dev_warn(p->dev, "DMA L2L, dw src buffer too small (%llu %lu)\n",
+ src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
+ return -EINVAL;
+ }
+ if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
+ dev_warn(p->dev, "DMA L2L, dw dst buffer too small (%llu %lu)\n",
+ dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
+ return -EINVAL;
+ }
+ ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
+ ib[idx+2] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
+ ib[idx+3] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
+ ib[idx+4] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+ p->idx += 5;
+ }
+ }
+ break;
+ case DMA_PACKET_CONSTANT_FILL:
+ r = r600_dma_cs_next_reloc(p, &dst_reloc);
+ if (r) {
+ DRM_ERROR("bad DMA_PACKET_CONSTANT_FILL\n");
+ return -EINVAL;
+ }
+ dst_offset = ib[idx+1];
+ dst_offset |= ((u64)(ib[idx+3] & 0x00ff0000)) << 16;
+ if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
+ dev_warn(p->dev, "DMA constant fill buffer too small (%llu %lu)\n",
+ dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
+ return -EINVAL;
+ }
+ ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
+ ib[idx+3] += (upper_32_bits(dst_reloc->lobj.gpu_offset) << 16) & 0x00ff0000;
+ p->idx += 4;
+ break;
+ case DMA_PACKET_NOP:
+ p->idx += 1;
+ break;
+ default:
+ DRM_ERROR("Unknown packet type %d at %d !\n", cmd, idx);
+ return -EINVAL;
+ }
+ } while (p->idx < p->chunks[p->chunk_ib_idx].length_dw);
+#if 0
+ for (r = 0; r < p->ib.length_dw; r++) {
+ printk(KERN_INFO "%05d 0x%08X\n", r, p->ib.ptr[r]);
+ mdelay(1);
+ }
+#endif
+ return 0;
+}
+
/* vm parser */
static bool evergreen_vm_reg_valid(u32 reg)
{
diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c
index d360341..ac1d570 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.c
+++ b/drivers/gpu/drm/radeon/radeon_asic.c
@@ -1204,7 +1204,7 @@ static struct radeon_asic evergreen_asic = {
.ib_execute = &evergreen_dma_ring_ib_execute,
.emit_fence = &evergreen_dma_fence_ring_emit,
.emit_semaphore = &r600_dma_semaphore_ring_emit,
- .cs_parse = NULL,
+ .cs_parse = &evergreen_dma_cs_parse,
.ring_test = &r600_dma_ring_test,
.ib_test = &r600_dma_ib_test,
.is_lockup = &r600_dma_is_lockup,
@@ -1288,7 +1288,7 @@ static struct radeon_asic sumo_asic = {
.ib_execute = &evergreen_dma_ring_ib_execute,
.emit_fence = &evergreen_dma_fence_ring_emit,
.emit_semaphore = &r600_dma_semaphore_ring_emit,
- .cs_parse = NULL,
+ .cs_parse = &evergreen_dma_cs_parse,
.ring_test = &r600_dma_ring_test,
.ib_test = &r600_dma_ib_test,
.is_lockup = &r600_dma_is_lockup,
@@ -1372,7 +1372,7 @@ static struct radeon_asic btc_asic = {
.ib_execute = &evergreen_dma_ring_ib_execute,
.emit_fence = &evergreen_dma_fence_ring_emit,
.emit_semaphore = &r600_dma_semaphore_ring_emit,
- .cs_parse = NULL,
+ .cs_parse = &evergreen_dma_cs_parse,
.ring_test = &r600_dma_ring_test,
.ib_test = &r600_dma_ib_test,
.is_lockup = &r600_dma_is_lockup,
@@ -1486,7 +1486,7 @@ static struct radeon_asic cayman_asic = {
.ib_execute = &cayman_dma_ring_ib_execute,
.emit_fence = &evergreen_dma_fence_ring_emit,
.emit_semaphore = &r600_dma_semaphore_ring_emit,
- .cs_parse = NULL,
+ .cs_parse = &evergreen_dma_cs_parse,
.ring_test = &r600_dma_ring_test,
.ib_test = &r600_dma_ib_test,
.is_lockup = &cayman_dma_is_lockup,
@@ -1496,7 +1496,7 @@ static struct radeon_asic cayman_asic = {
.ib_execute = &cayman_dma_ring_ib_execute,
.emit_fence = &evergreen_dma_fence_ring_emit,
.emit_semaphore = &r600_dma_semaphore_ring_emit,
- .cs_parse = NULL,
+ .cs_parse = &evergreen_dma_cs_parse,
.ring_test = &r600_dma_ring_test,
.ib_test = &r600_dma_ib_test,
.is_lockup = &cayman_dma_is_lockup,
@@ -1611,7 +1611,7 @@ static struct radeon_asic trinity_asic = {
.ib_execute = &cayman_dma_ring_ib_execute,
.emit_fence = &evergreen_dma_fence_ring_emit,
.emit_semaphore = &r600_dma_semaphore_ring_emit,
- .cs_parse = NULL,
+ .cs_parse = &evergreen_dma_cs_parse,
.ring_test = &r600_dma_ring_test,
.ib_test = &r600_dma_ib_test,
.is_lockup = &cayman_dma_is_lockup,
@@ -1621,7 +1621,7 @@ static struct radeon_asic trinity_asic = {
.ib_execute = &cayman_dma_ring_ib_execute,
.emit_fence = &evergreen_dma_fence_ring_emit,
.emit_semaphore = &r600_dma_semaphore_ring_emit,
- .cs_parse = NULL,
+ .cs_parse = &evergreen_dma_cs_parse,
.ring_test = &r600_dma_ring_test,
.ib_test = &r600_dma_ib_test,
.is_lockup = &cayman_dma_is_lockup,
diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h
index b311c0a..d2ac646 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.h
+++ b/drivers/gpu/drm/radeon/radeon_asic.h
@@ -431,6 +431,7 @@ u32 evergreen_get_vblank_counter(struct radeon_device *rdev, int crtc);
int evergreen_irq_set(struct radeon_device *rdev);
int evergreen_irq_process(struct radeon_device *rdev);
extern int evergreen_cs_parse(struct radeon_cs_parser *p);
+extern int evergreen_dma_cs_parse(struct radeon_cs_parser *p);
extern void evergreen_pm_misc(struct radeon_device *rdev);
extern void evergreen_pm_prepare(struct radeon_device *rdev);
extern void evergreen_pm_finish(struct radeon_device *rdev);
--
1.7.7.5
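
For readers tracing the reloc patching in the parser above: every packet
starts with one header dword that is decoded before the address dwords are
rewritten in place. Below is a minimal standalone sketch of just that
decode step; the shifts mirror the GET_DMA_* macros in evergreen_cs.c, and
the sample header value is made up for illustration (no opcode-to-name
mapping is asserted here).

	#include <stdio.h>

	int main(void)
	{
		unsigned int h = 0x30000010; /* hypothetical sample header */

		printf("cmd %u count %u tiled %u new %u misc %u\n",
		       (h >> 28) & 0xf,      /* GET_DMA_CMD   */
		       h & 0x000fffff,       /* GET_DMA_COUNT */
		       (h >> 23) & 0x1,      /* GET_DMA_T     */
		       (h >> 26) & 0x1,      /* GET_DMA_NEW   */
		       (h >> 20) & 0x7);     /* GET_DMA_MISC  */
		return 0;
	}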
^ permalink raw reply related [flat|nested] 9+ messages in thread

* [PATCH 3/4] drm/radeon: add VM CS parser support for async DMA on cayman/TN/SI
2012-12-13 23:49 ` [PATCH 1/4] drm/radeon/kms: add 6xx/7xx CS parser for async DMA (v2) alexdeucher
2012-12-13 23:49 ` [PATCH 2/4] drm/radeon/kms: add evergreen/cayman " alexdeucher
@ 2012-12-13 23:49 ` alexdeucher
2012-12-13 23:50 ` [PATCH 4/4] drm/radeon: enable the async DMA rings in the CS ioctl alexdeucher
2 siblings, 0 replies; 9+ messages in thread
From: alexdeucher @ 2012-12-13 23:49 UTC (permalink / raw)
To: airlied, dri-devel; +Cc: Alex Deucher
From: Alex Deucher <alexander.deucher@amd.com>
Allows us to use async DMA from userspace.
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
drivers/gpu/drm/radeon/evergreen_cs.c | 111 +++++++++++++++++++++++++++++++++
drivers/gpu/drm/radeon/radeon_asic.c | 6 ++
drivers/gpu/drm/radeon/radeon_asic.h | 1 +
3 files changed, 118 insertions(+), 0 deletions(-)
diff --git a/drivers/gpu/drm/radeon/evergreen_cs.c b/drivers/gpu/drm/radeon/evergreen_cs.c
index 1e8bd46..5e7ba99 100644
--- a/drivers/gpu/drm/radeon/evergreen_cs.c
+++ b/drivers/gpu/drm/radeon/evergreen_cs.c
@@ -3545,3 +3545,114 @@ int evergreen_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
return ret;
}
+
+/**
+ * evergreen_dma_ib_parse() - parse the DMA IB for VM
+ * @rdev: radeon_device pointer
+ * @ib: radeon_ib pointer
+ *
+ * Parses the DMA IB from the VM CS ioctl and
+ * checks for errors. (Cayman-SI)
+ * Returns 0 for success and an error on failure.
+ **/
+int evergreen_dma_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
+{
+ u32 idx = 0;
+ u32 header, cmd, count, tiled, new_cmd, misc;
+
+ do {
+ header = ib->ptr[idx];
+ cmd = GET_DMA_CMD(header);
+ count = GET_DMA_COUNT(header);
+ tiled = GET_DMA_T(header);
+ new_cmd = GET_DMA_NEW(header);
+ misc = GET_DMA_MISC(header);
+
+ switch (cmd) {
+ case DMA_PACKET_WRITE:
+ if (tiled)
+ idx += count + 7;
+ else
+ idx += count + 3;
+ break;
+ case DMA_PACKET_COPY:
+ if (tiled) {
+ if (new_cmd) {
+ switch (misc) {
+ case 0:
+ /* L2T, frame to fields */
+ idx += 10;
+ break;
+ case 1:
+ /* L2T, T2L partial */
+ idx += 12;
+ break;
+ case 3:
+ /* L2T, broadcast */
+ idx += 10;
+ break;
+ case 4:
+ /* L2T, T2L */
+ idx += 9;
+ break;
+ case 5:
+ /* T2T partial */
+ idx += 13;
+ break;
+ case 7:
+ /* L2T, broadcast */
+ idx += 10;
+ break;
+ default:
+ DRM_ERROR("bad DMA_PACKET_COPY misc %u\n", misc);
+ return -EINVAL;
+ }
+ } else {
+ switch (misc) {
+ case 0:
+ idx += 9;
+ break;
+ default:
+ DRM_ERROR("bad DMA_PACKET_COPY misc %u\n", misc);
+ return -EINVAL;
+ }
+ }
+ } else {
+ if (new_cmd) {
+ switch (misc) {
+ case 0:
+ /* L2L, byte */
+ idx += 5;
+ break;
+ case 1:
+ /* L2L, partial */
+ idx += 9;
+ break;
+ case 4:
+ /* L2L, dw, broadcast */
+ idx += 7;
+ break;
+ default:
+ DRM_ERROR("bad DMA_PACKET_COPY misc %u\n", misc);
+ return -EINVAL;
+ }
+ } else {
+ /* L2L, dw */
+ idx += 5;
+ }
+ }
+ break;
+ case DMA_PACKET_CONSTANT_FILL:
+ idx += 4;
+ break;
+ case DMA_PACKET_NOP:
+ idx += 1;
+ break;
+ default:
+ DRM_ERROR("Unknown packet type %d at %d !\n", cmd, idx);
+ return -EINVAL;
+ }
+ } while (idx < ib->length_dw);
+
+ return 0;
+}
diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c
index ac1d570..596bcbe 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.c
+++ b/drivers/gpu/drm/radeon/radeon_asic.c
@@ -1484,6 +1484,7 @@ static struct radeon_asic cayman_asic = {
},
[R600_RING_TYPE_DMA_INDEX] = {
.ib_execute = &cayman_dma_ring_ib_execute,
+ .ib_parse = &evergreen_dma_ib_parse,
.emit_fence = &evergreen_dma_fence_ring_emit,
.emit_semaphore = &r600_dma_semaphore_ring_emit,
.cs_parse = &evergreen_dma_cs_parse,
@@ -1494,6 +1495,7 @@ static struct radeon_asic cayman_asic = {
},
[CAYMAN_RING_TYPE_DMA1_INDEX] = {
.ib_execute = &cayman_dma_ring_ib_execute,
+ .ib_parse = &evergreen_dma_ib_parse,
.emit_fence = &evergreen_dma_fence_ring_emit,
.emit_semaphore = &r600_dma_semaphore_ring_emit,
.cs_parse = &evergreen_dma_cs_parse,
@@ -1609,6 +1611,7 @@ static struct radeon_asic trinity_asic = {
},
[R600_RING_TYPE_DMA_INDEX] = {
.ib_execute = &cayman_dma_ring_ib_execute,
+ .ib_parse = &evergreen_dma_ib_parse,
.emit_fence = &evergreen_dma_fence_ring_emit,
.emit_semaphore = &r600_dma_semaphore_ring_emit,
.cs_parse = &evergreen_dma_cs_parse,
@@ -1619,6 +1622,7 @@ static struct radeon_asic trinity_asic = {
},
[CAYMAN_RING_TYPE_DMA1_INDEX] = {
.ib_execute = &cayman_dma_ring_ib_execute,
+ .ib_parse = &evergreen_dma_ib_parse,
.emit_fence = &evergreen_dma_fence_ring_emit,
.emit_semaphore = &r600_dma_semaphore_ring_emit,
.cs_parse = &evergreen_dma_cs_parse,
@@ -1734,6 +1738,7 @@ static struct radeon_asic si_asic = {
},
[R600_RING_TYPE_DMA_INDEX] = {
.ib_execute = &cayman_dma_ring_ib_execute,
+ .ib_parse = &evergreen_dma_ib_parse,
.emit_fence = &evergreen_dma_fence_ring_emit,
.emit_semaphore = &r600_dma_semaphore_ring_emit,
.cs_parse = NULL,
@@ -1744,6 +1749,7 @@ static struct radeon_asic si_asic = {
},
[CAYMAN_RING_TYPE_DMA1_INDEX] = {
.ib_execute = &cayman_dma_ring_ib_execute,
+ .ib_parse = &evergreen_dma_ib_parse,
.emit_fence = &evergreen_dma_fence_ring_emit,
.emit_semaphore = &r600_dma_semaphore_ring_emit,
.cs_parse = NULL,
diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h
index d2ac646..5f4882c 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.h
+++ b/drivers/gpu/drm/radeon/radeon_asic.h
@@ -473,6 +473,7 @@ void cayman_vm_set_page(struct radeon_device *rdev, uint64_t pe,
uint64_t addr, unsigned count,
uint32_t incr, uint32_t flags);
int evergreen_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib);
+int evergreen_dma_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib);
void cayman_dma_ring_ib_execute(struct radeon_device *rdev,
struct radeon_ib *ib);
bool cayman_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring);
--
1.7.7.5
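
Note how evergreen_dma_ib_parse() never touches relocations: on the VM
path the addresses are virtual, so the parser only has to verify that each
packet's dword footprint stays inside the IB. A compact sketch of that
walk, modelling only the WRITE and NOP footprints from the parser above
(the 0x2/0xf opcode values are assumptions, and the full COPY sub-case
table is elided):

	#include <stdint.h>

	/* Return 0 if every modelled packet fits in the IB, -1 otherwise. */
	static int dma_ib_fits(const uint32_t *ib, uint32_t length_dw)
	{
		uint32_t idx = 0;

		while (idx < length_dw) {
			uint32_t h = ib[idx];
			uint32_t count = h & 0x000fffff;

			switch ((h >> 28) & 0xf) {
			case 0x2: /* WRITE: tiled vs linear footprint */
				idx += ((h >> 23) & 0x1) ? count + 7 : count + 3;
				break;
			case 0xf: /* NOP: single dword */
				idx += 1;
				break;
			default:  /* sketch: reject anything not modelled */
				return -1;
			}
		}
		return (idx == length_dw) ? 0 : -1;
	}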
^ permalink raw reply related [flat|nested] 9+ messages in thread

* [PATCH 4/4] drm/radeon: enable the async DMA rings in the CS ioctl
2012-12-13 23:49 ` [PATCH 1/4] drm/radeon/kms: add 6xx/7xx CS parser for async DMA (v2) alexdeucher
2012-12-13 23:49 ` [PATCH 2/4] drm/radeon/kms: add evergreen/cayman " alexdeucher
2012-12-13 23:49 ` [PATCH 3/4] drm/radeon: add VM CS parser support for async DMA on cayman/TN/SI alexdeucher
@ 2012-12-13 23:50 ` alexdeucher
2 siblings, 0 replies; 9+ messages in thread
From: alexdeucher @ 2012-12-13 23:50 UTC (permalink / raw)
To: airlied, dri-devel; +Cc: Alex Deucher
From: Alex Deucher <alexander.deucher@amd.com>
This enables the functionality added in the previous
patches. Userspace acceleration drivers can use the
CS ioctl to submit command buffers to the async DMA
rings.
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
drivers/gpu/drm/radeon/radeon_cs.c | 12 ++++++++++++
include/uapi/drm/radeon_drm.h | 1 +
2 files changed, 13 insertions(+), 0 deletions(-)
diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c
index 1b32a5a..396baba 100644
--- a/drivers/gpu/drm/radeon/radeon_cs.c
+++ b/drivers/gpu/drm/radeon/radeon_cs.c
@@ -112,6 +112,18 @@ static int radeon_cs_get_ring(struct radeon_cs_parser *p, u32 ring, s32 priority
} else
p->ring = RADEON_RING_TYPE_GFX_INDEX;
break;
+ case RADEON_CS_RING_DMA:
+ if (p->rdev->family >= CHIP_CAYMAN) {
+ if (p->priority > 0)
+ p->ring = R600_RING_TYPE_DMA_INDEX;
+ else
+ p->ring = CAYMAN_RING_TYPE_DMA1_INDEX;
+ } else if (p->rdev->family >= CHIP_R600) {
+ p->ring = R600_RING_TYPE_DMA_INDEX;
+ } else {
+ return -EINVAL;
+ }
+ break;
}
return 0;
}
diff --git a/include/uapi/drm/radeon_drm.h b/include/uapi/drm/radeon_drm.h
index 5645a87..eeda917 100644
--- a/include/uapi/drm/radeon_drm.h
+++ b/include/uapi/drm/radeon_drm.h
@@ -917,6 +917,7 @@ struct drm_radeon_gem_va {
/* The second dword of RADEON_CHUNK_ID_FLAGS is a uint32 that sets the ring type */
#define RADEON_CS_RING_GFX 0
#define RADEON_CS_RING_COMPUTE 1
+#define RADEON_CS_RING_DMA 2
/* The third dword of RADEON_CHUNK_ID_FLAGS is a sint32 that sets the priority */
/* 0 = normal, + = higher priority, - = lower priority */
--
1.7.7.5
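
To exercise this from userspace, a driver sets the ring type in the second
dword of its RADEON_CHUNK_ID_FLAGS chunk. A hedged sketch of just that
step, following the layout comments in the radeon_drm.h hunk above; the
helper name is hypothetical, the surrounding chunk/ioctl plumbing is
elided, and the assumption that the first dword carries the RADEON_CS_*
flag bits is mine, not stated in the patch.

	#include <stdint.h>

	#define RADEON_CS_RING_DMA 2  /* from the radeon_drm.h hunk above */

	/* Fill the three dwords of a RADEON_CHUNK_ID_FLAGS chunk so the
	 * CS ioctl targets the async DMA ring at normal priority. */
	static void cs_flags_select_dma(uint32_t flags[3])
	{
		flags[0] = 0;                  /* CS flag bits (assumed layout) */
		flags[1] = RADEON_CS_RING_DMA; /* second dword: ring type */
		flags[2] = 0;                  /* third dword: priority, 0 = normal */
	}

On cayman and newer, radeon_cs_get_ring() in the patch maps priority > 0
to R600_RING_TYPE_DMA_INDEX and everything else to the second DMA ring, so
the priority dword also steers which of the two rings services the IB.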
^ permalink raw reply related [flat|nested] 9+ messages in thread