All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/3] Add r6xx/r7xx tiling support to mesa
@ 2010-05-25 23:12 Alex Deucher
  2010-05-25 23:12 ` [PATCH 1/3] r600: add span support for 2D tiling Alex Deucher
  0 siblings, 1 reply; 16+ messages in thread
From: Alex Deucher @ 2010-05-25 23:12 UTC (permalink / raw)
  To: airlied, dri-devel

These patches along with the drm and ddx patches enable tiling
on r6xx/r7xx hardware.

Alex

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCH 1/3] r600: add span support for 2D tiling
  2010-05-25 23:12 [PATCH 0/3] Add r6xx/r7xx tiling support to mesa Alex Deucher
@ 2010-05-25 23:12 ` Alex Deucher
  2010-05-25 23:12   ` [PATCH 2/3] r600: add new relocs for tiling support Alex Deucher
  2010-05-27 14:55   ` [PATCH 1/3] r600: add span support for 2D tiling Matt Turner
  0 siblings, 2 replies; 16+ messages in thread
From: Alex Deucher @ 2010-05-25 23:12 UTC (permalink / raw)
  To: airlied, dri-devel

Requires tiling config ioctl support from the drm to use.
kms only.

Signed-off-by: Alex Deucher <alexdeucher@gmail.com>
---
 .../drivers/dri/radeon/radeon_common_context.c     |    9 +-
 .../drivers/dri/radeon/radeon_common_context.h     |    7 +
 src/mesa/drivers/dri/radeon/radeon_screen.h        |    7 +
 src/mesa/drivers/dri/radeon/radeon_span.c          |  192 +++++++++++++++++++-
 4 files changed, 210 insertions(+), 5 deletions(-)

diff --git a/src/mesa/drivers/dri/radeon/radeon_common_context.c b/src/mesa/drivers/dri/radeon/radeon_common_context.c
index 94f4766..1cce032 100644
--- a/src/mesa/drivers/dri/radeon/radeon_common_context.c
+++ b/src/mesa/drivers/dri/radeon/radeon_common_context.c
@@ -650,6 +650,13 @@ radeon_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable,
 		rb->base.Height = drawable->h;
 		rb->has_surface = 0;
 
+		/* r6xx+ tiling */
+		rb->tile_config = radeon->radeonScreen->tile_config;
+		rb->group_bytes = radeon->radeonScreen->group_bytes;
+		rb->num_channels = radeon->radeonScreen->num_channels;
+		rb->num_banks = radeon->radeonScreen->num_banks;
+		rb->r7xx_bank_op = radeon->radeonScreen->r7xx_bank_op;
+
 		if (buffers[i].attachment == __DRI_BUFFER_STENCIL && depth_bo) {
 			if (RADEON_DEBUG & RADEON_DRI)
 				fprintf(stderr, "(reusing depth buffer as stencil)\n");
@@ -678,7 +685,7 @@ radeon_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable,
 				bo->flags |= RADEON_BO_FLAGS_MACRO_TILE;
 			if (tiling_flags & RADEON_TILING_MICRO)
 				bo->flags |= RADEON_BO_FLAGS_MICRO_TILE;
-			
+
 		}
 
 		if (buffers[i].attachment == __DRI_BUFFER_DEPTH) {
diff --git a/src/mesa/drivers/dri/radeon/radeon_common_context.h b/src/mesa/drivers/dri/radeon/radeon_common_context.h
index 5156c5d..3fd00eb 100644
--- a/src/mesa/drivers/dri/radeon/radeon_common_context.h
+++ b/src/mesa/drivers/dri/radeon/radeon_common_context.h
@@ -93,6 +93,13 @@ struct radeon_renderbuffer
 	GLuint pf_pending;  /**< sequence number of pending flip */
 	GLuint vbl_pending;   /**< vblank sequence number of pending flip */
 	__DRIdrawable *dPriv;
+
+	/* r6xx+ tiling */
+	GLuint tile_config;
+	GLint group_bytes;
+	GLint num_channels;
+	GLint num_banks;
+	GLint r7xx_bank_op;
 };
 
 struct radeon_framebuffer
diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.h b/src/mesa/drivers/dri/radeon/radeon_screen.h
index 0d7e335..2b33201 100644
--- a/src/mesa/drivers/dri/radeon/radeon_screen.h
+++ b/src/mesa/drivers/dri/radeon/radeon_screen.h
@@ -112,6 +112,13 @@ typedef struct radeon_screen {
    int kernel_mm;
    drm_radeon_sarea_t *sarea;	/* Private SAREA data */
    struct radeon_bo_manager *bom;
+
+   /* r6xx+ tiling */
+   GLuint tile_config;
+   GLint group_bytes;
+   GLint num_channels;
+   GLint num_banks;
+   GLint r7xx_bank_op;
 } radeonScreenRec, *radeonScreenPtr;
 
 #define IS_R100_CLASS(screen) \
diff --git a/src/mesa/drivers/dri/radeon/radeon_span.c b/src/mesa/drivers/dri/radeon/radeon_span.c
index 1adb609..9dfe2dd 100644
--- a/src/mesa/drivers/dri/radeon/radeon_span.c
+++ b/src/mesa/drivers/dri/radeon/radeon_span.c
@@ -111,7 +111,6 @@ static GLubyte *r200_depth_4byte(const struct radeon_renderbuffer * rrb,
  * two main types:
  * - 1D (akin to macro-linear/micro-tiled on older asics)
  * - 2D (akin to macro-tiled/micro-tiled on older asics)
- * only 1D tiling is implemented below
  */
 #if defined(RADEON_R600)
 static inline GLint r600_1d_tile_helper(const struct radeon_renderbuffer * rrb,
@@ -208,12 +207,190 @@ static inline GLint r600_1d_tile_helper(const struct radeon_renderbuffer * rrb,
     return offset;
 }
 
+static inline GLint r600_log2(GLint n)
+{
+	GLint log2 = 0;
+
+	while (n >>= 1)
+		++log2;
+	return log2;
+}
+
+static inline GLint r600_2d_tile_helper(const struct radeon_renderbuffer * rrb,
+					GLint x, GLint y, GLint is_depth, GLint is_stencil)
+{
+	GLint group_bytes = rrb->group_bytes;
+	GLint num_channels = rrb->num_channels;
+	GLint num_banks = rrb->num_banks;
+	GLint r7xx_bank_op = rrb->r7xx_bank_op;
+	/* */
+	GLint group_bits = r600_log2(group_bytes);
+	GLint channel_bits = r600_log2(num_channels);
+	GLint bank_bits = r600_log2(num_banks);
+	GLint element_bytes = rrb->cpp;
+	GLint num_samples = 1;
+	GLint tile_width = 8;
+	GLint tile_height = 8;
+	GLint tile_thickness = 1;
+	GLint macro_tile_width = num_banks;
+	GLint macro_tile_height = num_channels;
+	GLint pitch_elements = (rrb->pitch / element_bytes) / tile_width;
+	GLint height = rrb->base.Height / tile_height;
+	GLint z = 0;
+	GLint sample_number = 0;
+	/* */
+	GLint tile_bytes;
+	GLint macro_tile_bytes;
+	GLint macro_tiles_per_row;
+	GLint macro_tiles_per_slice;
+	GLint slice_offset;
+	GLint macro_tile_row_index;
+	GLint macro_tile_column_index;
+	GLint macro_tile_offset;
+	GLint pixel_number = 0;
+	GLint element_offset;
+	GLint bank = 0;
+	GLint channel = 0;
+	GLint total_offset;
+	GLint group_mask = (1 << group_bits) - 1;
+	GLint offset_low;
+	GLint offset_high;
+	GLint offset = 0;
+
+	switch (num_channels) {
+	case 2:
+	default:
+		// channel[0] = x[3] ^ y[3]
+		channel |= (((x >> 3) ^ (y >> 3)) & 1) << 0;
+		break;
+	case 4:
+		// channel[0] = x[4] ^ y[3]
+		channel |= (((x >> 4) ^ (y >> 3)) & 1) << 0;
+		// channel[1] = x[3] ^ y[4]
+		channel |= (((x >> 3) ^ (y >> 4)) & 1) << 1;
+		break;
+	case 8:
+		// channel[0] = x[5] ^ y[3]
+		channel |= (((x >> 5) ^ (y >> 3)) & 1) << 0;
+		// channel[1] = x[4] ^ x[5] ^ y[4]
+		channel |= (((x >> 4) ^ (x >> 5) ^ (y >> 4)) & 1) << 1;
+		// channel[2] = x[3] ^ y[5]
+		channel |= (((x >> 3) ^ (y >> 5)) & 1) << 2;
+		break;
+	}
+
+	switch (num_banks) {
+	case 4:
+		// bank[0] = x[3] ^ y[4 + log2(num_channels)]
+		bank |= (((x >> 3) ^ (y >> (4 + channel_bits))) & 1) << 0;
+		if (r7xx_bank_op)
+			// bank[1] = x[4] ^ y[3 + log2(num_channels)] ^ x[5]
+			bank |= (((x >> 4) ^ (y >> (3 + channel_bits)) ^ (x >> 5)) & 1) << 1;
+		else
+			// bank[1] = x[4] ^ y[3 + log2(num_channels)]
+			bank |= (((x >> 4) ^ (y >> (3 + channel_bits))) & 1) << 1;
+		break;
+	case 8:
+		// bank[0] = x[3] ^ y[5 + log2(num_channels)]
+		bank |= (((x >> 3) ^ (y >> (5 + channel_bits))) & 1) << 0;
+		// bank[1] = x[4] ^ y[4 + log2(num_channels)] ^ y[5 + log2(num_channels)]
+		bank |= (((x >> 4) ^ (y >> (4 + channel_bits)) ^ (y >> (5 + channel_bits))) & 1) << 1;
+		if (r7xx_bank_op)
+			// bank[2] = x[5] ^ y[3 + log2(num_channels)] ^ x[6]
+			bank |= (((x >> 5) ^ (y >> (3 + channel_bits)) ^ (x >> 6)) & 1) << 2;
+		else
+			// bank[2] = x[5] ^ y[3 + log2(num_channels)]
+			bank |= (((x >> 5) ^ (y >> (3 + channel_bits))) & 1) << 2;
+		break;
+	}
+
+	tile_bytes = tile_width * tile_height * tile_thickness * element_bytes * num_samples;
+	macro_tile_bytes = macro_tile_width * macro_tile_height * tile_bytes;
+	macro_tiles_per_row = pitch_elements / macro_tile_width;
+	macro_tiles_per_slice = macro_tiles_per_row * (height / macro_tile_height);
+	slice_offset = (z / tile_thickness) * macro_tiles_per_slice * macro_tile_bytes;
+	macro_tile_row_index = (y / tile_height) / macro_tile_height;
+	macro_tile_column_index = (x / tile_width) / macro_tile_width;
+	macro_tile_offset = ((macro_tile_row_index * macro_tiles_per_row) + macro_tile_column_index) * macro_tile_bytes;
+
+	if (is_depth) {
+		GLint pixel_offset = 0;
+
+		pixel_number |= ((x >> 0) & 1) << 0; // pn[0] = x[0]
+		pixel_number |= ((y >> 0) & 1) << 1; // pn[1] = y[0]
+		pixel_number |= ((x >> 1) & 1) << 2; // pn[2] = x[1]
+		pixel_number |= ((y >> 1) & 1) << 3; // pn[3] = y[1]
+		pixel_number |= ((x >> 2) & 1) << 4; // pn[4] = x[2]
+		pixel_number |= ((y >> 2) & 1) << 5; // pn[5] = y[2]
+		switch (element_bytes) {
+		case 2:
+			pixel_offset = pixel_number * element_bytes * num_samples;
+			break;
+		case 4:
+			/* stencil and depth data are stored separately within a tile.
+			 * stencil is stored in a contiguous tile before the depth tile.
+			 * stencil element is 1 byte, depth element is 3 bytes.
+			 * stencil tile is 64 bytes.
+			 */
+			if (is_stencil)
+				pixel_offset = pixel_number * 1 * num_samples;
+			else
+				pixel_offset = (pixel_number * 3 * num_samples) + 64;
+			break;
+		}
+		element_offset = pixel_offset + (sample_number * element_bytes);
+	} else {
+		GLint sample_offset;
+
+		switch (element_bytes) {
+		case 1:
+			pixel_number |= ((x >> 0) & 1) << 0; // pn[0] = x[0]
+			pixel_number |= ((x >> 1) & 1) << 1; // pn[1] = x[1]
+			pixel_number |= ((x >> 2) & 1) << 2; // pn[2] = x[2]
+			pixel_number |= ((y >> 1) & 1) << 3; // pn[3] = y[1]
+			pixel_number |= ((y >> 0) & 1) << 4; // pn[4] = y[0]
+			pixel_number |= ((y >> 2) & 1) << 5; // pn[5] = y[2]
+			break;
+		case 2:
+			pixel_number |= ((x >> 0) & 1) << 0; // pn[0] = x[0]
+			pixel_number |= ((x >> 1) & 1) << 1; // pn[1] = x[1]
+			pixel_number |= ((x >> 2) & 1) << 2; // pn[2] = x[2]
+			pixel_number |= ((y >> 0) & 1) << 3; // pn[3] = y[0]
+			pixel_number |= ((y >> 1) & 1) << 4; // pn[4] = y[1]
+			pixel_number |= ((y >> 2) & 1) << 5; // pn[5] = y[2]
+			break;
+		case 4:
+			pixel_number |= ((x >> 0) & 1) << 0; // pn[0] = x[0]
+			pixel_number |= ((x >> 1) & 1) << 1; // pn[1] = x[1]
+			pixel_number |= ((y >> 0) & 1) << 2; // pn[2] = y[0]
+			pixel_number |= ((x >> 2) & 1) << 3; // pn[3] = x[2]
+			pixel_number |= ((y >> 1) & 1) << 4; // pn[4] = y[1]
+			pixel_number |= ((y >> 2) & 1) << 5; // pn[5] = y[2]
+			break;
+		}
+		sample_offset = sample_number * (tile_bytes / num_samples);
+		element_offset = sample_offset + (pixel_number * element_bytes);
+	}
+	total_offset = (slice_offset + macro_tile_offset) >> (channel_bits + bank_bits);
+	total_offset += element_offset;
+
+	offset_low = total_offset & group_mask;
+	offset_high = (total_offset & ~group_mask) << (channel_bits + bank_bits);
+	offset = (bank << (group_bits + channel_bits)) + (channel << group_bits) + offset_low + offset_high;
+
+	return offset;
+}
+
 /* depth buffers */
 static GLubyte *r600_ptr_depth(const struct radeon_renderbuffer * rrb,
 			       GLint x, GLint y)
 {
     GLubyte *ptr = rrb->bo->ptr;
-    GLint offset = r600_1d_tile_helper(rrb, x, y, 1, 0);
+    GLint offset;
+    if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE)
+	    offset = r600_2d_tile_helper(rrb, x, y, 1, 0);
+    else
+	    offset = r600_1d_tile_helper(rrb, x, y, 1, 0);
     return &ptr[offset];
 }
 
@@ -221,7 +398,11 @@ static GLubyte *r600_ptr_stencil(const struct radeon_renderbuffer * rrb,
 				 GLint x, GLint y)
 {
     GLubyte *ptr = rrb->bo->ptr;
-    GLint offset = r600_1d_tile_helper(rrb, x, y, 1, 1);
+    GLint offset;
+    if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE)
+	    offset = r600_2d_tile_helper(rrb, x, y, 1, 1);
+    else
+	    offset = r600_1d_tile_helper(rrb, x, y, 1, 1);
     return &ptr[offset];
 }
 
@@ -235,7 +416,10 @@ static GLubyte *r600_ptr_color(const struct radeon_renderbuffer * rrb,
     if (rrb->has_surface || !(rrb->bo->flags & mask)) {
         offset = x * rrb->cpp + y * rrb->pitch;
     } else {
-	    offset = r600_1d_tile_helper(rrb, x, y, 0, 0);
+	    if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE)
+		    offset = r600_2d_tile_helper(rrb, x, y, 0, 0);
+	    else
+		    offset = r600_1d_tile_helper(rrb, x, y, 0, 0);
     }
     return &ptr[offset];
 }
-- 
1.5.6.3

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH 2/3] r600: add new relocs for tiling support
  2010-05-25 23:12 ` [PATCH 1/3] r600: add span support for 2D tiling Alex Deucher
@ 2010-05-25 23:12   ` Alex Deucher
  2010-05-25 23:12     ` [PATCH 3/3] r600: add support for getting the tiling config via drm ioctl Alex Deucher
  2010-05-27 14:55   ` [PATCH 1/3] r600: add span support for 2D tiling Matt Turner
  1 sibling, 1 reply; 16+ messages in thread
From: Alex Deucher @ 2010-05-25 23:12 UTC (permalink / raw)
  To: airlied, dri-devel

Signed-off-by: Alex Deucher <alexdeucher@gmail.com>
---
 src/mesa/drivers/dri/r600/r600_blit.c |   20 +++++++++++------
 src/mesa/drivers/dri/r600/r700_chip.c |   37 +++++++++++++++++++++++---------
 2 files changed, 39 insertions(+), 18 deletions(-)

diff --git a/src/mesa/drivers/dri/r600/r600_blit.c b/src/mesa/drivers/dri/r600/r600_blit.c
index 172f85e..6196782 100644
--- a/src/mesa/drivers/dri/r600/r600_blit.c
+++ b/src/mesa/drivers/dri/r600/r600_blit.c
@@ -390,13 +390,20 @@ set_render_target(context_t *context, struct radeon_bo *bo, gl_format mesa_forma
 			 0, RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT, 0);
     END_BATCH();
 
-    BEGIN_BATCH_NO_AUTOSTATE(12);
+    BEGIN_BATCH_NO_AUTOSTATE(9);
     R600_OUT_BATCH_REGVAL(CB_COLOR0_SIZE + (4 * id), cb_color0_size);
     R600_OUT_BATCH_REGVAL(CB_COLOR0_VIEW + (4 * id), cb_color0_view);
-    R600_OUT_BATCH_REGVAL(CB_COLOR0_INFO + (4 * id), cb_color0_info);
     R600_OUT_BATCH_REGVAL(CB_COLOR0_MASK + (4 * id), 0);
     END_BATCH();
 
+    BEGIN_BATCH_NO_AUTOSTATE(3 + 2);
+    R600_OUT_BATCH_REGVAL(CB_COLOR0_INFO + (4 * id), cb_color0_info);
+    R600_OUT_BATCH_RELOC(0,
+			 bo,
+			 0,
+			 0, RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT, 0);
+    END_BATCH();
+
     COMMIT_BATCH();
 
 }
@@ -1447,7 +1454,7 @@ set_default_state(context_t *context)
 	    SETbit(sq_dyn_gpr_cntl_ps_flush_req, VS_PC_LIMIT_ENABLE_bit);
     }
 
-    BEGIN_BATCH_NO_AUTOSTATE(117);
+    BEGIN_BATCH_NO_AUTOSTATE(114);
     R600_OUT_BATCH_REGSEQ(SQ_CONFIG, 6);
     R600_OUT_BATCH(sq_config);
     R600_OUT_BATCH(sq_gpr_resource_mgmt_1);
@@ -1477,7 +1484,6 @@ set_default_state(context_t *context)
                          (CLRCMP_SEL_SRC << CLRCMP_FCN_SEL_shift));
     R600_OUT_BATCH_REGVAL(SQ_VTX_BASE_VTX_LOC, 0);
     R600_OUT_BATCH_REGVAL(SQ_VTX_START_INST_LOC, 0);
-    R600_OUT_BATCH_REGVAL(DB_DEPTH_INFO, 0);
     R600_OUT_BATCH_REGVAL(DB_DEPTH_CONTROL, 0);
     R600_OUT_BATCH_REGVAL(CB_SHADER_MASK, (OUTPUT0_ENABLE_mask));
     R600_OUT_BATCH_REGVAL(CB_TARGET_MASK, (TARGET0_ENABLE_mask));
@@ -1607,7 +1613,7 @@ unsigned r600_blit(GLcontext *ctx,
     /* Flush is needed to make sure that source buffer has correct data */
     radeonFlush(ctx);
 
-    rcommonEnsureCmdBufSpace(&context->radeon, 304, __FUNCTION__);
+    rcommonEnsureCmdBufSpace(&context->radeon, 305, __FUNCTION__);
 
     /* load shaders */
     load_shaders(context->radeon.glCtx);
@@ -1616,7 +1622,7 @@ unsigned r600_blit(GLcontext *ctx,
         return GL_FALSE;
 
     /* set clear state */
-    /* 117 */
+    /* 114 */
     set_default_state(context);
 
     /* shaders */
@@ -1632,7 +1638,7 @@ unsigned r600_blit(GLcontext *ctx,
     set_tex_sampler(context);
 
     /* dst */
-    /* 27 */
+    /* 31 */
     set_render_target(context, dst_bo, dst_mesaformat,
 		      dst_pitch, dst_width, dst_height, dst_offset);
     /* scissors */
diff --git a/src/mesa/drivers/dri/r600/r700_chip.c b/src/mesa/drivers/dri/r600/r700_chip.c
index cefda3a..1e955b9 100644
--- a/src/mesa/drivers/dri/r600/r700_chip.c
+++ b/src/mesa/drivers/dri/r600/r700_chip.c
@@ -617,18 +617,25 @@ static void r700SendDepthTargetState(GLcontext *ctx, struct radeon_state_atom *a
 
 	r700SetDepthTarget(context);
 
-        BEGIN_BATCH_NO_AUTOSTATE(8 + 2);
+        BEGIN_BATCH_NO_AUTOSTATE(7 + 2);
 	R600_OUT_BATCH_REGSEQ(DB_DEPTH_SIZE, 2);
 	R600_OUT_BATCH(r700->DB_DEPTH_SIZE.u32All);
 	R600_OUT_BATCH(r700->DB_DEPTH_VIEW.u32All);
-	R600_OUT_BATCH_REGSEQ(DB_DEPTH_BASE, 2);
+	R600_OUT_BATCH_REGSEQ(DB_DEPTH_BASE, 1);
 	R600_OUT_BATCH(r700->DB_DEPTH_BASE.u32All);
-	R600_OUT_BATCH(r700->DB_DEPTH_INFO.u32All);
 	R600_OUT_BATCH_RELOC(r700->DB_DEPTH_BASE.u32All,
 			     rrb->bo,
 			     r700->DB_DEPTH_BASE.u32All,
 			     0, RADEON_GEM_DOMAIN_VRAM, 0);
         END_BATCH();
+        BEGIN_BATCH_NO_AUTOSTATE(3 + 2);
+	R600_OUT_BATCH_REGSEQ(DB_DEPTH_INFO, 1);
+	R600_OUT_BATCH(r700->DB_DEPTH_INFO.u32All);
+	R600_OUT_BATCH_RELOC(r700->DB_DEPTH_INFO.u32All,
+			     rrb->bo,
+			     r700->DB_DEPTH_INFO.u32All,
+			     0, RADEON_GEM_DOMAIN_VRAM, 0);
+        END_BATCH();
 
 	if ((context->radeon.radeonScreen->chip_family > CHIP_FAMILY_R600) &&
 	    (context->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770)) {
@@ -687,27 +694,35 @@ static void r700SendRenderTargetState(GLcontext *ctx, struct radeon_state_atom *
 	BEGIN_BATCH_NO_AUTOSTATE(3 + 2);
 	R600_OUT_BATCH_REGSEQ(CB_COLOR0_TILE + (4 * id), 1);
 	R600_OUT_BATCH(r700->render_target[id].CB_COLOR0_TILE.u32All);
-	R600_OUT_BATCH_RELOC(r700->render_target[id].CB_COLOR0_BASE.u32All,
+	R600_OUT_BATCH_RELOC(r700->render_target[id].CB_COLOR0_TILE.u32All,
 			     rrb->bo,
-			     r700->render_target[id].CB_COLOR0_BASE.u32All,
+			     r700->render_target[id].CB_COLOR0_TILE.u32All,
 			     0, RADEON_GEM_DOMAIN_VRAM, 0);
 	END_BATCH();
 	BEGIN_BATCH_NO_AUTOSTATE(3 + 2);
 	R600_OUT_BATCH_REGSEQ(CB_COLOR0_FRAG + (4 * id), 1);
 	R600_OUT_BATCH(r700->render_target[id].CB_COLOR0_FRAG.u32All);
-	R600_OUT_BATCH_RELOC(r700->render_target[id].CB_COLOR0_BASE.u32All,
+	R600_OUT_BATCH_RELOC(r700->render_target[id].CB_COLOR0_FRAG.u32All,
 			     rrb->bo,
-			     r700->render_target[id].CB_COLOR0_BASE.u32All,
+			     r700->render_target[id].CB_COLOR0_FRAG.u32All,
 			     0, RADEON_GEM_DOMAIN_VRAM, 0);
         END_BATCH();
 
-        BEGIN_BATCH_NO_AUTOSTATE(12);
+        BEGIN_BATCH_NO_AUTOSTATE(9);
 	R600_OUT_BATCH_REGVAL(CB_COLOR0_SIZE + (4 * id), r700->render_target[id].CB_COLOR0_SIZE.u32All);
 	R600_OUT_BATCH_REGVAL(CB_COLOR0_VIEW + (4 * id), r700->render_target[id].CB_COLOR0_VIEW.u32All);
-	R600_OUT_BATCH_REGVAL(CB_COLOR0_INFO + (4 * id), r700->render_target[id].CB_COLOR0_INFO.u32All);
 	R600_OUT_BATCH_REGVAL(CB_COLOR0_MASK + (4 * id), r700->render_target[id].CB_COLOR0_MASK.u32All);
         END_BATCH();
 
+	BEGIN_BATCH_NO_AUTOSTATE(3 + 2);
+	R600_OUT_BATCH_REGVAL(CB_COLOR0_INFO + (4 * id), r700->render_target[id].CB_COLOR0_INFO.u32All);
+	R600_OUT_BATCH_RELOC(r700->render_target[id].CB_COLOR0_INFO.u32All,
+			     rrb->bo,
+			     r700->render_target[id].CB_COLOR0_INFO.u32All,
+			     0, RADEON_GEM_DOMAIN_VRAM, 0);
+
+        END_BATCH();
+
 	COMMIT_BATCH();
 
 }
@@ -1567,7 +1582,7 @@ void r600InitAtoms(context_t *context)
 	ALLOC_STATE(sq, always, 34, r700SendSQConfig);
 	ALLOC_STATE(db, always, 17, r700SendDBState);
 	ALLOC_STATE(stencil, always, 4, r700SendStencilState);
-	ALLOC_STATE(db_target, always, 12, r700SendDepthTargetState);
+	ALLOC_STATE(db_target, always, 16, r700SendDepthTargetState);
 	ALLOC_STATE(sc, always, 15, r700SendSCState);
 	ALLOC_STATE(scissor, always, 22, r700SendScissorState);
 	ALLOC_STATE(aa, always, 12, r700SendAAState);
@@ -1578,7 +1593,7 @@ void r600InitAtoms(context_t *context)
 	ALLOC_STATE(poly, always, 10, r700SendPolyState);
 	ALLOC_STATE(cb, cb, 18, r700SendCBState);
 	ALLOC_STATE(clrcmp, always, 6, r700SendCBCLRCMPState);
-	ALLOC_STATE(cb_target, always, 29, r700SendRenderTargetState);
+	ALLOC_STATE(cb_target, always, 31, r700SendRenderTargetState);
 	ALLOC_STATE(blnd, blnd, (6 + (R700_MAX_RENDER_TARGETS * 3)), r700SendCBBlendState);
 	ALLOC_STATE(blnd_clr, always, 6, r700SendCBBlendColorState);
 	ALLOC_STATE(sx, always, 9, r700SendSXState);
-- 
1.5.6.3

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH 3/3] r600: add support for getting the tiling config via drm ioctl
  2010-05-25 23:12   ` [PATCH 2/3] r600: add new relocs for tiling support Alex Deucher
@ 2010-05-25 23:12     ` Alex Deucher
  0 siblings, 0 replies; 16+ messages in thread
From: Alex Deucher @ 2010-05-25 23:12 UTC (permalink / raw)
  To: airlied, dri-devel

Needed for the 2D tiling span functions.

Signed-off-by: Alex Deucher <alexdeucher@gmail.com>
---
 src/mesa/drivers/dri/radeon/radeon_screen.c |   53 +++++++++++++++++++++++++++
 1 files changed, 53 insertions(+), 0 deletions(-)

diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.c b/src/mesa/drivers/dri/radeon/radeon_screen.c
index 4f59511..175ef71 100644
--- a/src/mesa/drivers/dri/radeon/radeon_screen.c
+++ b/src/mesa/drivers/dri/radeon/radeon_screen.c
@@ -233,6 +233,9 @@ radeonGetParam(__DRIscreen *sPriv, int param, void *value)
       case RADEON_PARAM_NUM_Z_PIPES:
           info.request = RADEON_INFO_NUM_Z_PIPES;
           break;
+      case 0x05: /* RADEON_INFO_TILE_CONFIG */
+          info.request = 0x05;
+          break;
       default:
           return -EINVAL;
       }
@@ -1316,6 +1319,56 @@ radeonCreateScreen2(__DRIscreen *sPriv)
    else
 	   screen->chip_flags |= RADEON_CLASS_R600;
 
+   /* r6xx+ tiling */
+   if (IS_R600_CLASS(screen) && (sPriv->drm_version.minor >= 5)) {
+	   ret = radeonGetParam(sPriv, 0x05, &temp);
+	   if (ret)
+		   fprintf(stderr, "failed to get tiling info\n");
+	   else {
+		   screen->tile_config = temp;
+		   screen->r7xx_bank_op = 0;
+		   switch((screen->tile_config & 0xe) >> 1) {
+		   case 0:
+			   screen->num_channels = 1;
+			   break;
+		   case 1:
+			   screen->num_channels = 2;
+			   break;
+		   case 2:
+			   screen->num_channels = 4;
+			   break;
+		   case 3:
+			   screen->num_channels = 8;
+			   break;
+		   default:
+			   fprintf(stderr, "bad channels\n");
+			   break;
+		   }
+		   switch((screen->tile_config & 0x30) >> 4) {
+		   case 0:
+			   screen->num_banks = 4;
+			   break;
+		   case 1:
+			   screen->num_banks = 8;
+			   break;
+		   default:
+			   fprintf(stderr, "bad banks\n");
+			   break;
+		   }
+		   switch((screen->tile_config & 0xc0) >> 6) {
+		   case 0:
+			   screen->group_bytes = 256;
+			   break;
+		   case 1:
+			   screen->group_bytes = 512;
+			   break;
+		   default:
+			   fprintf(stderr, "bad group_bytes\n");
+			   break;
+		   }
+	   }
+   }
+
    if (IS_R300_CLASS(screen)) {
        ret = radeonGetParam(sPriv, RADEON_PARAM_NUM_GB_PIPES, &temp);
        if (ret) {
-- 
1.5.6.3

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/3] r600: add span support for 2D tiling
@ 2010-05-27  7:54 Frieder Ferlemann
  0 siblings, 0 replies; 16+ messages in thread
From: Frieder Ferlemann @ 2010-05-27  7:54 UTC (permalink / raw)
  To: dri-devel

Hi Alex,

not tested (admittedly I haven't compiled it),
and probably not really relevant but these
switch cases could be more compact:

+static inline GLint r600_2d_tile_helper(const struct radeon_renderbuffer * rrb,
+                    GLint x, GLint y, GLint is_depth, GLint is_stencil)

...

+        switch (element_bytes) {
+        case 1:
+            pixel_number |= ((x >> 0) & 1) << 0; // pn[0] = x[0]
+            pixel_number |= ((x >> 1) & 1) << 1; // pn[1] = x[1]
+            pixel_number |= ((x >> 2) & 1) << 2; // pn[2] = x[2]
+            pixel_number |= ((y >> 1) & 1) << 3; // pn[3] = y[1]
+            pixel_number |= ((y >> 0) & 1) << 4; // pn[4] = y[0]
+            pixel_number |= ((y >> 2) & 1) << 5; // pn[5] = y[2]
+            break;
+        case 2:
+            pixel_number |= ((x >> 0) & 1) << 0; // pn[0] = x[0]
+            pixel_number |= ((x >> 1) & 1) << 1; // pn[1] = x[1]
+            pixel_number |= ((x >> 2) & 1) << 2; // pn[2] = x[2]
+            pixel_number |= ((y >> 0) & 1) << 3; // pn[3] = y[0]
+            pixel_number |= ((y >> 1) & 1) << 4; // pn[4] = y[1]
+            pixel_number |= ((y >> 2) & 1) << 5; // pn[5] = y[2]
+            break;
+        case 4:
+            pixel_number |= ((x >> 0) & 1) << 0; // pn[0] = x[0]
+            pixel_number |= ((x >> 1) & 1) << 1; // pn[1] = x[1]
+            pixel_number |= ((y >> 0) & 1) << 2; // pn[2] = y[0]
+            pixel_number |= ((x >> 2) & 1) << 3; // pn[3] = x[2]
+            pixel_number |= ((y >> 1) & 1) << 4; // pn[4] = y[1]
+            pixel_number |= ((y >> 2) & 1) << 5; // pn[5] = y[2]
+            break;
+        }


when using 

+        switch (element_bytes) {
+        case 1:
+            pixel_number |= (x & 0x07) << 0;      // pn[0] = x[0], pn[1] = x[1], pn[2] = x[2]
+            pixel_number |= ((y >> 1) & 1) << 3; // pn[3] = y[1]
+            pixel_number |= ((y >> 0) & 1) << 4; // pn[4] = y[0]
+            pixel_number |= ((y >> 2) & 1) << 5; // pn[5] = y[2]
+            break;
+        case 2:
+            pixel_number |= (x & 0x07) << 0;     // pn[0] = x[0], pn[1] = x[1], pn[2] = x[2]
+            pixel_number |= (y & 0x07) << 3;     // pn[3] = y[0], pn[4] = y[1], pn[5] = y[2]
+            break;
+        case 4:
+            pixel_number |= (x & 0x03) << 0;     // pn[0] = x[0], pn[1] = x[1]
+            pixel_number |= (y & 0x01) << 2;     // pn[2] = y[0]
+            pixel_number |= (x & 0x04) << 1;     // pn[3] = x[2]
+            pixel_number |= (y & 0x06) << 3;     // pn[4] = y[1], pn[5] = y[2]
+            break;
+        }

Greetings,
Frieder
___________________________________________________________
NEU: WEB.DE DSL für 19,99 EUR/mtl. und ohne Mindest-Laufzeit!
http://produkte.web.de/go/02/
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/3] r600: add span support for 2D tiling
  2010-05-25 23:12 ` [PATCH 1/3] r600: add span support for 2D tiling Alex Deucher
  2010-05-25 23:12   ` [PATCH 2/3] r600: add new relocs for tiling support Alex Deucher
@ 2010-05-27 14:55   ` Matt Turner
  2010-05-27 15:20     ` Alex Deucher
  1 sibling, 1 reply; 16+ messages in thread
From: Matt Turner @ 2010-05-27 14:55 UTC (permalink / raw)
  To: Alex Deucher; +Cc: dri-devel

> +static inline GLint r600_log2(GLint n)
> +{
> +       GLint log2 = 0;
> +
> +       while (n >>= 1)
> +               ++log2;
> +       return log2;
> +}

Does mesa not provide something like this?

Matt

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/3] r600: add span support for 2D tiling
  2010-05-27 14:55   ` [PATCH 1/3] r600: add span support for 2D tiling Matt Turner
@ 2010-05-27 15:20     ` Alex Deucher
  2010-05-27 15:37       ` Alan Cox
  2010-05-27 15:51       ` Brian Paul
  0 siblings, 2 replies; 16+ messages in thread
From: Alex Deucher @ 2010-05-27 15:20 UTC (permalink / raw)
  To: Matt Turner; +Cc: dri-devel

On Thu, May 27, 2010 at 10:55 AM, Matt Turner <mattst88@gmail.com> wrote:
>> +static inline GLint r600_log2(GLint n)
>> +{
>> +       GLint log2 = 0;
>> +
>> +       while (n >>= 1)
>> +               ++log2;
>> +       return log2;
>> +}
>
> Does mesa not provide something like this?

The only one I could find was a gallium utility function.

Alex

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/3] r600: add span support for 2D tiling
  2010-05-27 15:20     ` Alex Deucher
@ 2010-05-27 15:37       ` Alan Cox
  2010-05-27 15:51       ` Brian Paul
  1 sibling, 0 replies; 16+ messages in thread
From: Alan Cox @ 2010-05-27 15:37 UTC (permalink / raw)
  To: Alex Deucher; +Cc: dri-devel

On Thu, 27 May 2010 11:20:59 -0400
Alex Deucher <alexdeucher@gmail.com> wrote:

> On Thu, May 27, 2010 at 10:55 AM, Matt Turner <mattst88@gmail.com> wrote:
> >> +static inline GLint r600_log2(GLint n)
> >> +{
> >> +       GLint log2 = 0;
> >> +
> >> +       while (n >>= 1)
> >> +               ++log2;
> >> +       return log2;
> >> +}
> >
> > Does mesa not provide something like this?
> 
> The only one I could find was a gallium utility function.

include/linux/log2.h

The original is from Red Hat so if you need it non GPL maybe Red Hat can
help ?

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/3] r600: add span support for 2D tiling
  2010-05-27 15:20     ` Alex Deucher
  2010-05-27 15:37       ` Alan Cox
@ 2010-05-27 15:51       ` Brian Paul
  2010-05-27 22:04         ` Conn Clark
  1 sibling, 1 reply; 16+ messages in thread
From: Brian Paul @ 2010-05-27 15:51 UTC (permalink / raw)
  To: Alex Deucher; +Cc: dri-devel@lists.freedesktop.org

Alex Deucher wrote:
> On Thu, May 27, 2010 at 10:55 AM, Matt Turner <mattst88@gmail.com> wrote:
>>> +static inline GLint r600_log2(GLint n)
>>> +{
>>> +       GLint log2 = 0;
>>> +
>>> +       while (n >>= 1)
>>> +               ++log2;
>>> +       return log2;
>>> +}
>> Does mesa not provide something like this?
> 
> The only one I could find was a gallium utility function.

There's a logbase2() function in teximage.c but it might not be 
equivalent.

-Brian

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/3] r600: add span support for 2D tiling
  2010-05-27 15:51       ` Brian Paul
@ 2010-05-27 22:04         ` Conn Clark
  2010-05-27 22:36           ` Alan Cox
  2010-05-27 23:01           ` Frieder Ferlemann
  0 siblings, 2 replies; 16+ messages in thread
From: Conn Clark @ 2010-05-27 22:04 UTC (permalink / raw)
  To: Brian Paul; +Cc: dri-devel@lists.freedesktop.org

On Thu, May 27, 2010 at 8:51 AM, Brian Paul <brianp@vmware.com> wrote:
> Alex Deucher wrote:
>>
>> On Thu, May 27, 2010 at 10:55 AM, Matt Turner <mattst88@gmail.com> wrote:
>>>>
>>>> +static inline GLint r600_log2(GLint n)
>>>> +{
>>>> +       GLint log2 = 0;
>>>> +
>>>> +       while (n >>= 1)
>>>> +               ++log2;
>>>> +       return log2;
>>>> +}
>>>
>>> Does mesa not provide something like this?
>>
>> The only one I could find was a gallium utility function.
>
> There's a logbase2() function in teximage.c but it might not be equivalent.
>
> -Brian
>
> _______________________________________________
> dri-devel mailing list
> dri-devel@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/dri-devel
>

This code could be written with a faster algorithm requiring  just 13 operations

+               pixel_number |= ((x >> 0) & 1) << 0; // pn[0] = x[0]
+               pixel_number |= ((y >> 0) & 1) << 1; // pn[1] = y[0]
+               pixel_number |= ((x >> 1) & 1) << 2; // pn[2] = x[1]
+               pixel_number |= ((y >> 1) & 1) << 3; // pn[3] = y[1]
+               pixel_number |= ((x >> 2) & 1) << 4; // pn[4] = x[2]
+               pixel_number |= ((y >> 2) & 1) << 5; // pn[5] = y[2]



/* suitable for all 16 bit or greater processors that can do an
unsigned 16 bit or greater multiply */
/*  tested and verified  */

pixel_number = ((((x & 0x07) * 0x1111 & 0x8421) * 0x1249 >> 9) & 0x55 ) |
                             ((((y & 0x07) * 0x1111 & 0x8421) * 0x1249
>> 8) & 0xAA );

Note if it is known that x and y are less than or equal to 7 it can be
done in 11 operations.

Cheers

Conn
-- 

Conn O. Clark

Observation: In formal computer science advances are made
by standing on the shoulders of giants. Linux has proved
that if there are enough of you, you can advance just as
far by stepping on each others toes.
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/3] r600: add span support for 2D tiling
  2010-05-27 22:04         ` Conn Clark
@ 2010-05-27 22:36           ` Alan Cox
  2010-05-27 23:01           ` Frieder Ferlemann
  1 sibling, 0 replies; 16+ messages in thread
From: Alan Cox @ 2010-05-27 22:36 UTC (permalink / raw)
  To: Conn Clark; +Cc: dri-devel@lists.freedesktop.org

> Note if it is known that x and y are less than or equal to 7 it can be
> done in 11 operations.

And bsr is one instruction for x86, cntlzw for ppc

Alan

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/3] r600: add span support for 2D tiling
  2010-05-27 22:04         ` Conn Clark
  2010-05-27 22:36           ` Alan Cox
@ 2010-05-27 23:01           ` Frieder Ferlemann
  2010-05-27 23:34             ` Conn Clark
  1 sibling, 1 reply; 16+ messages in thread
From: Frieder Ferlemann @ 2010-05-27 23:01 UTC (permalink / raw)
  To: dri-devel@lists.freedesktop.org

Hi,

Am 28.05.2010 00:04, schrieb Conn Clark:
> On Thu, May 27, 2010 at 8:51 AM, Brian Paul <brianp@vmware.com> wrote:
> 
> This code could be written with a faster algorithm requiring  just 13 operations
> 
> +               pixel_number |= ((x >> 0) & 1) << 0; // pn[0] = x[0]
> +               pixel_number |= ((y >> 0) & 1) << 1; // pn[1] = y[0]
> +               pixel_number |= ((x >> 1) & 1) << 2; // pn[2] = x[1]
> +               pixel_number |= ((y >> 1) & 1) << 3; // pn[3] = y[1]
> +               pixel_number |= ((x >> 2) & 1) << 4; // pn[4] = x[2]
> +               pixel_number |= ((y >> 2) & 1) << 5; // pn[5] = y[2]
> 


> /* suitable for all 16 bit or greater processors that can do an
> unsigned 16 bit or greater multiply */
> /*  tested and verified  */
> 
> pixel_number = ((((x & 0x07) * 0x1111 & 0x8421) * 0x1249 >> 9) & 0x55 ) |
>                ((((y & 0x07) * 0x1111 & 0x8421) * 0x1249 >> 8) & 0xAA );
> 
> Note if it is known that x and y are less than or equal to 7 it can be
> done in 11 operations.

Cool. How does it compare to:

        const unsigned char /*int*/ spread_bits[8] = {
                0x00,  /* 0b000 to 0b00000 */
                0x01,  /* 0b001 to 0b00001 */
                0x04,  /* 0b010 to 0b00100 */
                0x05,  /* 0b011 to 0b00101 */
                0x10,  /* 0b100 to 0b10000 */
                0x11,  /* 0b101 to 0b10001 */
                0x14,  /* 0b110 to 0b10100 */
                0x15,  /* 0b111 to 0b10101 */
        };

        pixel_number |= spread_bits[x & 0x07];
        pixel_number |= spread_bits[y & 0x07] << 1;


Greetings,
Frieder

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/3] r600: add span support for 2D tiling
  2010-05-27 23:01           ` Frieder Ferlemann
@ 2010-05-27 23:34             ` Conn Clark
  2010-05-27 23:52               ` Alan Cox
  0 siblings, 1 reply; 16+ messages in thread
From: Conn Clark @ 2010-05-27 23:34 UTC (permalink / raw)
  To: Frieder Ferlemann; +Cc: dri-devel@lists.freedesktop.org

On Thu, May 27, 2010 at 4:01 PM, Frieder Ferlemann
<frieder.ferlemann@web.de> wrote:
> Hi,
>
> Am 28.05.2010 00:04, schrieb Conn Clark:
>> On Thu, May 27, 2010 at 8:51 AM, Brian Paul <brianp@vmware.com> wrote:
>>
>> This code could be written with a faster algorithm requiring  just 13 operations
>>
>> +               pixel_number |= ((x >> 0) & 1) << 0; // pn[0] = x[0]
>> +               pixel_number |= ((y >> 0) & 1) << 1; // pn[1] = y[0]
>> +               pixel_number |= ((x >> 1) & 1) << 2; // pn[2] = x[1]
>> +               pixel_number |= ((y >> 1) & 1) << 3; // pn[3] = y[1]
>> +               pixel_number |= ((x >> 2) & 1) << 4; // pn[4] = x[2]
>> +               pixel_number |= ((y >> 2) & 1) << 5; // pn[5] = y[2]
>>
>
>
>> /* suitable for all 16 bit or greater processors that can do an
>> unsigned 16 bit or greater multiply */
>> /*  tested and verified  */
>>
>> pixel_number = ((((x & 0x07) * 0x1111 & 0x8421) * 0x1249 >> 9) & 0x55 ) |
>>                ((((y & 0x07) * 0x1111 & 0x8421) * 0x1249 >> 8) & 0xAA );
>>
>> Note if it is known that x and y are less than or equal to 7 it can be
>> done in 11 operations.
>
> Cool. How does it compare to:
>
>        const unsigned char /*int*/ spread_bits[8] = {
>                0x00,  /* 0b000 to 0b00000 */
>                0x01,  /* 0b001 to 0b00001 */
>                0x04,  /* 0b010 to 0b00100 */
>                0x05,  /* 0b011 to 0b00101 */
>                0x10,  /* 0b100 to 0b10000 */
>                0x11,  /* 0b101 to 0b10001 */
>                0x14,  /* 0b110 to 0b10100 */
>                0x15,  /* 0b111 to 0b10101 */
>        };
>
>        pixel_number |= spread_bits[x & 0x07];
>        pixel_number |= spread_bits[y & 0x07] << 1;
>
>
> Greetings,
> Frieder
>

Look up tables have some hidden penalties but I think it might be a
win. Looks like we may have to benchmark the solutions against one
another to really know which is best in real life.

Conn

-- 

Conn O. Clark

Observation: In formal computer science advances are made
by standing on the shoulders of giants. Linux has proved
that if there are enough of you, you can advance just as
far by stepping on each others toes.
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/3] r600: add span support for 2D tiling
  2010-05-27 23:52               ` Alan Cox
@ 2010-05-27 23:47                 ` Alex Deucher
  2010-05-28  0:10                 ` Matt Turner
  1 sibling, 0 replies; 16+ messages in thread
From: Alex Deucher @ 2010-05-27 23:47 UTC (permalink / raw)
  To: Alan Cox; +Cc: Frieder Ferlemann, dri-devel@lists.freedesktop.org

On Thu, May 27, 2010 at 7:52 PM, Alan Cox <alan@lxorguk.ukuu.org.uk> wrote:
>> Look up tables have some hidden penalties but I think it might be a
>> win. Looks like we may have to benchmark the solutions against one
>> another to really know which is best in real life.
>
> For x86 and ppc the single assembler instruction is fastest. Can you wire
> an R600 to anything else ?

Anything with a pci or pcie bus in theory.  This is a slow path
already, so it doesn't really matter what we use.  I'd prefer to keep
the code readable.

Alex

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/3] r600: add span support for 2D tiling
  2010-05-27 23:34             ` Conn Clark
@ 2010-05-27 23:52               ` Alan Cox
  2010-05-27 23:47                 ` Alex Deucher
  2010-05-28  0:10                 ` Matt Turner
  0 siblings, 2 replies; 16+ messages in thread
From: Alan Cox @ 2010-05-27 23:52 UTC (permalink / raw)
  To: Conn Clark; +Cc: dri-devel@lists.freedesktop.org, Frieder Ferlemann

> Look up tables have some hidden penalties but I think it might be a
> win. Looks like we may have to benchmark the solutions against one
> another to really know which is best in real life.

For x86 and ppc the single assembler instruction is fastest. Can you wire
an R600 to anything else ?

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/3] r600: add span support for 2D tiling
  2010-05-27 23:52               ` Alan Cox
  2010-05-27 23:47                 ` Alex Deucher
@ 2010-05-28  0:10                 ` Matt Turner
  1 sibling, 0 replies; 16+ messages in thread
From: Matt Turner @ 2010-05-28  0:10 UTC (permalink / raw)
  To: Alan Cox; +Cc: Frieder Ferlemann, dri-devel@lists.freedesktop.org

On Thu, May 27, 2010 at 7:52 PM, Alan Cox <alan@lxorguk.ukuu.org.uk> wrote:
>> Look up tables have some hidden penalties but I think it might be a
>> win. Looks like we may have to benchmark the solutions against one
>> another to really know which is best in real life.
>
> For x86 and ppc the single assembler instruction is fastest. Can you wire
> an R600 to anything else ?

2400HD and 4350HD are available as PCI, so I could get one in an
Alpha, but I haven't yet.

(Alpha has count-{leading,trailing} zero instructions too)

Matt

^ permalink raw reply	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2010-05-28  0:10 UTC | newest]

Thread overview: 16+ messages (download: mbox.gz, follow: Atom feed)
-- links below jump to the message on this page --
2010-05-25 23:12 [PATCH 0/3] Add r6xx/r7xx tiling support to mesa Alex Deucher
2010-05-25 23:12 ` [PATCH 1/3] r600: add span support for 2D tiling Alex Deucher
2010-05-25 23:12   ` [PATCH 2/3] r600: add new relocs for tiling support Alex Deucher
2010-05-25 23:12     ` [PATCH 3/3] r600: add support for getting the tiling config via drm ioctl Alex Deucher
2010-05-27 14:55   ` [PATCH 1/3] r600: add span support for 2D tiling Matt Turner
2010-05-27 15:20     ` Alex Deucher
2010-05-27 15:37       ` Alan Cox
2010-05-27 15:51       ` Brian Paul
2010-05-27 22:04         ` Conn Clark
2010-05-27 22:36           ` Alan Cox
2010-05-27 23:01           ` Frieder Ferlemann
2010-05-27 23:34             ` Conn Clark
2010-05-27 23:52               ` Alan Cox
2010-05-27 23:47                 ` Alex Deucher
2010-05-28  0:10                 ` Matt Turner
  -- strict thread matches above, loose matches on Subject: below --
2010-05-27  7:54 Frieder Ferlemann

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.