All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/3] drm/i915: swizzling support for snb/ivb
@ 2012-01-31 15:47 Daniel Vetter
  2012-01-31 15:47 ` [PATCH 2/3] drm/i915: consolidate swizzling control bit frobbing Daniel Vetter
                   ` (2 more replies)
  0 siblings, 3 replies; 22+ messages in thread
From: Daniel Vetter @ 2012-01-31 15:47 UTC (permalink / raw)
  To: Intel Graphics Development; +Cc: Daniel Vetter, Ben Widawsky

We have to do this manually. Somebody had a Great Idea.

I've measured speed-ups just a few percent above the noise level
(below 5% for the best case), but no slowdows. Chris Wilson measured
quite a bit more (10-20% above the usual snb variance) on a more
recent and better tuned version of sna, but also recorded a few
slow-downs on benchmarks know for uglier amounts of snb-induced
variance.

v2: Incorporate Ben Widawsky's preliminary review comments and
elaborate a bit about the performance impact in the changelog.

Acked-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-Off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_dma.c        |    2 +-
 drivers/gpu/drm/i915/i915_drv.c        |    4 ++-
 drivers/gpu/drm/i915/i915_drv.h        |    3 +-
 drivers/gpu/drm/i915/i915_gem.c        |   23 +++++++++++++++++++-
 drivers/gpu/drm/i915/i915_gem_tiling.c |   16 +++++++++++++-
 drivers/gpu/drm/i915/i915_reg.h        |   34 ++++++++++++++++++++++++++++++++
 6 files changed, 75 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 3f27173..dfef956 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -1208,7 +1208,7 @@ static int i915_load_gem_init(struct drm_device *dev)
 	i915_gem_do_init(dev, 0, mappable_size, gtt_size - PAGE_SIZE);
 
 	mutex_lock(&dev->struct_mutex);
-	ret = i915_gem_init_ringbuffer(dev);
+	ret = i915_gem_init_hw(dev);
 	mutex_unlock(&dev->struct_mutex);
 	if (ret)
 		return ret;
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 1658cfd..12ddf47 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -495,7 +495,7 @@ static int i915_drm_thaw(struct drm_device *dev)
 		mutex_lock(&dev->struct_mutex);
 		dev_priv->mm.suspended = 0;
 
-		error = i915_gem_init_ringbuffer(dev);
+		error = i915_gem_init_hw(dev);
 		mutex_unlock(&dev->struct_mutex);
 
 		if (HAS_PCH_SPLIT(dev))
@@ -686,6 +686,8 @@ int i915_reset(struct drm_device *dev, u8 flags)
 			!dev_priv->mm.suspended) {
 		dev_priv->mm.suspended = 0;
 
+		i915_gem_init_swizzling(dev);
+
 		dev_priv->ring[RCS].init(&dev_priv->ring[RCS]);
 		if (HAS_BSD(dev))
 		    dev_priv->ring[VCS].init(&dev_priv->ring[VCS]);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 865de80..0845419 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1187,7 +1187,8 @@ int __must_check i915_gem_object_set_domain(struct drm_i915_gem_object *obj,
 					    uint32_t read_domains,
 					    uint32_t write_domain);
 int __must_check i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj);
-int __must_check i915_gem_init_ringbuffer(struct drm_device *dev);
+int __must_check i915_gem_init_hw(struct drm_device *dev);
+void i915_gem_init_swizzling(struct drm_device *dev);
 void i915_gem_cleanup_ringbuffer(struct drm_device *dev);
 void i915_gem_do_init(struct drm_device *dev,
 		      unsigned long start,
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 51a2b0c..86fffd2 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3681,12 +3681,31 @@ i915_gem_idle(struct drm_device *dev)
 	return 0;
 }
 
+void i915_gem_init_swizzling(struct drm_device *dev)
+{
+	drm_i915_private_t *dev_priv = dev->dev_private;
+
+	if (INTEL_INFO(dev)->gen < 6 ||
+	    dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE)
+		return;
+
+	I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) |
+				 DISP_TILE_SURFACE_SWIZZLING);
+
+	I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL);
+	if (IS_GEN6(dev))
+		I915_WRITE(ARB_MODE, ARB_MODE_ENABLE(ARB_MODE_SWIZZLE_SNB));
+	else
+		I915_WRITE(ARB_MODE, ARB_MODE_ENABLE(ARB_MODE_SWIZZLE_IVB));
+}
 int
-i915_gem_init_ringbuffer(struct drm_device *dev)
+i915_gem_init_hw(struct drm_device *dev)
 {
 	drm_i915_private_t *dev_priv = dev->dev_private;
 	int ret;
 
+	i915_gem_init_swizzling(dev);
+
 	ret = intel_init_render_ring_buffer(dev);
 	if (ret)
 		return ret;
@@ -3742,7 +3761,7 @@ i915_gem_entervt_ioctl(struct drm_device *dev, void *data,
 	mutex_lock(&dev->struct_mutex);
 	dev_priv->mm.suspended = 0;
 
-	ret = i915_gem_init_ringbuffer(dev);
+	ret = i915_gem_init_hw(dev);
 	if (ret != 0) {
 		mutex_unlock(&dev->struct_mutex);
 		return ret;
diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c
index 861223b..acf89fe 100644
--- a/drivers/gpu/drm/i915/i915_gem_tiling.c
+++ b/drivers/gpu/drm/i915/i915_gem_tiling.c
@@ -93,8 +93,20 @@ i915_gem_detect_bit_6_swizzle(struct drm_device *dev)
 	uint32_t swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN;
 
 	if (INTEL_INFO(dev)->gen >= 6) {
-		swizzle_x = I915_BIT_6_SWIZZLE_NONE;
-		swizzle_y = I915_BIT_6_SWIZZLE_NONE;
+		uint32_t dimm_c0, dimm_c1;
+		dimm_c0 = I915_READ(MAD_DIMM_C0);
+		dimm_c1 = I915_READ(MAD_DIMM_C1);
+		dimm_c0 &= MAD_DIMM_A_SIZE_MASK | MAD_DIMM_B_SIZE_MASK;
+		dimm_c1 &= MAD_DIMM_A_SIZE_MASK | MAD_DIMM_B_SIZE_MASK;
+		/* Enable swizzling when the channels are populated with
+		 * identically sized dimms. */
+		if (dimm_c0 == dimm_c1) {
+			swizzle_x = I915_BIT_6_SWIZZLE_9_10;
+			swizzle_y = I915_BIT_6_SWIZZLE_9;
+		} else {
+			swizzle_x = I915_BIT_6_SWIZZLE_NONE;
+			swizzle_y = I915_BIT_6_SWIZZLE_NONE;
+		}
 	} else if (IS_GEN5(dev)) {
 		/* On Ironlake whatever DRAM config, GPU always do
 		 * same swizzling setup.
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index f960738..539ef90 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -295,6 +295,12 @@
 #define FENCE_REG_SANDYBRIDGE_0		0x100000
 #define   SANDYBRIDGE_FENCE_PITCH_SHIFT	32
 
+/* control register for cpu gtt access */
+#define TILECTL				0x101000
+#define   TILECTL_SWZCTL			(1 << 0)
+#define   TILECTL_TLB_PREFETCH_DIS	(1 << 2)
+#define   TILECTL_BACKSNOOP_DIS		(1 << 3)
+
 /*
  * Instruction and interrupt control regs
  */
@@ -318,6 +324,11 @@
 #define RING_MAX_IDLE(base)	((base)+0x54)
 #define RING_HWS_PGA(base)	((base)+0x80)
 #define RING_HWS_PGA_GEN6(base)	((base)+0x2080)
+#define ARB_MODE		0x04030
+#define   ARB_MODE_SWIZZLE_SNB	(1<<4)
+#define   ARB_MODE_SWIZZLE_IVB	(1<<5)
+#define   ARB_MODE_ENABLE(x)	GFX_MODE_ENABLE(x)
+#define   ARB_MODE_DISABLE(x)	GFX_MODE_DISABLE(x)
 #define RENDER_HWS_PGA_GEN7	(0x04080)
 #define RING_FAULT_REG(ring)	(0x4094 + 0x100*(ring)->id)
 #define DONE_REG		0x40b0
@@ -1037,6 +1048,29 @@
 #define C0DRB3			0x10206
 #define C1DRB3			0x10606
 
+/** snb MCH registers for reading the DRAM channel configuration */
+#define MAD_DIMM_C0			(MCHBAR_MIRROR_BASE_SNB + 0x5004)
+#define   MAD_DIMM_C1			(MCHBAR_MIRROR_BASE_SNB + 0x5008)
+#define   MAD_DIMM_C2			(MCHBAR_MIRROR_BASE_SNB + 0x500C)
+#define   MAD_DIMM_ECC_MASK		(0x3 << 24)
+#define   MAD_DIMM_ECC_OFF		(0x0 << 24)
+#define   MAD_DIMM_ECC_IO_ON_LOGIC_OFF	(0x1 << 24)
+#define   MAD_DIMM_ECC_IO_OFF_LOGIC_ON	(0x2 << 24)
+#define   MAD_DIMM_ECC_ON		(0x3 << 24)
+#define   MAD_DIMM_ENH_INTERLEAVE	(0x1 << 22)
+#define   MAD_DIMM_RANK_INTERLEAVE	(0x1 << 21)
+#define   MAD_DIMM_B_WIDTH_X16		(0x1 << 20) /* X8 chips if unset */
+#define   MAD_DIMM_A_WIDTH_X16		(0x1 << 19) /* X8 chips if unset */
+#define   MAD_DIMM_B_DUAL_RANK		(0x1 << 18)
+#define   MAD_DIMM_A_DUAL_RANK		(0x1 << 17)
+#define   MAD_DIMM_A_SELECT		(0x1 << 16)
+/* DIMM sizes are in multiples of 256mb. */
+#define   MAD_DIMM_B_SIZE_SHIFT		8
+#define   MAD_DIMM_B_SIZE_MASK		(0xff << MAD_DIMM_B_SIZE_SHIFT)
+#define   MAD_DIMM_A_SIZE_SHIFT		0
+#define   MAD_DIMM_A_SIZE_MASK		(0xff << MAD_DIMM_A_SIZE_SHIFT)
+
+
 /* Clocking configuration register */
 #define CLKCFG			0x10c00
 #define CLKCFG_FSB_400					(5 << 0)	/* hrawclk 100 */
-- 
1.7.7.5

^ permalink raw reply related	[flat|nested] 22+ messages in thread

end of thread, other threads:[~2012-02-08 22:20 UTC | newest]

Thread overview: 22+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2012-01-31 15:47 [PATCH 1/3] drm/i915: swizzling support for snb/ivb Daniel Vetter
2012-01-31 15:47 ` [PATCH 2/3] drm/i915: consolidate swizzling control bit frobbing Daniel Vetter
2012-02-01 21:37   ` Ben Widawsky
2012-02-01 22:23     ` Daniel Vetter
2012-02-08 22:17   ` Ben Widawsky
2012-02-08 22:19     ` Daniel Vetter
2012-01-31 15:47 ` [PATCH 3/3] drm/i915: add gen6+ registers to i915_swizzle_info Daniel Vetter
2012-02-01 21:39   ` Ben Widawsky
2012-02-08 22:20     ` Daniel Vetter
2012-02-01 21:35 ` [PATCH 1/3] drm/i915: swizzling support for snb/ivb Ben Widawsky
2012-02-01 22:16   ` Daniel Vetter
2012-02-01 22:26     ` Chris Wilson
2012-02-01 23:15       ` [PATCH] " Daniel Vetter
2012-02-02  5:30         ` Ben Widawsky
2012-02-02  8:40           ` Daniel Vetter
2012-02-05  3:13             ` Ben Widawsky
2012-02-06 16:06               ` Daniel Vetter
2012-02-02  8:58           ` Daniel Vetter
2012-02-04 20:59             ` Eric Anholt
2012-02-06 15:45               ` Daniel Vetter
2012-02-07 19:56                 ` Eric Anholt
2012-02-08 22:17                   ` Daniel Vetter

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.