* [PATCH 1/3] drm/nouveau: Pre-G80 tiling support.
@ 2009-12-11 18:33 Francisco Jerez
[not found] ` <1260556404-13389-1-git-send-email-currojerez-sGOZH3hwPm2sTnJN9+BGXg@public.gmane.org>
0 siblings, 1 reply; 7+ messages in thread
From: Francisco Jerez @ 2009-12-11 18:33 UTC (permalink / raw)
To: nouveau-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
Signed-off-by: Francisco Jerez <currojerez-sGOZH3hwPm2sTnJN9+BGXg@public.gmane.org>
---
drivers/gpu/drm/nouveau/nouveau_drv.h | 23 +++++
drivers/gpu/drm/nouveau/nouveau_reg.h | 16 ++--
drivers/gpu/drm/nouveau/nouveau_state.c | 8 ++
drivers/gpu/drm/nouveau/nv10_fb.c | 32 ++++++--
drivers/gpu/drm/nouveau/nv10_graph.c | 47 ++++++++---
drivers/gpu/drm/nouveau/nv20_graph.c | 80 +++++++++++--------
drivers/gpu/drm/nouveau/nv40_fb.c | 53 ++++++++-----
drivers/gpu/drm/nouveau/nv40_graph.c | 135 +++++++++++++++----------------
8 files changed, 247 insertions(+), 147 deletions(-)
diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h
index 88b4c7b..2730497 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drv.h
+++ b/drivers/gpu/drm/nouveau/nouveau_drv.h
@@ -276,8 +276,13 @@ struct nouveau_timer_engine {
};
struct nouveau_fb_engine {
+ int num_tiles;
+
int (*init)(struct drm_device *dev);
void (*takedown)(struct drm_device *dev);
+
+ void (*set_region_tiling)(struct drm_device *dev, int i, uint32_t addr,
+ uint32_t size, uint32_t pitch);
};
struct nouveau_fifo_engine {
@@ -328,6 +333,9 @@ struct nouveau_pgraph_engine {
void (*destroy_context)(struct nouveau_channel *);
int (*load_context)(struct nouveau_channel *);
int (*unload_context)(struct drm_device *);
+
+ void (*set_region_tiling)(struct drm_device *dev, int i, uint32_t addr,
+ uint32_t size, uint32_t pitch);
};
struct nouveau_engine {
@@ -876,10 +884,16 @@ extern void nv04_fb_takedown(struct drm_device *);
/* nv10_fb.c */
extern int nv10_fb_init(struct drm_device *);
extern void nv10_fb_takedown(struct drm_device *);
+extern void nv10_fb_set_region_tiling(struct drm_device *dev, int i,
+ uint32_t addr, uint32_t size,
+ uint32_t pitch);
/* nv40_fb.c */
extern int nv40_fb_init(struct drm_device *);
extern void nv40_fb_takedown(struct drm_device *);
+extern void nv40_fb_set_region_tiling(struct drm_device *dev, int i,
+ uint32_t addr, uint32_t size,
+ uint32_t pitch);
/* nv04_fifo.c */
extern int nv04_fifo_init(struct drm_device *);
@@ -938,6 +952,9 @@ extern void nv10_graph_destroy_context(struct nouveau_channel *);
extern int nv10_graph_load_context(struct nouveau_channel *);
extern int nv10_graph_unload_context(struct drm_device *);
extern void nv10_graph_context_switch(struct drm_device *);
+extern void nv10_graph_set_region_tiling(struct drm_device *dev, int i,
+ uint32_t addr, uint32_t size,
+ uint32_t pitch);
/* nv20_graph.c */
extern struct nouveau_pgraph_object_class nv20_graph_grclass[];
@@ -949,6 +966,9 @@ extern int nv20_graph_unload_context(struct drm_device *);
extern int nv20_graph_init(struct drm_device *);
extern void nv20_graph_takedown(struct drm_device *);
extern int nv30_graph_init(struct drm_device *);
+extern void nv20_graph_set_region_tiling(struct drm_device *dev, int i,
+ uint32_t addr, uint32_t size,
+ uint32_t pitch);
/* nv40_graph.c */
extern struct nouveau_pgraph_object_class nv40_graph_grclass[];
@@ -962,6 +982,9 @@ extern int nv40_graph_unload_context(struct drm_device *);
extern int nv40_grctx_init(struct drm_device *);
extern void nv40_grctx_fini(struct drm_device *);
extern void nv40_grctx_vals_load(struct drm_device *, struct nouveau_gpuobj *);
+extern void nv40_graph_set_region_tiling(struct drm_device *dev, int i,
+ uint32_t addr, uint32_t size,
+ uint32_t pitch);
/* nv50_graph.c */
extern struct nouveau_pgraph_object_class nv50_graph_grclass[];
diff --git a/drivers/gpu/drm/nouveau/nouveau_reg.h b/drivers/gpu/drm/nouveau/nouveau_reg.h
index fa1b0e7..251f1b3 100644
--- a/drivers/gpu/drm/nouveau/nouveau_reg.h
+++ b/drivers/gpu/drm/nouveau/nouveau_reg.h
@@ -349,19 +349,19 @@
#define NV04_PGRAPH_BLEND 0x00400824
#define NV04_PGRAPH_STORED_FMT 0x00400830
#define NV04_PGRAPH_PATT_COLORRAM 0x00400900
-#define NV40_PGRAPH_TILE0(i) (0x00400900 + (i*16))
-#define NV40_PGRAPH_TLIMIT0(i) (0x00400904 + (i*16))
-#define NV40_PGRAPH_TSIZE0(i) (0x00400908 + (i*16))
-#define NV40_PGRAPH_TSTATUS0(i) (0x0040090C + (i*16))
+#define NV20_PGRAPH_TILE(i) (0x00400900 + (i*16))
+#define NV20_PGRAPH_TLIMIT(i) (0x00400904 + (i*16))
+#define NV20_PGRAPH_TSIZE(i) (0x00400908 + (i*16))
+#define NV20_PGRAPH_TSTATUS(i) (0x0040090C + (i*16))
#define NV10_PGRAPH_TILE(i) (0x00400B00 + (i*16))
#define NV10_PGRAPH_TLIMIT(i) (0x00400B04 + (i*16))
#define NV10_PGRAPH_TSIZE(i) (0x00400B08 + (i*16))
#define NV10_PGRAPH_TSTATUS(i) (0x00400B0C + (i*16))
#define NV04_PGRAPH_U_RAM 0x00400D00
-#define NV47_PGRAPH_TILE0(i) (0x00400D00 + (i*16))
-#define NV47_PGRAPH_TLIMIT0(i) (0x00400D04 + (i*16))
-#define NV47_PGRAPH_TSIZE0(i) (0x00400D08 + (i*16))
-#define NV47_PGRAPH_TSTATUS0(i) (0x00400D0C + (i*16))
+#define NV47_PGRAPH_TILE(i) (0x00400D00 + (i*16))
+#define NV47_PGRAPH_TLIMIT(i) (0x00400D04 + (i*16))
+#define NV47_PGRAPH_TSIZE(i) (0x00400D08 + (i*16))
+#define NV47_PGRAPH_TSTATUS(i) (0x00400D0C + (i*16))
#define NV04_PGRAPH_V_RAM 0x00400D40
#define NV04_PGRAPH_W_RAM 0x00400D80
#define NV10_PGRAPH_COMBINER0_IN_ALPHA 0x00400E40
diff --git a/drivers/gpu/drm/nouveau/nouveau_state.c b/drivers/gpu/drm/nouveau/nouveau_state.c
index 2ed41d3..4342867 100644
--- a/drivers/gpu/drm/nouveau/nouveau_state.c
+++ b/drivers/gpu/drm/nouveau/nouveau_state.c
@@ -100,6 +100,7 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev)
engine->timer.takedown = nv04_timer_takedown;
engine->fb.init = nv10_fb_init;
engine->fb.takedown = nv10_fb_takedown;
+ engine->fb.set_region_tiling = nv10_fb_set_region_tiling;
engine->graph.grclass = nv10_graph_grclass;
engine->graph.init = nv10_graph_init;
engine->graph.takedown = nv10_graph_takedown;
@@ -109,6 +110,7 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev)
engine->graph.fifo_access = nv04_graph_fifo_access;
engine->graph.load_context = nv10_graph_load_context;
engine->graph.unload_context = nv10_graph_unload_context;
+ engine->graph.set_region_tiling = nv10_graph_set_region_tiling;
engine->fifo.channels = 32;
engine->fifo.init = nv10_fifo_init;
engine->fifo.takedown = nouveau_stub_takedown;
@@ -139,6 +141,7 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev)
engine->timer.takedown = nv04_timer_takedown;
engine->fb.init = nv10_fb_init;
engine->fb.takedown = nv10_fb_takedown;
+ engine->fb.set_region_tiling = nv10_fb_set_region_tiling;
engine->graph.grclass = nv20_graph_grclass;
engine->graph.init = nv20_graph_init;
engine->graph.takedown = nv20_graph_takedown;
@@ -148,6 +151,7 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev)
engine->graph.fifo_access = nv04_graph_fifo_access;
engine->graph.load_context = nv20_graph_load_context;
engine->graph.unload_context = nv20_graph_unload_context;
+ engine->graph.set_region_tiling = nv20_graph_set_region_tiling;
engine->fifo.channels = 32;
engine->fifo.init = nv10_fifo_init;
engine->fifo.takedown = nouveau_stub_takedown;
@@ -178,6 +182,7 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev)
engine->timer.takedown = nv04_timer_takedown;
engine->fb.init = nv10_fb_init;
engine->fb.takedown = nv10_fb_takedown;
+ engine->fb.set_region_tiling = nv10_fb_set_region_tiling;
engine->graph.grclass = nv30_graph_grclass;
engine->graph.init = nv30_graph_init;
engine->graph.takedown = nv20_graph_takedown;
@@ -187,6 +192,7 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev)
engine->graph.destroy_context = nv20_graph_destroy_context;
engine->graph.load_context = nv20_graph_load_context;
engine->graph.unload_context = nv20_graph_unload_context;
+ engine->graph.set_region_tiling = nv20_graph_set_region_tiling;
engine->fifo.channels = 32;
engine->fifo.init = nv10_fifo_init;
engine->fifo.takedown = nouveau_stub_takedown;
@@ -218,6 +224,7 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev)
engine->timer.takedown = nv04_timer_takedown;
engine->fb.init = nv40_fb_init;
engine->fb.takedown = nv40_fb_takedown;
+ engine->fb.set_region_tiling = nv40_fb_set_region_tiling;
engine->graph.grclass = nv40_graph_grclass;
engine->graph.init = nv40_graph_init;
engine->graph.takedown = nv40_graph_takedown;
@@ -227,6 +234,7 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev)
engine->graph.destroy_context = nv40_graph_destroy_context;
engine->graph.load_context = nv40_graph_load_context;
engine->graph.unload_context = nv40_graph_unload_context;
+ engine->graph.set_region_tiling = nv40_graph_set_region_tiling;
engine->fifo.channels = 32;
engine->fifo.init = nv40_fifo_init;
engine->fifo.takedown = nouveau_stub_takedown;
diff --git a/drivers/gpu/drm/nouveau/nv10_fb.c b/drivers/gpu/drm/nouveau/nv10_fb.c
index 79e2d10..cc5cda4 100644
--- a/drivers/gpu/drm/nouveau/nv10_fb.c
+++ b/drivers/gpu/drm/nouveau/nv10_fb.c
@@ -3,17 +3,37 @@
#include "nouveau_drv.h"
#include "nouveau_drm.h"
+void
+nv10_fb_set_region_tiling(struct drm_device *dev, int i, uint32_t addr,
+ uint32_t size, uint32_t pitch)
+{
+ struct drm_nouveau_private *dev_priv = dev->dev_private;
+ uint32_t limit = max(1u, addr + size) - 1;
+
+ if (pitch) {
+ if (dev_priv->card_type >= NV_20)
+ addr |= 1;
+ else
+ addr |= 1 << 31;
+ }
+
+ nv_wr32(dev, NV10_PFB_TLIMIT(i), limit);
+ nv_wr32(dev, NV10_PFB_TSIZE(i), pitch);
+ nv_wr32(dev, NV10_PFB_TILE(i), addr);
+}
+
int
nv10_fb_init(struct drm_device *dev)
{
- uint32_t fb_bar_size;
+ struct drm_nouveau_private *dev_priv = dev->dev_private;
+ struct nouveau_fb_engine *pfb = &dev_priv->engine.fb;
int i;
- fb_bar_size = drm_get_resource_len(dev, 0) - 1;
- for (i = 0; i < NV10_PFB_TILE__SIZE; i++) {
- nv_wr32(dev, NV10_PFB_TILE(i), 0);
- nv_wr32(dev, NV10_PFB_TLIMIT(i), fb_bar_size);
- }
+ pfb->num_tiles = NV10_PFB_TILE__SIZE;
+
+ /* Turn all the tiling regions off. */
+ for (i = 0; i < pfb->num_tiles; i++)
+ pfb->set_region_tiling(dev, i, 0, 0, 0);
return 0;
}
diff --git a/drivers/gpu/drm/nouveau/nv10_graph.c b/drivers/gpu/drm/nouveau/nv10_graph.c
index 6bf6804..2aeac8b 100644
--- a/drivers/gpu/drm/nouveau/nv10_graph.c
+++ b/drivers/gpu/drm/nouveau/nv10_graph.c
@@ -808,6 +808,39 @@ void nv10_graph_destroy_context(struct nouveau_channel *chan)
chan->pgraph_ctx = NULL;
}
+static void
+nv10_graph_write_tile(struct drm_device *dev, int i, uint32_t addr,
+ uint32_t size, uint32_t pitch)
+{
+ uint32_t limit = max(1u, addr + size) - 1;
+
+ if (pitch)
+ addr |= 1 << 31;
+
+ nv_wr32(dev, NV10_PGRAPH_TLIMIT(i), limit);
+ nv_wr32(dev, NV10_PGRAPH_TSIZE(i), pitch);
+ nv_wr32(dev, NV10_PGRAPH_TILE(i), addr);
+}
+
+void
+nv10_graph_set_region_tiling(struct drm_device *dev, int i, uint32_t addr,
+ uint32_t size, uint32_t pitch)
+{
+ struct drm_nouveau_private *dev_priv = dev->dev_private;
+ struct nouveau_fifo_engine *pfifo = &dev_priv->engine.fifo;
+ struct nouveau_pgraph_engine *pgraph = &dev_priv->engine.graph;
+
+ pfifo->reassign(dev, false);
+ pgraph->fifo_access(dev, false);
+
+ nouveau_wait_for_idle(dev);
+
+ nv10_graph_write_tile(dev, i, addr, size, pitch);
+
+ pgraph->fifo_access(dev, true);
+ pfifo->reassign(dev, true);
+}
+
int nv10_graph_init(struct drm_device *dev)
{
struct drm_nouveau_private *dev_priv = dev->dev_private;
@@ -836,17 +869,9 @@ int nv10_graph_init(struct drm_device *dev)
} else
nv_wr32(dev, NV10_PGRAPH_DEBUG_4, 0x00000000);
- /* copy tile info from PFB */
- for (i = 0; i < NV10_PFB_TILE__SIZE; i++) {
- nv_wr32(dev, NV10_PGRAPH_TILE(i),
- nv_rd32(dev, NV10_PFB_TILE(i)));
- nv_wr32(dev, NV10_PGRAPH_TLIMIT(i),
- nv_rd32(dev, NV10_PFB_TLIMIT(i)));
- nv_wr32(dev, NV10_PGRAPH_TSIZE(i),
- nv_rd32(dev, NV10_PFB_TSIZE(i)));
- nv_wr32(dev, NV10_PGRAPH_TSTATUS(i),
- nv_rd32(dev, NV10_PFB_TSTATUS(i)));
- }
+ /* Turn all the tiling regions off. */
+ for (i = 0; i < NV10_PFB_TILE__SIZE; i++)
+ nv10_graph_write_tile(dev, i, 0, 0, 0);
nv_wr32(dev, NV10_PGRAPH_CTX_SWITCH1, 0x00000000);
nv_wr32(dev, NV10_PGRAPH_CTX_SWITCH2, 0x00000000);
diff --git a/drivers/gpu/drm/nouveau/nv20_graph.c b/drivers/gpu/drm/nouveau/nv20_graph.c
index 18ba74f..55d1a8e 100644
--- a/drivers/gpu/drm/nouveau/nv20_graph.c
+++ b/drivers/gpu/drm/nouveau/nv20_graph.c
@@ -514,6 +514,46 @@ nv20_graph_rdi(struct drm_device *dev)
nouveau_wait_for_idle(dev);
}
+static void
+nv20_graph_write_tile(struct drm_device *dev, int i, uint32_t addr,
+ uint32_t size, uint32_t pitch)
+{
+ uint32_t limit = max(1u, addr + size) - 1;
+
+ if (pitch)
+ addr |= 1;
+
+ nv_wr32(dev, NV20_PGRAPH_TLIMIT(i), limit);
+ nv_wr32(dev, NV20_PGRAPH_TSIZE(i), pitch);
+ nv_wr32(dev, NV20_PGRAPH_TILE(i), addr);
+
+ nv_wr32(dev, NV10_PGRAPH_RDI_INDEX, 0x00EA0030 + 4 * i);
+ nv_wr32(dev, NV10_PGRAPH_RDI_DATA, limit);
+ nv_wr32(dev, NV10_PGRAPH_RDI_INDEX, 0x00EA0050 + 4 * i);
+ nv_wr32(dev, NV10_PGRAPH_RDI_DATA, pitch);
+ nv_wr32(dev, NV10_PGRAPH_RDI_INDEX, 0x00EA0010 + 4 * i);
+ nv_wr32(dev, NV10_PGRAPH_RDI_DATA, addr);
+}
+
+void
+nv20_graph_set_region_tiling(struct drm_device *dev, int i, uint32_t addr,
+ uint32_t size, uint32_t pitch)
+{
+ struct drm_nouveau_private *dev_priv = dev->dev_private;
+ struct nouveau_fifo_engine *pfifo = &dev_priv->engine.fifo;
+ struct nouveau_pgraph_engine *pgraph = &dev_priv->engine.graph;
+
+ pfifo->reassign(dev, false);
+ pgraph->fifo_access(dev, false);
+
+ nouveau_wait_for_idle(dev);
+
+ nv20_graph_write_tile(dev, i, addr, size, pitch);
+
+ pgraph->fifo_access(dev, true);
+ pfifo->reassign(dev, true);
+}
+
int
nv20_graph_init(struct drm_device *dev)
{
@@ -572,27 +612,10 @@ nv20_graph_init(struct drm_device *dev)
nv_wr32(dev, NV10_PGRAPH_RDI_DATA , 0x00000030);
}
- /* copy tile info from PFB */
- for (i = 0; i < NV10_PFB_TILE__SIZE; i++) {
- nv_wr32(dev, 0x00400904 + i * 0x10,
- nv_rd32(dev, NV10_PFB_TLIMIT(i)));
- /* which is NV40_PGRAPH_TLIMIT0(i) ?? */
- nv_wr32(dev, NV10_PGRAPH_RDI_INDEX, 0x00EA0030 + i * 4);
- nv_wr32(dev, NV10_PGRAPH_RDI_DATA,
- nv_rd32(dev, NV10_PFB_TLIMIT(i)));
- nv_wr32(dev, 0x00400908 + i * 0x10,
- nv_rd32(dev, NV10_PFB_TSIZE(i)));
- /* which is NV40_PGRAPH_TSIZE0(i) ?? */
- nv_wr32(dev, NV10_PGRAPH_RDI_INDEX, 0x00EA0050 + i * 4);
- nv_wr32(dev, NV10_PGRAPH_RDI_DATA,
- nv_rd32(dev, NV10_PFB_TSIZE(i)));
- nv_wr32(dev, 0x00400900 + i * 0x10,
- nv_rd32(dev, NV10_PFB_TILE(i)));
- /* which is NV40_PGRAPH_TILE0(i) ?? */
- nv_wr32(dev, NV10_PGRAPH_RDI_INDEX, 0x00EA0010 + i * 4);
- nv_wr32(dev, NV10_PGRAPH_RDI_DATA,
- nv_rd32(dev, NV10_PFB_TILE(i)));
- }
+ /* Turn all the tiling regions off. */
+ for (i = 0; i < NV10_PFB_TILE__SIZE; i++)
+ nv20_graph_write_tile(dev, i, 0, 0, 0);
+
for (i = 0; i < 8; i++) {
nv_wr32(dev, 0x400980 + i * 4, nv_rd32(dev, 0x100300 + i * 4));
nv_wr32(dev, NV10_PGRAPH_RDI_INDEX, 0x00EA0090 + i * 4);
@@ -704,18 +727,9 @@ nv30_graph_init(struct drm_device *dev)
nv_wr32(dev, 0x4000c0, 0x00000016);
- /* copy tile info from PFB */
- for (i = 0; i < NV10_PFB_TILE__SIZE; i++) {
- nv_wr32(dev, 0x00400904 + i * 0x10,
- nv_rd32(dev, NV10_PFB_TLIMIT(i)));
- /* which is NV40_PGRAPH_TLIMIT0(i) ?? */
- nv_wr32(dev, 0x00400908 + i * 0x10,
- nv_rd32(dev, NV10_PFB_TSIZE(i)));
- /* which is NV40_PGRAPH_TSIZE0(i) ?? */
- nv_wr32(dev, 0x00400900 + i * 0x10,
- nv_rd32(dev, NV10_PFB_TILE(i)));
- /* which is NV40_PGRAPH_TILE0(i) ?? */
- }
+ /* Turn all the tiling regions off. */
+ for (i = 0; i < NV10_PFB_TILE__SIZE; i++)
+ nv20_graph_write_tile(dev, i, 0, 0, 0);
nv_wr32(dev, NV10_PGRAPH_CTX_CONTROL, 0x10000100);
nv_wr32(dev, NV10_PGRAPH_STATE , 0xFFFFFFFF);
diff --git a/drivers/gpu/drm/nouveau/nv40_fb.c b/drivers/gpu/drm/nouveau/nv40_fb.c
index ca1d271..3cd07d8 100644
--- a/drivers/gpu/drm/nouveau/nv40_fb.c
+++ b/drivers/gpu/drm/nouveau/nv40_fb.c
@@ -3,12 +3,37 @@
#include "nouveau_drv.h"
#include "nouveau_drm.h"
+void
+nv40_fb_set_region_tiling(struct drm_device *dev, int i, uint32_t addr,
+ uint32_t size, uint32_t pitch)
+{
+ struct drm_nouveau_private *dev_priv = dev->dev_private;
+ uint32_t limit = max(1u, addr + size) - 1;
+
+ if (pitch)
+ addr |= 1;
+
+ switch (dev_priv->chipset) {
+ case 0x40:
+ nv_wr32(dev, NV10_PFB_TLIMIT(i), limit);
+ nv_wr32(dev, NV10_PFB_TSIZE(i), pitch);
+ nv_wr32(dev, NV10_PFB_TILE(i), addr);
+ break;
+
+ default:
+ nv_wr32(dev, NV40_PFB_TLIMIT(i), limit);
+ nv_wr32(dev, NV40_PFB_TSIZE(i), pitch);
+ nv_wr32(dev, NV40_PFB_TILE(i), addr);
+ break;
+ }
+}
+
int
nv40_fb_init(struct drm_device *dev)
{
struct drm_nouveau_private *dev_priv = dev->dev_private;
- uint32_t fb_bar_size, tmp;
- int num_tiles;
+ struct nouveau_fb_engine *pfb = &dev_priv->engine.fb;
+ uint32_t tmp;
int i;
/* This is strictly a NV4x register (don't know about NV5x). */
@@ -23,35 +48,23 @@ nv40_fb_init(struct drm_device *dev)
case 0x45:
tmp = nv_rd32(dev, NV10_PFB_CLOSE_PAGE2);
nv_wr32(dev, NV10_PFB_CLOSE_PAGE2, tmp & ~(1 << 15));
- num_tiles = NV10_PFB_TILE__SIZE;
+ pfb->num_tiles = NV10_PFB_TILE__SIZE;
break;
case 0x46: /* G72 */
case 0x47: /* G70 */
case 0x49: /* G71 */
case 0x4b: /* G73 */
case 0x4c: /* C51 (G7X version) */
- num_tiles = NV40_PFB_TILE__SIZE_1;
+ pfb->num_tiles = NV40_PFB_TILE__SIZE_1;
break;
default:
- num_tiles = NV40_PFB_TILE__SIZE_0;
+ pfb->num_tiles = NV40_PFB_TILE__SIZE_0;
break;
}
- fb_bar_size = drm_get_resource_len(dev, 0) - 1;
- switch (dev_priv->chipset) {
- case 0x40:
- for (i = 0; i < num_tiles; i++) {
- nv_wr32(dev, NV10_PFB_TILE(i), 0);
- nv_wr32(dev, NV10_PFB_TLIMIT(i), fb_bar_size);
- }
- break;
- default:
- for (i = 0; i < num_tiles; i++) {
- nv_wr32(dev, NV40_PFB_TILE(i), 0);
- nv_wr32(dev, NV40_PFB_TLIMIT(i), fb_bar_size);
- }
- break;
- }
+ /* Turn all the tiling regions off. */
+ for (i = 0; i < pfb->num_tiles; i++)
+ pfb->set_region_tiling(dev, i, 0, 0, 0);
return 0;
}
diff --git a/drivers/gpu/drm/nouveau/nv40_graph.c b/drivers/gpu/drm/nouveau/nv40_graph.c
index d3e0a2a..2435d49 100644
--- a/drivers/gpu/drm/nouveau/nv40_graph.c
+++ b/drivers/gpu/drm/nouveau/nv40_graph.c
@@ -333,6 +333,67 @@ nv40_grctx_vals_load(struct drm_device *dev, struct nouveau_gpuobj *ctx)
nv_wo32(dev, ctx, cv->data[i].offset, cv->data[i].value);
}
+static void
+nv40_graph_write_tile(struct drm_device *dev, int i, uint32_t addr,
+ uint32_t size, uint32_t pitch)
+{
+ struct drm_nouveau_private *dev_priv = dev->dev_private;
+ uint32_t limit = max(1u, addr + size) - 1;
+
+ if (pitch)
+ addr |= 1;
+
+ switch (dev_priv->chipset) {
+ case 0x44:
+ case 0x4a:
+ case 0x4e:
+ nv_wr32(dev, NV20_PGRAPH_TSIZE(i), pitch);
+ nv_wr32(dev, NV20_PGRAPH_TLIMIT(i), limit);
+ nv_wr32(dev, NV20_PGRAPH_TILE(i), addr);
+ break;
+
+ case 0x46:
+ case 0x47:
+ case 0x49:
+ case 0x4b:
+ nv_wr32(dev, NV47_PGRAPH_TSIZE(i), pitch);
+ nv_wr32(dev, NV47_PGRAPH_TLIMIT(i), limit);
+ nv_wr32(dev, NV47_PGRAPH_TILE(i), addr);
+ nv_wr32(dev, NV40_PGRAPH_TSIZE1(i), pitch);
+ nv_wr32(dev, NV40_PGRAPH_TLIMIT1(i), limit);
+ nv_wr32(dev, NV40_PGRAPH_TILE1(i), addr);
+ break;
+
+ default:
+ nv_wr32(dev, NV20_PGRAPH_TSIZE(i), pitch);
+ nv_wr32(dev, NV20_PGRAPH_TLIMIT(i), limit);
+ nv_wr32(dev, NV20_PGRAPH_TILE(i), addr);
+ nv_wr32(dev, NV40_PGRAPH_TSIZE1(i), pitch);
+ nv_wr32(dev, NV40_PGRAPH_TLIMIT1(i), limit);
+ nv_wr32(dev, NV40_PGRAPH_TILE1(i), addr);
+ break;
+ }
+}
+
+void
+nv40_graph_set_region_tiling(struct drm_device *dev, int i, uint32_t addr,
+ uint32_t size, uint32_t pitch)
+{
+ struct drm_nouveau_private *dev_priv = dev->dev_private;
+ struct nouveau_fifo_engine *pfifo = &dev_priv->engine.fifo;
+ struct nouveau_pgraph_engine *pgraph = &dev_priv->engine.graph;
+
+ pfifo->reassign(dev, false);
+ pgraph->fifo_access(dev, false);
+
+ nouveau_wait_for_idle(dev);
+
+ nv40_graph_write_tile(dev, i, addr, size, pitch);
+
+ pgraph->fifo_access(dev, true);
+ pfifo->reassign(dev, true);
+}
+
/*
* G70 0x47
* G71 0x49
@@ -347,7 +408,8 @@ nv40_graph_init(struct drm_device *dev)
{
struct drm_nouveau_private *dev_priv =
(struct drm_nouveau_private *)dev->dev_private;
- uint32_t vramsz, tmp;
+ struct nouveau_fb_engine *pfb = &dev_priv->engine.fb;
+ uint32_t vramsz;
int i, j;
nv_wr32(dev, NV03_PMC_ENABLE, nv_rd32(dev, NV03_PMC_ENABLE) &
@@ -425,74 +487,9 @@ nv40_graph_init(struct drm_device *dev)
nv_wr32(dev, 0x400b38, 0x2ffff800);
nv_wr32(dev, 0x400b3c, 0x00006000);
- /* copy tile info from PFB */
- switch (dev_priv->chipset) {
- case 0x40: /* vanilla NV40 */
- for (i = 0; i < NV10_PFB_TILE__SIZE; i++) {
- tmp = nv_rd32(dev, NV10_PFB_TILE(i));
- nv_wr32(dev, NV40_PGRAPH_TILE0(i), tmp);
- nv_wr32(dev, NV40_PGRAPH_TILE1(i), tmp);
- tmp = nv_rd32(dev, NV10_PFB_TLIMIT(i));
- nv_wr32(dev, NV40_PGRAPH_TLIMIT0(i), tmp);
- nv_wr32(dev, NV40_PGRAPH_TLIMIT1(i), tmp);
- tmp = nv_rd32(dev, NV10_PFB_TSIZE(i));
- nv_wr32(dev, NV40_PGRAPH_TSIZE0(i), tmp);
- nv_wr32(dev, NV40_PGRAPH_TSIZE1(i), tmp);
- tmp = nv_rd32(dev, NV10_PFB_TSTATUS(i));
- nv_wr32(dev, NV40_PGRAPH_TSTATUS0(i), tmp);
- nv_wr32(dev, NV40_PGRAPH_TSTATUS1(i), tmp);
- }
- break;
- case 0x44:
- case 0x4a:
- case 0x4e: /* NV44-based cores don't have 0x406900? */
- for (i = 0; i < NV40_PFB_TILE__SIZE_0; i++) {
- tmp = nv_rd32(dev, NV40_PFB_TILE(i));
- nv_wr32(dev, NV40_PGRAPH_TILE0(i), tmp);
- tmp = nv_rd32(dev, NV40_PFB_TLIMIT(i));
- nv_wr32(dev, NV40_PGRAPH_TLIMIT0(i), tmp);
- tmp = nv_rd32(dev, NV40_PFB_TSIZE(i));
- nv_wr32(dev, NV40_PGRAPH_TSIZE0(i), tmp);
- tmp = nv_rd32(dev, NV40_PFB_TSTATUS(i));
- nv_wr32(dev, NV40_PGRAPH_TSTATUS0(i), tmp);
- }
- break;
- case 0x46:
- case 0x47:
- case 0x49:
- case 0x4b: /* G7X-based cores */
- for (i = 0; i < NV40_PFB_TILE__SIZE_1; i++) {
- tmp = nv_rd32(dev, NV40_PFB_TILE(i));
- nv_wr32(dev, NV47_PGRAPH_TILE0(i), tmp);
- nv_wr32(dev, NV40_PGRAPH_TILE1(i), tmp);
- tmp = nv_rd32(dev, NV40_PFB_TLIMIT(i));
- nv_wr32(dev, NV47_PGRAPH_TLIMIT0(i), tmp);
- nv_wr32(dev, NV40_PGRAPH_TLIMIT1(i), tmp);
- tmp = nv_rd32(dev, NV40_PFB_TSIZE(i));
- nv_wr32(dev, NV47_PGRAPH_TSIZE0(i), tmp);
- nv_wr32(dev, NV40_PGRAPH_TSIZE1(i), tmp);
- tmp = nv_rd32(dev, NV40_PFB_TSTATUS(i));
- nv_wr32(dev, NV47_PGRAPH_TSTATUS0(i), tmp);
- nv_wr32(dev, NV40_PGRAPH_TSTATUS1(i), tmp);
- }
- break;
- default: /* everything else */
- for (i = 0; i < NV40_PFB_TILE__SIZE_0; i++) {
- tmp = nv_rd32(dev, NV40_PFB_TILE(i));
- nv_wr32(dev, NV40_PGRAPH_TILE0(i), tmp);
- nv_wr32(dev, NV40_PGRAPH_TILE1(i), tmp);
- tmp = nv_rd32(dev, NV40_PFB_TLIMIT(i));
- nv_wr32(dev, NV40_PGRAPH_TLIMIT0(i), tmp);
- nv_wr32(dev, NV40_PGRAPH_TLIMIT1(i), tmp);
- tmp = nv_rd32(dev, NV40_PFB_TSIZE(i));
- nv_wr32(dev, NV40_PGRAPH_TSIZE0(i), tmp);
- nv_wr32(dev, NV40_PGRAPH_TSIZE1(i), tmp);
- tmp = nv_rd32(dev, NV40_PFB_TSTATUS(i));
- nv_wr32(dev, NV40_PGRAPH_TSTATUS0(i), tmp);
- nv_wr32(dev, NV40_PGRAPH_TSTATUS1(i), tmp);
- }
- break;
- }
+ /* Turn all the tiling regions off. */
+ for (i = 0; i < pfb->num_tiles; i++)
+ nv40_graph_write_tile(dev, i, 0, 0, 0);
/* begin RAM config */
vramsz = drm_get_resource_len(dev, 0) - 1;
--
1.6.4.4
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH 2/3] drm/nouveau: Make the MM aware of pre-G80 tiling.
[not found] ` <1260556404-13389-1-git-send-email-currojerez-sGOZH3hwPm2sTnJN9+BGXg@public.gmane.org>
@ 2009-12-11 18:33 ` Francisco Jerez
[not found] ` <1260556404-13389-2-git-send-email-currojerez-sGOZH3hwPm2sTnJN9+BGXg@public.gmane.org>
2009-12-11 18:33 ` [PATCH 3/3] drm/nouveau: Fix up buffer eviction, and evict them to GART, if possible Francisco Jerez
` (2 subsequent siblings)
3 siblings, 1 reply; 7+ messages in thread
From: Francisco Jerez @ 2009-12-11 18:33 UTC (permalink / raw)
To: nouveau-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
Signed-off-by: Francisco Jerez <currojerez-sGOZH3hwPm2sTnJN9+BGXg@public.gmane.org>
---
drivers/gpu/drm/nouveau/nouveau_bo.c | 221 +++++++++++++++++++++++----------
drivers/gpu/drm/nouveau/nouveau_drv.h | 22 ++++
drivers/gpu/drm/nouveau/nouveau_mem.c | 80 ++++++++++++
3 files changed, 258 insertions(+), 65 deletions(-)
diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
index 320a14b..4616bae 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -37,6 +37,7 @@ static void
nouveau_bo_del_ttm(struct ttm_buffer_object *bo)
{
struct drm_nouveau_private *dev_priv = nouveau_bdev(bo->bdev);
+ struct drm_device *dev = dev_priv->dev;
struct nouveau_bo *nvbo = nouveau_bo(bo);
ttm_bo_kunmap(&nvbo->kmap);
@@ -44,12 +45,83 @@ nouveau_bo_del_ttm(struct ttm_buffer_object *bo)
if (unlikely(nvbo->gem))
DRM_ERROR("bo %p still attached to GEM object\n", bo);
+ if (nvbo->tile)
+ nv10_mem_expire_tiling(dev, nvbo->tile, NULL);
+
spin_lock(&dev_priv->ttm.bo_list_lock);
list_del(&nvbo->head);
spin_unlock(&dev_priv->ttm.bo_list_lock);
kfree(nvbo);
}
+static void
+nouveau_bo_fixup_align(struct drm_device *dev,
+ uint32_t tile_mode, uint32_t tile_flags,
+ int *align, int *size)
+{
+ struct drm_nouveau_private *dev_priv = dev->dev_private;
+
+ /*
+ * Some of the tile_flags have a periodic structure of N*4096 bytes,
+ * align to to that as well as the page size. Overallocate memory to
+ * avoid corruption of other buffer objects.
+ */
+ if (dev_priv->card_type == NV_50) {
+ switch (tile_flags) {
+ case 0x1800:
+ case 0x2800:
+ case 0x4800:
+ case 0x7a00:
+ if (dev_priv->chipset >= 0xA0) {
+ /* This is based on high end cards with 448 bits
+ * memory bus, could be different elsewhere.*/
+ *size += 6 * 28672;
+ /* 8 * 28672 is the actual alignment requirement
+ * but we must also align to page size. */
+ *align = 2 * 8 * 28672;
+ } else if (dev_priv->chipset >= 0x90) {
+ *size += 3 * 16384;
+ *align = 12 * 16834;
+ } else {
+ *size += 3 * 8192;
+ /* 12 * 8192 is the actual alignment requirement
+ * but we must also align to page size. */
+ *align = 2 * 12 * 8192;
+ }
+ break;
+ default:
+ break;
+ }
+
+ } else {
+ if (tile_mode) {
+ if (dev_priv->chipset >= 0x40) {
+ *align = 65536;
+ *size = roundup(*size, 64 * tile_mode);
+
+ } else if (dev_priv->chipset >= 0x30) {
+ *align = 32768;
+ *size = roundup(*size, 32 * tile_mode);
+
+ } else if (dev_priv->chipset >= 0x20) {
+ *align = 16384;
+ *size = roundup(*size, 32 * tile_mode);
+
+ } else if (dev_priv->chipset >= 0x10) {
+ *align = 4096;
+ *size = roundup(*size, 16 * tile_mode);
+ }
+ }
+ }
+
+ *size = ALIGN(*size, PAGE_SIZE);
+
+ if (dev_priv->card_type == NV_50) {
+ *size = ALIGN(*size, 65536);
+ *align = max(65536, *align);
+ }
+}
+
int
nouveau_bo_new(struct drm_device *dev, struct nouveau_channel *chan,
int size, int align, uint32_t flags, uint32_t tile_mode,
@@ -70,46 +142,9 @@ nouveau_bo_new(struct drm_device *dev, struct nouveau_channel *chan,
nvbo->tile_mode = tile_mode;
nvbo->tile_flags = tile_flags;
- /*
- * Some of the tile_flags have a periodic structure of N*4096 bytes,
- * align to to that as well as the page size. Overallocate memory to
- * avoid corruption of other buffer objects.
- */
- switch (tile_flags) {
- case 0x1800:
- case 0x2800:
- case 0x4800:
- case 0x7a00:
- if (dev_priv->chipset >= 0xA0) {
- /* This is based on high end cards with 448 bits
- * memory bus, could be different elsewhere.*/
- size += 6 * 28672;
- /* 8 * 28672 is the actual alignment requirement,
- * but we must also align to page size. */
- align = 2 * 8 * 28672;
- } else if (dev_priv->chipset >= 0x90) {
- size += 3 * 16384;
- align = 12 * 16834;
- } else {
- size += 3 * 8192;
- /* 12 * 8192 is the actual alignment requirement,
- * but we must also align to page size. */
- align = 2 * 12 * 8192;
- }
- break;
- default:
- break;
- }
-
+ nouveau_bo_fixup_align(dev, tile_mode, tile_flags, &align, &size);
align >>= PAGE_SHIFT;
- size = (size + (PAGE_SIZE - 1)) & ~(PAGE_SIZE - 1);
- if (dev_priv->card_type == NV_50) {
- size = (size + 65535) & ~65535;
- if (align < (65536 / PAGE_SIZE))
- align = (65536 / PAGE_SIZE);
- }
-
if (flags & TTM_PL_FLAG_VRAM)
nvbo->placements[n++] = TTM_PL_FLAG_VRAM | TTM_PL_MASK_CACHING;
if (flags & TTM_PL_FLAG_TT)
@@ -408,6 +443,7 @@ nouveau_bo_evict_flags(struct ttm_buffer_object *bo, struct ttm_placement *pl)
/* GPU-assisted copy using NV_MEMORY_TO_MEMORY_FORMAT, can access
* TTM_PL_{VRAM,TT} directly.
*/
+
static int
nouveau_bo_move_accel_cleanup(struct nouveau_channel *chan,
struct nouveau_bo *nvbo, bool evict, bool no_wait,
@@ -442,11 +478,12 @@ nouveau_bo_mem_ctxdma(struct nouveau_bo *nvbo, struct nouveau_channel *chan,
}
static int
-nouveau_bo_move_m2mf(struct ttm_buffer_object *bo, int evict, int no_wait,
- struct ttm_mem_reg *old_mem, struct ttm_mem_reg *new_mem)
+nouveau_bo_move_m2mf(struct ttm_buffer_object *bo, int evict, bool intr,
+ int no_wait, struct ttm_mem_reg *new_mem)
{
struct nouveau_bo *nvbo = nouveau_bo(bo);
struct drm_nouveau_private *dev_priv = nouveau_bdev(bo->bdev);
+ struct ttm_mem_reg *old_mem = &bo->mem;
struct nouveau_channel *chan;
uint64_t src_offset, dst_offset;
uint32_t page_count;
@@ -549,7 +586,7 @@ nouveau_bo_move_flipd(struct ttm_buffer_object *bo, bool evict, bool intr,
if (ret)
goto out;
- ret = nouveau_bo_move_m2mf(bo, true, no_wait, &bo->mem, &tmp_mem);
+ ret = nouveau_bo_move_m2mf(bo, true, intr, no_wait, &tmp_mem);
if (ret)
goto out;
@@ -587,7 +624,7 @@ nouveau_bo_move_flips(struct ttm_buffer_object *bo, bool evict, bool intr,
if (ret)
goto out;
- ret = nouveau_bo_move_m2mf(bo, true, no_wait, &bo->mem, new_mem);
+ ret = nouveau_bo_move_m2mf(bo, true, intr, no_wait, new_mem);
if (ret)
goto out;
@@ -602,51 +639,105 @@ out:
}
static int
-nouveau_bo_move(struct ttm_buffer_object *bo, bool evict, bool intr,
- bool no_wait, struct ttm_mem_reg *new_mem)
+nouveau_bo_vm_bind(struct ttm_buffer_object *bo, struct ttm_mem_reg *new_mem,
+ struct nouveau_tile_reg **new_tile)
{
struct drm_nouveau_private *dev_priv = nouveau_bdev(bo->bdev);
- struct nouveau_bo *nvbo = nouveau_bo(bo);
struct drm_device *dev = dev_priv->dev;
- struct ttm_mem_reg *old_mem = &bo->mem;
+ struct nouveau_bo *nvbo = nouveau_bo(bo);
+ uint64_t offset;
int ret;
- if (dev_priv->card_type == NV_50 && new_mem->mem_type == TTM_PL_VRAM &&
- !nvbo->no_vm) {
- uint64_t offset = new_mem->mm_node->start << PAGE_SHIFT;
+ if (nvbo->no_vm || new_mem->mem_type != TTM_PL_VRAM) {
+ /* Nothing to do. */
+ *new_tile = NULL;
+ return 0;
+ }
+
+ offset = new_mem->mm_node->start << PAGE_SHIFT;
+ if (dev_priv->card_type == NV_50) {
ret = nv50_mem_vm_bind_linear(dev,
offset + dev_priv->vm_vram_base,
new_mem->size, nvbo->tile_flags,
offset);
if (ret)
return ret;
+
+ } else if (dev_priv->card_type >= NV_10) {
+ *new_tile = nv10_mem_set_tiling(dev, offset, new_mem->size,
+ nvbo->tile_mode);
}
- if (dev_priv->init_state != NOUVEAU_CARD_INIT_DONE)
- return ttm_bo_move_memcpy(bo, evict, no_wait, new_mem);
+ return 0;
+}
+static void
+nouveau_bo_vm_cleanup(struct ttm_buffer_object *bo,
+ struct nouveau_tile_reg *new_tile,
+ struct nouveau_tile_reg **old_tile)
+{
+ struct drm_nouveau_private *dev_priv = nouveau_bdev(bo->bdev);
+ struct drm_device *dev = dev_priv->dev;
+
+ if (dev_priv->card_type >= NV_10 &&
+ dev_priv->card_type < NV_50) {
+ if (*old_tile)
+ nv10_mem_expire_tiling(dev, *old_tile, bo->sync_obj);
+
+ *old_tile = new_tile;
+ }
+}
+
+static int
+nouveau_bo_move(struct ttm_buffer_object *bo, bool evict, bool intr,
+ bool no_wait, struct ttm_mem_reg *new_mem)
+{
+ struct drm_nouveau_private *dev_priv = nouveau_bdev(bo->bdev);
+ struct nouveau_bo *nvbo = nouveau_bo(bo);
+ struct ttm_mem_reg *old_mem = &bo->mem;
+ struct nouveau_tile_reg *new_tile = NULL;
+ int ret = 0;
+
+ ret = nouveau_bo_vm_bind(bo, new_mem, &new_tile);
+ if (ret)
+ return ret;
+
+ /* Software copy if the card isn't up and running yet. */
+ if (dev_priv->init_state != NOUVEAU_CARD_INIT_DONE) {
+ ret = ttm_bo_move_memcpy(bo, evict, no_wait, new_mem);
+ goto out;
+ }
+
+ /* Fake bo copy. */
if (old_mem->mem_type == TTM_PL_SYSTEM && !bo->ttm) {
BUG_ON(bo->mem.mm_node != NULL);
bo->mem = *new_mem;
new_mem->mm_node = NULL;
- return 0;
+ goto out;
}
- if (new_mem->mem_type == TTM_PL_SYSTEM) {
- if (old_mem->mem_type == TTM_PL_SYSTEM)
- return ttm_bo_move_memcpy(bo, evict, no_wait, new_mem);
- if (nouveau_bo_move_flipd(bo, evict, intr, no_wait, new_mem))
- return ttm_bo_move_memcpy(bo, evict, no_wait, new_mem);
- } else if (old_mem->mem_type == TTM_PL_SYSTEM) {
- if (nouveau_bo_move_flips(bo, evict, intr, no_wait, new_mem))
- return ttm_bo_move_memcpy(bo, evict, no_wait, new_mem);
- } else {
- if (nouveau_bo_move_m2mf(bo, evict, no_wait, old_mem, new_mem))
- return ttm_bo_move_memcpy(bo, evict, no_wait, new_mem);
- }
+ /* Hardware assisted copy. */
+ if (new_mem->mem_type == TTM_PL_SYSTEM)
+ ret = nouveau_bo_move_flipd(bo, evict, intr, no_wait, new_mem);
+ else if (old_mem->mem_type == TTM_PL_SYSTEM)
+ ret = nouveau_bo_move_flips(bo, evict, intr, no_wait, new_mem);
+ else
+ ret = nouveau_bo_move_m2mf(bo, evict, intr, no_wait, new_mem);
- return 0;
+ if (!ret)
+ goto out;
+
+ /* Fallback to software copy. */
+ ret = ttm_bo_move_memcpy(bo, evict, no_wait, new_mem);
+
+out:
+ if (ret)
+ nouveau_bo_vm_cleanup(bo, NULL, &new_tile);
+ else
+ nouveau_bo_vm_cleanup(bo, new_tile, &nvbo->tile);
+
+ return ret;
}
static int
diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h
index 2730497..7a2a322 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drv.h
+++ b/drivers/gpu/drm/nouveau/nouveau_drv.h
@@ -58,11 +58,19 @@ struct nouveau_fpriv {
#define MAX_NUM_DCB_ENTRIES 16
#define NOUVEAU_MAX_CHANNEL_NR 128
+#define NOUVEAU_MAX_TILE_NR 15
#define NV50_VM_MAX_VRAM (2*1024*1024*1024ULL)
#define NV50_VM_BLOCK (512*1024*1024ULL)
#define NV50_VM_VRAM_NR (NV50_VM_MAX_VRAM / NV50_VM_BLOCK)
+struct nouveau_tile_reg {
+ struct nouveau_fence *fence;
+ uint32_t addr;
+ uint32_t size;
+ bool used;
+};
+
struct nouveau_bo {
struct ttm_buffer_object bo;
struct ttm_placement placement;
@@ -82,6 +90,7 @@ struct nouveau_bo {
uint32_t tile_mode;
uint32_t tile_flags;
+ struct nouveau_tile_reg *tile;
struct drm_gem_object *gem;
struct drm_file *cpu_filp;
@@ -554,6 +563,12 @@ struct drm_nouveau_private {
unsigned long sg_handle;
} gart_info;
+ /* nv10-nv40 tiling regions */
+ struct {
+ struct nouveau_tile_reg reg[NOUVEAU_MAX_TILE_NR];
+ spinlock_t lock;
+ } tile;
+
/* G8x/G9x virtual address space */
uint64_t vm_gart_base;
uint64_t vm_gart_size;
@@ -690,6 +705,13 @@ extern void nouveau_mem_release(struct drm_file *, struct mem_block *heap);
extern int nouveau_mem_init(struct drm_device *);
extern int nouveau_mem_init_agp(struct drm_device *);
extern void nouveau_mem_close(struct drm_device *);
+extern struct nouveau_tile_reg *nv10_mem_set_tiling(struct drm_device *dev,
+ uint32_t addr,
+ uint32_t size,
+ uint32_t pitch);
+extern void nv10_mem_expire_tiling(struct drm_device *dev,
+ struct nouveau_tile_reg *tile,
+ struct nouveau_fence *fence);
extern int nv50_mem_vm_bind_linear(struct drm_device *, uint64_t virt,
uint32_t size, uint32_t flags,
uint64_t phys);
diff --git a/drivers/gpu/drm/nouveau/nouveau_mem.c b/drivers/gpu/drm/nouveau/nouveau_mem.c
index 0275571..6056f32 100644
--- a/drivers/gpu/drm/nouveau/nouveau_mem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_mem.c
@@ -192,6 +192,85 @@ void nouveau_mem_release(struct drm_file *file_priv, struct mem_block *heap)
}
/*
+ * NV10-NV40 tiling helpers
+ */
+
+static void
+nv10_mem_set_tiling_locked(struct drm_device *dev, int i, uint32_t addr,
+ uint32_t size, uint32_t pitch)
+{
+ struct drm_nouveau_private *dev_priv = dev->dev_private;
+ struct nouveau_fb_engine *pfb = &dev_priv->engine.fb;
+ struct nouveau_pgraph_engine *pgraph = &dev_priv->engine.graph;
+ struct nouveau_tile_reg *tile = &dev_priv->tile.reg[i];
+
+ tile->used = true;
+
+ spin_unlock(&dev_priv->tile.lock);
+
+ pgraph->set_region_tiling(dev, i, addr, size, pitch);
+ pfb->set_region_tiling(dev, i, addr, size, pitch);
+
+ spin_lock(&dev_priv->tile.lock);
+
+ tile->addr = addr;
+ tile->size = size;
+ tile->used = !!pitch;
+ nouveau_fence_unref((void **)&tile->fence);
+}
+
+struct nouveau_tile_reg *
+nv10_mem_set_tiling(struct drm_device *dev, uint32_t addr, uint32_t size,
+ uint32_t pitch)
+{
+ struct drm_nouveau_private *dev_priv = dev->dev_private;
+ struct nouveau_fb_engine *pfb = &dev_priv->engine.fb;
+ struct nouveau_tile_reg *tile = dev_priv->tile.reg, *found = NULL;
+ int i;
+
+ spin_lock(&dev_priv->tile.lock);
+
+ for (i = 0; i < pfb->num_tiles; i++) {
+ if (tile[i].used)
+ /* Tile region in use. */
+ continue;
+
+ if (tile[i].fence &&
+ !nouveau_fence_signalled(tile[i].fence, NULL))
+ /* Pending tile region. */
+ continue;
+
+ if (max(tile[i].addr, addr) <
+ min(tile[i].addr + tile[i].size, addr + size))
+ /* Kill an intersecting tile region. */
+ nv10_mem_set_tiling_locked(dev, i, 0, 0, 0);
+
+ if (pitch && !found) {
+ /* Free tile region. */
+ nv10_mem_set_tiling_locked(dev, i, addr, size, pitch);
+ found = &tile[i];
+ }
+ }
+
+ spin_unlock(&dev_priv->tile.lock);
+
+ return found;
+}
+
+void
+nv10_mem_expire_tiling(struct drm_device *dev, struct nouveau_tile_reg *tile,
+ struct nouveau_fence *fence)
+{
+ if (fence) {
+ /* Mark it as pending. */
+ tile->fence = fence;
+ nouveau_fence_ref(fence);
+ }
+
+ tile->used = false;
+}
+
+/*
* NV50 VM helpers
*/
int
@@ -509,6 +588,7 @@ nouveau_mem_init(struct drm_device *dev)
INIT_LIST_HEAD(&dev_priv->ttm.bo_list);
spin_lock_init(&dev_priv->ttm.bo_list_lock);
+ spin_lock_init(&dev_priv->tile.lock);
dev_priv->fb_available_size = nouveau_mem_fb_amount(dev);
--
1.6.4.4
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH 3/3] drm/nouveau: Fix up buffer eviction, and evict them to GART, if possible.
[not found] ` <1260556404-13389-1-git-send-email-currojerez-sGOZH3hwPm2sTnJN9+BGXg@public.gmane.org>
2009-12-11 18:33 ` [PATCH 2/3] drm/nouveau: Make the MM aware of pre-G80 tiling Francisco Jerez
@ 2009-12-11 18:33 ` Francisco Jerez
2009-12-11 20:18 ` [PATCH 1/3] drm/nouveau: Pre-G80 tiling support Jimmy Rentz
2009-12-14 3:03 ` [PATCHv2 " Francisco Jerez
3 siblings, 0 replies; 7+ messages in thread
From: Francisco Jerez @ 2009-12-11 18:33 UTC (permalink / raw)
To: nouveau-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
Signed-off-by: Francisco Jerez <currojerez-sGOZH3hwPm2sTnJN9+BGXg@public.gmane.org>
---
drivers/gpu/drm/nouveau/nouveau_bo.c | 6 ++++++
1 files changed, 6 insertions(+), 0 deletions(-)
diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
index 4616bae..af0a1f5 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -433,10 +433,16 @@ nouveau_bo_evict_flags(struct ttm_buffer_object *bo, struct ttm_placement *pl)
struct nouveau_bo *nvbo = nouveau_bo(bo);
switch (bo->mem.mem_type) {
+ case TTM_PL_VRAM:
+ nouveau_bo_placement_set(nvbo, TTM_PL_FLAG_TT |
+ TTM_PL_FLAG_SYSTEM);
+ break;
default:
nouveau_bo_placement_set(nvbo, TTM_PL_FLAG_SYSTEM);
break;
}
+
+ *pl = nvbo->placement;
}
--
1.6.4.4
^ permalink raw reply related [flat|nested] 7+ messages in thread
* Re: [PATCH 1/3] drm/nouveau: Pre-G80 tiling support.
[not found] ` <1260556404-13389-1-git-send-email-currojerez-sGOZH3hwPm2sTnJN9+BGXg@public.gmane.org>
2009-12-11 18:33 ` [PATCH 2/3] drm/nouveau: Make the MM aware of pre-G80 tiling Francisco Jerez
2009-12-11 18:33 ` [PATCH 3/3] drm/nouveau: Fix up buffer eviction, and evict them to GART, if possible Francisco Jerez
@ 2009-12-11 20:18 ` Jimmy Rentz
[not found] ` <87tyvxnrtp.fsf@riseup.net>
2009-12-14 3:03 ` [PATCHv2 " Francisco Jerez
3 siblings, 1 reply; 7+ messages in thread
From: Jimmy Rentz @ 2009-12-11 20:18 UTC (permalink / raw)
To: nouveau-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
On Fri, 11 Dec 2009 19:33:22 +0100
Francisco Jerez <currojerez-sGOZH3hwPm2sTnJN9+BGXg@public.gmane.org> wrote:
> Signed-off-by: Francisco Jerez <currojerez-sGOZH3hwPm2sTnJN9+BGXg@public.gmane.org>
> ---
> drivers/gpu/drm/nouveau/nouveau_drv.h | 23 +++++
> drivers/gpu/drm/nouveau/nouveau_reg.h | 16 ++--
> drivers/gpu/drm/nouveau/nouveau_state.c | 8 ++
> drivers/gpu/drm/nouveau/nv10_fb.c | 32 ++++++--
> drivers/gpu/drm/nouveau/nv10_graph.c | 47 ++++++++---
> drivers/gpu/drm/nouveau/nv20_graph.c | 80 +++++++++++--------
> drivers/gpu/drm/nouveau/nv40_fb.c | 53 ++++++++-----
> drivers/gpu/drm/nouveau/nv40_graph.c | 135
> +++++++++++++++---------------- 8 files changed, 247 insertions(+),
> 147 deletions(-)
>
> diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h
> b/drivers/gpu/drm/nouveau/nouveau_drv.h index 88b4c7b..2730497 100644
> --- a/drivers/gpu/drm/nouveau/nouveau_drv.h
> +++ b/drivers/gpu/drm/nouveau/nouveau_drv.h
> @@ -276,8 +276,13 @@ struct nouveau_timer_engine {
> };
>
> struct nouveau_fb_engine {
> + int num_tiles;
> +
> int (*init)(struct drm_device *dev);
> void (*takedown)(struct drm_device *dev);
> +
> + void (*set_region_tiling)(struct drm_device *dev, int i,
> uint32_t addr,
> + uint32_t size, uint32_t pitch);
> };
>
> struct nouveau_fifo_engine {
> @@ -328,6 +333,9 @@ struct nouveau_pgraph_engine {
> void (*destroy_context)(struct nouveau_channel *);
> int (*load_context)(struct nouveau_channel *);
> int (*unload_context)(struct drm_device *);
> +
> + void (*set_region_tiling)(struct drm_device *dev, int i,
> uint32_t addr,
> + uint32_t size, uint32_t pitch);
> };
>
> struct nouveau_engine {
> @@ -876,10 +884,16 @@ extern void nv04_fb_takedown(struct drm_device
> *); /* nv10_fb.c */
> extern int nv10_fb_init(struct drm_device *);
> extern void nv10_fb_takedown(struct drm_device *);
> +extern void nv10_fb_set_region_tiling(struct drm_device *dev, int i,
> + uint32_t addr, uint32_t size,
> + uint32_t pitch);
>
> /* nv40_fb.c */
> extern int nv40_fb_init(struct drm_device *);
> extern void nv40_fb_takedown(struct drm_device *);
> +extern void nv40_fb_set_region_tiling(struct drm_device *dev, int i,
> + uint32_t addr, uint32_t size,
> + uint32_t pitch);
>
> /* nv04_fifo.c */
> extern int nv04_fifo_init(struct drm_device *);
> @@ -938,6 +952,9 @@ extern void nv10_graph_destroy_context(struct
> nouveau_channel *); extern int nv10_graph_load_context(struct
> nouveau_channel *); extern int nv10_graph_unload_context(struct
> drm_device *); extern void nv10_graph_context_switch(struct
> drm_device *); +extern void nv10_graph_set_region_tiling(struct
> drm_device *dev, int i,
> + uint32_t addr, uint32_t
> size,
> + uint32_t pitch);
>
> /* nv20_graph.c */
> extern struct nouveau_pgraph_object_class nv20_graph_grclass[];
> @@ -949,6 +966,9 @@ extern int nv20_graph_unload_context(struct
> drm_device *); extern int nv20_graph_init(struct drm_device *);
> extern void nv20_graph_takedown(struct drm_device *);
> extern int nv30_graph_init(struct drm_device *);
> +extern void nv20_graph_set_region_tiling(struct drm_device *dev, int
> i,
> + uint32_t addr, uint32_t
> size,
> + uint32_t pitch);
>
> /* nv40_graph.c */
> extern struct nouveau_pgraph_object_class nv40_graph_grclass[];
> @@ -962,6 +982,9 @@ extern int nv40_graph_unload_context(struct
> drm_device *); extern int nv40_grctx_init(struct drm_device *);
> extern void nv40_grctx_fini(struct drm_device *);
> extern void nv40_grctx_vals_load(struct drm_device *, struct
> nouveau_gpuobj *); +extern void nv40_graph_set_region_tiling(struct
> drm_device *dev, int i,
> + uint32_t addr, uint32_t
> size,
> + uint32_t pitch);
>
> /* nv50_graph.c */
> extern struct nouveau_pgraph_object_class nv50_graph_grclass[];
> diff --git a/drivers/gpu/drm/nouveau/nouveau_reg.h
> b/drivers/gpu/drm/nouveau/nouveau_reg.h index fa1b0e7..251f1b3 100644
> --- a/drivers/gpu/drm/nouveau/nouveau_reg.h
> +++ b/drivers/gpu/drm/nouveau/nouveau_reg.h
> @@ -349,19 +349,19 @@
> #define NV04_PGRAPH_BLEND 0x00400824
> #define NV04_PGRAPH_STORED_FMT 0x00400830
> #define NV04_PGRAPH_PATT_COLORRAM 0x00400900
> -#define NV40_PGRAPH_TILE0(i)
> (0x00400900 + (i*16)) -#define
> NV40_PGRAPH_TLIMIT0(i) (0x00400904 +
> (i*16)) -#define NV40_PGRAPH_TSIZE0(i)
> (0x00400908 + (i*16)) -#define
> NV40_PGRAPH_TSTATUS0(i) (0x0040090C +
> (i*16)) +#define NV20_PGRAPH_TILE(i)
> (0x00400900 + (i*16)) +#define
> NV20_PGRAPH_TLIMIT(i) (0x00400904 +
> (i*16)) +#define NV20_PGRAPH_TSIZE(i)
> (0x00400908 + (i*16)) +#define
> NV20_PGRAPH_TSTATUS(i) (0x0040090C +
> (i*16)) #define NV10_PGRAPH_TILE(i)
> (0x00400B00 + (i*16)) #define
> NV10_PGRAPH_TLIMIT(i) (0x00400B04 +
> (i*16)) #define NV10_PGRAPH_TSIZE(i)
> (0x00400B08 + (i*16)) #define
> NV10_PGRAPH_TSTATUS(i) (0x00400B0C +
> (i*16)) #define NV04_PGRAPH_U_RAM
> 0x00400D00 -#define
> NV47_PGRAPH_TILE0(i) (0x00400D00 +
> (i*16)) -#define NV47_PGRAPH_TLIMIT0(i)
> (0x00400D04 + (i*16)) -#define
> NV47_PGRAPH_TSIZE0(i) (0x00400D08 +
> (i*16)) -#define NV47_PGRAPH_TSTATUS0(i)
> (0x00400D0C + (i*16)) +#define
> NV47_PGRAPH_TILE(i) (0x00400D00 +
> (i*16)) +#define NV47_PGRAPH_TLIMIT(i)
> (0x00400D04 + (i*16)) +#define
> NV47_PGRAPH_TSIZE(i) (0x00400D08 +
> (i*16)) +#define NV47_PGRAPH_TSTATUS(i)
> (0x00400D0C + (i*16)) #define
> NV04_PGRAPH_V_RAM 0x00400D40 #define
> NV04_PGRAPH_W_RAM 0x00400D80 #define
> NV10_PGRAPH_COMBINER0_IN_ALPHA 0x00400E40 diff
> --git a/drivers/gpu/drm/nouveau/nouveau_state.c
> b/drivers/gpu/drm/nouveau/nouveau_state.c index 2ed41d3..4342867
> 100644 --- a/drivers/gpu/drm/nouveau/nouveau_state.c +++
> b/drivers/gpu/drm/nouveau/nouveau_state.c @@ -100,6 +100,7 @@ static
> int nouveau_init_engine_ptrs(struct drm_device *dev)
> engine->timer.takedown = nv04_timer_takedown;
> engine->fb.init = nv10_fb_init;
> engine->fb.takedown = nv10_fb_takedown;
> + engine->fb.set_region_tiling =
> nv10_fb_set_region_tiling; engine->graph.grclass =
> nv10_graph_grclass; engine->graph.init =
> nv10_graph_init; engine->graph.takedown =
> nv10_graph_takedown; @@ -109,6 +110,7 @@ static int
> nouveau_init_engine_ptrs(struct drm_device *dev)
> engine->graph.fifo_access = nv04_graph_fifo_access;
> engine->graph.load_context = nv10_graph_load_context;
> engine->graph.unload_context = nv10_graph_unload_context;
> + engine->graph.set_region_tiling =
> nv10_graph_set_region_tiling; engine->fifo.channels =
> 32; engine->fifo.init = nv10_fifo_init;
> engine->fifo.takedown =
> nouveau_stub_takedown; @@ -139,6 +141,7 @@ static int
> nouveau_init_engine_ptrs(struct drm_device *dev)
> engine->timer.takedown = nv04_timer_takedown;
> engine->fb.init = nv10_fb_init;
> engine->fb.takedown = nv10_fb_takedown;
> + engine->fb.set_region_tiling =
> nv10_fb_set_region_tiling; engine->graph.grclass =
> nv20_graph_grclass; engine->graph.init =
> nv20_graph_init; engine->graph.takedown =
> nv20_graph_takedown; @@ -148,6 +151,7 @@ static int
> nouveau_init_engine_ptrs(struct drm_device *dev)
> engine->graph.fifo_access = nv04_graph_fifo_access;
> engine->graph.load_context = nv20_graph_load_context;
> engine->graph.unload_context = nv20_graph_unload_context;
> + engine->graph.set_region_tiling =
> nv20_graph_set_region_tiling; engine->fifo.channels =
> 32; engine->fifo.init = nv10_fifo_init;
> engine->fifo.takedown =
> nouveau_stub_takedown; @@ -178,6 +182,7 @@ static int
> nouveau_init_engine_ptrs(struct drm_device *dev)
> engine->timer.takedown = nv04_timer_takedown;
> engine->fb.init = nv10_fb_init;
> engine->fb.takedown = nv10_fb_takedown;
> + engine->fb.set_region_tiling =
> nv10_fb_set_region_tiling; engine->graph.grclass =
> nv30_graph_grclass; engine->graph.init =
> nv30_graph_init; engine->graph.takedown =
> nv20_graph_takedown; @@ -187,6 +192,7 @@ static int
> nouveau_init_engine_ptrs(struct drm_device *dev)
> engine->graph.destroy_context = nv20_graph_destroy_context;
> engine->graph.load_context = nv20_graph_load_context;
> engine->graph.unload_context = nv20_graph_unload_context;
> + engine->graph.set_region_tiling =
> nv20_graph_set_region_tiling; engine->fifo.channels =
> 32; engine->fifo.init = nv10_fifo_init;
> engine->fifo.takedown =
> nouveau_stub_takedown; @@ -218,6 +224,7 @@ static int
> nouveau_init_engine_ptrs(struct drm_device *dev)
> engine->timer.takedown = nv04_timer_takedown;
> engine->fb.init = nv40_fb_init;
> engine->fb.takedown = nv40_fb_takedown;
> + engine->fb.set_region_tiling =
> nv40_fb_set_region_tiling; engine->graph.grclass =
> nv40_graph_grclass; engine->graph.init =
> nv40_graph_init; engine->graph.takedown =
> nv40_graph_takedown; @@ -227,6 +234,7 @@ static int
> nouveau_init_engine_ptrs(struct drm_device *dev)
> engine->graph.destroy_context = nv40_graph_destroy_context;
> engine->graph.load_context = nv40_graph_load_context;
> engine->graph.unload_context = nv40_graph_unload_context;
> + engine->graph.set_region_tiling =
> nv40_graph_set_region_tiling; engine->fifo.channels =
> 32; engine->fifo.init = nv40_fifo_init;
> engine->fifo.takedown =
> nouveau_stub_takedown; diff --git a/drivers/gpu/drm/nouveau/nv10_fb.c
> b/drivers/gpu/drm/nouveau/nv10_fb.c index 79e2d10..cc5cda4 100644
> --- a/drivers/gpu/drm/nouveau/nv10_fb.c
> +++ b/drivers/gpu/drm/nouveau/nv10_fb.c
> @@ -3,17 +3,37 @@
> #include "nouveau_drv.h"
> #include "nouveau_drm.h"
>
> +void
> +nv10_fb_set_region_tiling(struct drm_device *dev, int i, uint32_t
> addr,
> + uint32_t size, uint32_t pitch)
> +{
> + struct drm_nouveau_private *dev_priv = dev->dev_private;
> + uint32_t limit = max(1u, addr + size) - 1;
> +
> + if (pitch) {
> + if (dev_priv->card_type >= NV_20)
> + addr |= 1;
> + else
> + addr |= 1 << 31;
> + }
> +
> + nv_wr32(dev, NV10_PFB_TLIMIT(i), limit);
> + nv_wr32(dev, NV10_PFB_TSIZE(i), pitch);
> + nv_wr32(dev, NV10_PFB_TILE(i), addr);
> +}
> +
> int
> nv10_fb_init(struct drm_device *dev)
> {
> - uint32_t fb_bar_size;
> + struct drm_nouveau_private *dev_priv = dev->dev_private;
> + struct nouveau_fb_engine *pfb = &dev_priv->engine.fb;
> int i;
>
> - fb_bar_size = drm_get_resource_len(dev, 0) - 1;
> - for (i = 0; i < NV10_PFB_TILE__SIZE; i++) {
> - nv_wr32(dev, NV10_PFB_TILE(i), 0);
> - nv_wr32(dev, NV10_PFB_TLIMIT(i), fb_bar_size);
> - }
> + pfb->num_tiles = NV10_PFB_TILE__SIZE;
> +
> + /* Turn all the tiling regions off. */
> + for (i = 0; i < pfb->num_tiles; i++)
> + pfb->set_region_tiling(dev, i, 0, 0, 0);
>
> return 0;
> }
> diff --git a/drivers/gpu/drm/nouveau/nv10_graph.c
> b/drivers/gpu/drm/nouveau/nv10_graph.c index 6bf6804..2aeac8b 100644
> --- a/drivers/gpu/drm/nouveau/nv10_graph.c
> +++ b/drivers/gpu/drm/nouveau/nv10_graph.c
> @@ -808,6 +808,39 @@ void nv10_graph_destroy_context(struct
> nouveau_channel *chan) chan->pgraph_ctx = NULL;
> }
>
> +static void
> +nv10_graph_write_tile(struct drm_device *dev, int i, uint32_t addr,
> + uint32_t size, uint32_t pitch)
> +{
> + uint32_t limit = max(1u, addr + size) - 1;
> +
> + if (pitch)
> + addr |= 1 << 31;
> +
> + nv_wr32(dev, NV10_PGRAPH_TLIMIT(i), limit);
> + nv_wr32(dev, NV10_PGRAPH_TSIZE(i), pitch);
> + nv_wr32(dev, NV10_PGRAPH_TILE(i), addr);
> +}
> +
> +void
> +nv10_graph_set_region_tiling(struct drm_device *dev, int i, uint32_t
> addr,
> + uint32_t size, uint32_t pitch)
> +{
> + struct drm_nouveau_private *dev_priv = dev->dev_private;
> + struct nouveau_fifo_engine *pfifo = &dev_priv->engine.fifo;
> + struct nouveau_pgraph_engine *pgraph =
> &dev_priv->engine.graph; +
> + pfifo->reassign(dev, false);
> + pgraph->fifo_access(dev, false);
> +
> + nouveau_wait_for_idle(dev);
> +
> + nv10_graph_write_tile(dev, i, addr, size, pitch);
> +
> + pgraph->fifo_access(dev, true);
> + pfifo->reassign(dev, true);
> +}
> +
> int nv10_graph_init(struct drm_device *dev)
> {
> struct drm_nouveau_private *dev_priv = dev->dev_private;
> @@ -836,17 +869,9 @@ int nv10_graph_init(struct drm_device *dev)
> } else
> nv_wr32(dev, NV10_PGRAPH_DEBUG_4, 0x00000000);
>
> - /* copy tile info from PFB */
> - for (i = 0; i < NV10_PFB_TILE__SIZE; i++) {
> - nv_wr32(dev, NV10_PGRAPH_TILE(i),
> - nv_rd32(dev,
> NV10_PFB_TILE(i)));
> - nv_wr32(dev, NV10_PGRAPH_TLIMIT(i),
> - nv_rd32(dev,
> NV10_PFB_TLIMIT(i)));
> - nv_wr32(dev, NV10_PGRAPH_TSIZE(i),
> - nv_rd32(dev,
> NV10_PFB_TSIZE(i)));
> - nv_wr32(dev, NV10_PGRAPH_TSTATUS(i),
> - nv_rd32(dev,
> NV10_PFB_TSTATUS(i)));
> - }
> + /* Turn all the tiling regions off. */
> + for (i = 0; i < NV10_PFB_TILE__SIZE; i++)
> + nv10_graph_write_tile(dev, i, 0, 0, 0);
>
> nv_wr32(dev, NV10_PGRAPH_CTX_SWITCH1, 0x00000000);
> nv_wr32(dev, NV10_PGRAPH_CTX_SWITCH2, 0x00000000);
> diff --git a/drivers/gpu/drm/nouveau/nv20_graph.c
> b/drivers/gpu/drm/nouveau/nv20_graph.c index 18ba74f..55d1a8e 100644
> --- a/drivers/gpu/drm/nouveau/nv20_graph.c
> +++ b/drivers/gpu/drm/nouveau/nv20_graph.c
> @@ -514,6 +514,46 @@ nv20_graph_rdi(struct drm_device *dev)
> nouveau_wait_for_idle(dev);
> }
>
> +static void
> +nv20_graph_write_tile(struct drm_device *dev, int i, uint32_t addr,
> + uint32_t size, uint32_t pitch)
> +{
> + uint32_t limit = max(1u, addr + size) - 1;
> +
> + if (pitch)
> + addr |= 1;
> +
> + nv_wr32(dev, NV20_PGRAPH_TLIMIT(i), limit);
> + nv_wr32(dev, NV20_PGRAPH_TSIZE(i), pitch);
> + nv_wr32(dev, NV20_PGRAPH_TILE(i), addr);
> +
> + nv_wr32(dev, NV10_PGRAPH_RDI_INDEX, 0x00EA0030 + 4 * i);
> + nv_wr32(dev, NV10_PGRAPH_RDI_DATA, limit);
> + nv_wr32(dev, NV10_PGRAPH_RDI_INDEX, 0x00EA0050 + 4 * i);
> + nv_wr32(dev, NV10_PGRAPH_RDI_DATA, pitch);
> + nv_wr32(dev, NV10_PGRAPH_RDI_INDEX, 0x00EA0010 + 4 * i);
> + nv_wr32(dev, NV10_PGRAPH_RDI_DATA, addr);
> +}
> +
> +void
> +nv20_graph_set_region_tiling(struct drm_device *dev, int i, uint32_t
> addr,
> + uint32_t size, uint32_t pitch)
> +{
> + struct drm_nouveau_private *dev_priv = dev->dev_private;
> + struct nouveau_fifo_engine *pfifo = &dev_priv->engine.fifo;
> + struct nouveau_pgraph_engine *pgraph =
> &dev_priv->engine.graph; +
> + pfifo->reassign(dev, false);
> + pgraph->fifo_access(dev, false);
> +
> + nouveau_wait_for_idle(dev);
> +
> + nv20_graph_write_tile(dev, i, addr, size, pitch);
> +
> + pgraph->fifo_access(dev, true);
> + pfifo->reassign(dev, true);
> +}
> +
> int
> nv20_graph_init(struct drm_device *dev)
> {
> @@ -572,27 +612,10 @@ nv20_graph_init(struct drm_device *dev)
> nv_wr32(dev, NV10_PGRAPH_RDI_DATA , 0x00000030);
> }
>
> - /* copy tile info from PFB */
> - for (i = 0; i < NV10_PFB_TILE__SIZE; i++) {
> - nv_wr32(dev, 0x00400904 + i * 0x10,
> - nv_rd32(dev,
> NV10_PFB_TLIMIT(i)));
> - /* which is NV40_PGRAPH_TLIMIT0(i) ?? */
> - nv_wr32(dev, NV10_PGRAPH_RDI_INDEX, 0x00EA0030 + i *
> 4);
> - nv_wr32(dev, NV10_PGRAPH_RDI_DATA,
> - nv_rd32(dev,
> NV10_PFB_TLIMIT(i)));
> - nv_wr32(dev, 0x00400908 + i * 0x10,
> - nv_rd32(dev,
> NV10_PFB_TSIZE(i)));
> - /* which is NV40_PGRAPH_TSIZE0(i) ?? */
> - nv_wr32(dev, NV10_PGRAPH_RDI_INDEX, 0x00EA0050 + i *
> 4);
> - nv_wr32(dev, NV10_PGRAPH_RDI_DATA,
> - nv_rd32(dev,
> NV10_PFB_TSIZE(i)));
> - nv_wr32(dev, 0x00400900 + i * 0x10,
> - nv_rd32(dev,
> NV10_PFB_TILE(i)));
> - /* which is NV40_PGRAPH_TILE0(i) ?? */
> - nv_wr32(dev, NV10_PGRAPH_RDI_INDEX, 0x00EA0010 + i *
> 4);
> - nv_wr32(dev, NV10_PGRAPH_RDI_DATA,
> - nv_rd32(dev,
> NV10_PFB_TILE(i)));
> - }
> + /* Turn all the tiling regions off. */
> + for (i = 0; i < NV10_PFB_TILE__SIZE; i++)
> + nv20_graph_write_tile(dev, i, 0, 0, 0);
> +
> for (i = 0; i < 8; i++) {
> nv_wr32(dev, 0x400980 + i * 4, nv_rd32(dev, 0x100300
> + i * 4)); nv_wr32(dev, NV10_PGRAPH_RDI_INDEX, 0x00EA0090 + i * 4);
> @@ -704,18 +727,9 @@ nv30_graph_init(struct drm_device *dev)
>
> nv_wr32(dev, 0x4000c0, 0x00000016);
>
> - /* copy tile info from PFB */
> - for (i = 0; i < NV10_PFB_TILE__SIZE; i++) {
> - nv_wr32(dev, 0x00400904 + i * 0x10,
> - nv_rd32(dev,
> NV10_PFB_TLIMIT(i)));
> - /* which is NV40_PGRAPH_TLIMIT0(i) ?? */
> - nv_wr32(dev, 0x00400908 + i * 0x10,
> - nv_rd32(dev,
> NV10_PFB_TSIZE(i)));
> - /* which is NV40_PGRAPH_TSIZE0(i) ?? */
> - nv_wr32(dev, 0x00400900 + i * 0x10,
> - nv_rd32(dev,
> NV10_PFB_TILE(i)));
> - /* which is NV40_PGRAPH_TILE0(i) ?? */
> - }
> + /* Turn all the tiling regions off. */
> + for (i = 0; i < NV10_PFB_TILE__SIZE; i++)
> + nv20_graph_write_tile(dev, i, 0, 0, 0);
>
> nv_wr32(dev, NV10_PGRAPH_CTX_CONTROL, 0x10000100);
> nv_wr32(dev, NV10_PGRAPH_STATE , 0xFFFFFFFF);
> diff --git a/drivers/gpu/drm/nouveau/nv40_fb.c
> b/drivers/gpu/drm/nouveau/nv40_fb.c index ca1d271..3cd07d8 100644
> --- a/drivers/gpu/drm/nouveau/nv40_fb.c
> +++ b/drivers/gpu/drm/nouveau/nv40_fb.c
> @@ -3,12 +3,37 @@
> #include "nouveau_drv.h"
> #include "nouveau_drm.h"
>
> +void
> +nv40_fb_set_region_tiling(struct drm_device *dev, int i, uint32_t
> addr,
> + uint32_t size, uint32_t pitch)
> +{
> + struct drm_nouveau_private *dev_priv = dev->dev_private;
> + uint32_t limit = max(1u, addr + size) - 1;
> +
> + if (pitch)
> + addr |= 1;
> +
> + switch (dev_priv->chipset) {
> + case 0x40:
> + nv_wr32(dev, NV10_PFB_TLIMIT(i), limit);
> + nv_wr32(dev, NV10_PFB_TSIZE(i), pitch);
> + nv_wr32(dev, NV10_PFB_TILE(i), addr);
> + break;
> +
> + default:
> + nv_wr32(dev, NV40_PFB_TLIMIT(i), limit);
> + nv_wr32(dev, NV40_PFB_TSIZE(i), pitch);
> + nv_wr32(dev, NV40_PFB_TILE(i), addr);
> + break;
> + }
> +}
> +
> int
> nv40_fb_init(struct drm_device *dev)
> {
> struct drm_nouveau_private *dev_priv = dev->dev_private;
> - uint32_t fb_bar_size, tmp;
> - int num_tiles;
> + struct nouveau_fb_engine *pfb = &dev_priv->engine.fb;
> + uint32_t tmp;
> int i;
>
> /* This is strictly a NV4x register (don't know about NV5x).
> */ @@ -23,35 +48,23 @@ nv40_fb_init(struct drm_device *dev)
> case 0x45:
> tmp = nv_rd32(dev, NV10_PFB_CLOSE_PAGE2);
> nv_wr32(dev, NV10_PFB_CLOSE_PAGE2, tmp & ~(1 << 15));
> - num_tiles = NV10_PFB_TILE__SIZE;
> + pfb->num_tiles = NV10_PFB_TILE__SIZE;
> break;
> case 0x46: /* G72 */
> case 0x47: /* G70 */
> case 0x49: /* G71 */
> case 0x4b: /* G73 */
> case 0x4c: /* C51 (G7X version) */
> - num_tiles = NV40_PFB_TILE__SIZE_1;
> + pfb->num_tiles = NV40_PFB_TILE__SIZE_1;
> break;
> default:
> - num_tiles = NV40_PFB_TILE__SIZE_0;
> + pfb->num_tiles = NV40_PFB_TILE__SIZE_0;
> break;
> }
>
> - fb_bar_size = drm_get_resource_len(dev, 0) - 1;
> - switch (dev_priv->chipset) {
> - case 0x40:
> - for (i = 0; i < num_tiles; i++) {
> - nv_wr32(dev, NV10_PFB_TILE(i), 0);
> - nv_wr32(dev, NV10_PFB_TLIMIT(i),
> fb_bar_size);
> - }
> - break;
> - default:
> - for (i = 0; i < num_tiles; i++) {
> - nv_wr32(dev, NV40_PFB_TILE(i), 0);
> - nv_wr32(dev, NV40_PFB_TLIMIT(i),
> fb_bar_size);
> - }
> - break;
> - }
> + /* Turn all the tiling regions off. */
> + for (i = 0; i < pfb->num_tiles; i++)
> + pfb->set_region_tiling(dev, i, 0, 0, 0);
>
> return 0;
> }
> diff --git a/drivers/gpu/drm/nouveau/nv40_graph.c
> b/drivers/gpu/drm/nouveau/nv40_graph.c index d3e0a2a..2435d49 100644
> --- a/drivers/gpu/drm/nouveau/nv40_graph.c
> +++ b/drivers/gpu/drm/nouveau/nv40_graph.c
> @@ -333,6 +333,67 @@ nv40_grctx_vals_load(struct drm_device *dev,
> struct nouveau_gpuobj *ctx) nv_wo32(dev, ctx, cv->data[i].offset,
> cv->data[i].value); }
>
> +static void
> +nv40_graph_write_tile(struct drm_device *dev, int i, uint32_t addr,
> + uint32_t size, uint32_t pitch)
> +{
> + struct drm_nouveau_private *dev_priv = dev->dev_private;
> + uint32_t limit = max(1u, addr + size) - 1;
> +
> + if (pitch)
> + addr |= 1;
> +
> + switch (dev_priv->chipset) {
> + case 0x44:
> + case 0x4a:
> + case 0x4e:
> + nv_wr32(dev, NV20_PGRAPH_TSIZE(i), pitch);
> + nv_wr32(dev, NV20_PGRAPH_TLIMIT(i), limit);
> + nv_wr32(dev, NV20_PGRAPH_TILE(i), addr);
> + break;
> +
> + case 0x46:
> + case 0x47:
> + case 0x49:
> + case 0x4b:
> + nv_wr32(dev, NV47_PGRAPH_TSIZE(i), pitch);
> + nv_wr32(dev, NV47_PGRAPH_TLIMIT(i), limit);
> + nv_wr32(dev, NV47_PGRAPH_TILE(i), addr);
> + nv_wr32(dev, NV40_PGRAPH_TSIZE1(i), pitch);
> + nv_wr32(dev, NV40_PGRAPH_TLIMIT1(i), limit);
> + nv_wr32(dev, NV40_PGRAPH_TILE1(i), addr);
> + break;
> +
> + default:
> + nv_wr32(dev, NV20_PGRAPH_TSIZE(i), pitch);
> + nv_wr32(dev, NV20_PGRAPH_TLIMIT(i), limit);
> + nv_wr32(dev, NV20_PGRAPH_TILE(i), addr);
> + nv_wr32(dev, NV40_PGRAPH_TSIZE1(i), pitch);
> + nv_wr32(dev, NV40_PGRAPH_TLIMIT1(i), limit);
> + nv_wr32(dev, NV40_PGRAPH_TILE1(i), addr);
> + break;
> + }
> +}
> +
Have you looked at 0xB000, 0xB004, 0xB008 by chance?
I noticed that nv uses these tiling regs (what looks like) on nv4a/nv4e (NV44 core) cards but not any others at startup. I thought is was because 0x406900 regs are missing.
+#define NV40_PFB_TILE2(i) (0x0000B000 + (i*12))
+#define NV40_PFB_TILE__SIZE_2 12
+#define NV40_PFB_TLIMIT2(i) (0x0000B004 + (i*12))
+#define NV40_PFB_TSIZE2(i) (0x0000B008 + (i*12))
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH 1/3] drm/nouveau: Pre-G80 tiling support.
[not found] ` <87tyvxnrtp.fsf-sGOZH3hwPm2sTnJN9+BGXg@public.gmane.org>
@ 2009-12-11 21:28 ` Francisco Jerez
0 siblings, 0 replies; 7+ messages in thread
From: Francisco Jerez @ 2009-12-11 21:28 UTC (permalink / raw)
To: nouveau-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
[-- Attachment #1.1.1: Type: text/plain, Size: 1029 bytes --]
Francisco Jerez <currojerez-sGOZH3hwPm2sTnJN9+BGXg@public.gmane.org> writes:
> Jimmy Rentz <jb17bsome-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org> writes:
>
>> On Fri, 11 Dec 2009 19:33:22 +0100
>>
>> Have you looked at 0xB000, 0xB004, 0xB008 by chance?
>> I noticed that nv uses these tiling regs (what looks like) on nv4a/nv4e (NV44 core) cards but not any others at startup. I thought is was because 0x406900 regs are missing.
>
> Yeah, I've seen the blob use those regs in some nv4a mmiotraces, but it
> seemed to work without them and they're outside PGRAPH/PFB or anything
> we currently exercise so I decided to leave them out.
>
>>
>> +#define NV40_PFB_TILE2(i) (0x0000B000 + (i*12))
>> +#define NV40_PFB_TILE__SIZE_2 12
>> +#define NV40_PFB_TLIMIT2(i) (0x0000B004 + (i*12))
>> +#define NV40_PFB_TSIZE2(i) (0x0000B008 + (i*12))
Sorry for not replying-to-all the first time...
[-- Attachment #1.2: Type: application/pgp-signature, Size: 196 bytes --]
[-- Attachment #2: Type: text/plain, Size: 181 bytes --]
_______________________________________________
Nouveau mailing list
Nouveau-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
http://lists.freedesktop.org/mailman/listinfo/nouveau
^ permalink raw reply [flat|nested] 7+ messages in thread
* [PATCHv2 1/3] drm/nouveau: Pre-G80 tiling support.
[not found] ` <1260556404-13389-1-git-send-email-currojerez-sGOZH3hwPm2sTnJN9+BGXg@public.gmane.org>
` (2 preceding siblings ...)
2009-12-11 20:18 ` [PATCH 1/3] drm/nouveau: Pre-G80 tiling support Jimmy Rentz
@ 2009-12-14 3:03 ` Francisco Jerez
3 siblings, 0 replies; 7+ messages in thread
From: Francisco Jerez @ 2009-12-14 3:03 UTC (permalink / raw)
To: nouveau-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
Signed-off-by: Francisco Jerez <currojerez-sGOZH3hwPm2sTnJN9+BGXg@public.gmane.org>
---
v2: Simplify things a bit.
drivers/gpu/drm/nouveau/nouveau_drv.h | 18 +++++
drivers/gpu/drm/nouveau/nouveau_reg.h | 16 ++--
drivers/gpu/drm/nouveau/nouveau_state.c | 8 ++
drivers/gpu/drm/nouveau/nv10_fb.c | 32 +++++++--
drivers/gpu/drm/nouveau/nv10_graph.c | 28 +++++---
drivers/gpu/drm/nouveau/nv20_graph.c | 61 ++++++++---------
drivers/gpu/drm/nouveau/nv40_fb.c | 53 +++++++++-----
drivers/gpu/drm/nouveau/nv40_graph.c | 116 +++++++++++++------------------
8 files changed, 185 insertions(+), 147 deletions(-)
diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h
index 88b4c7b..40b4a37 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drv.h
+++ b/drivers/gpu/drm/nouveau/nouveau_drv.h
@@ -276,8 +276,13 @@ struct nouveau_timer_engine {
};
struct nouveau_fb_engine {
+ int num_tiles;
+
int (*init)(struct drm_device *dev);
void (*takedown)(struct drm_device *dev);
+
+ void (*set_region_tiling)(struct drm_device *dev, int i, uint32_t addr,
+ uint32_t size, uint32_t pitch);
};
struct nouveau_fifo_engine {
@@ -328,6 +333,9 @@ struct nouveau_pgraph_engine {
void (*destroy_context)(struct nouveau_channel *);
int (*load_context)(struct nouveau_channel *);
int (*unload_context)(struct drm_device *);
+
+ void (*set_region_tiling)(struct drm_device *dev, int i, uint32_t addr,
+ uint32_t size, uint32_t pitch);
};
struct nouveau_engine {
@@ -876,10 +884,14 @@ extern void nv04_fb_takedown(struct drm_device *);
/* nv10_fb.c */
extern int nv10_fb_init(struct drm_device *);
extern void nv10_fb_takedown(struct drm_device *);
+extern void nv10_fb_set_region_tiling(struct drm_device *, int, uint32_t,
+ uint32_t, uint32_t);
/* nv40_fb.c */
extern int nv40_fb_init(struct drm_device *);
extern void nv40_fb_takedown(struct drm_device *);
+extern void nv40_fb_set_region_tiling(struct drm_device *, int, uint32_t,
+ uint32_t, uint32_t);
/* nv04_fifo.c */
extern int nv04_fifo_init(struct drm_device *);
@@ -938,6 +950,8 @@ extern void nv10_graph_destroy_context(struct nouveau_channel *);
extern int nv10_graph_load_context(struct nouveau_channel *);
extern int nv10_graph_unload_context(struct drm_device *);
extern void nv10_graph_context_switch(struct drm_device *);
+extern void nv10_graph_set_region_tiling(struct drm_device *, int, uint32_t,
+ uint32_t, uint32_t);
/* nv20_graph.c */
extern struct nouveau_pgraph_object_class nv20_graph_grclass[];
@@ -949,6 +963,8 @@ extern int nv20_graph_unload_context(struct drm_device *);
extern int nv20_graph_init(struct drm_device *);
extern void nv20_graph_takedown(struct drm_device *);
extern int nv30_graph_init(struct drm_device *);
+extern void nv20_graph_set_region_tiling(struct drm_device *, int, uint32_t,
+ uint32_t, uint32_t);
/* nv40_graph.c */
extern struct nouveau_pgraph_object_class nv40_graph_grclass[];
@@ -962,6 +978,8 @@ extern int nv40_graph_unload_context(struct drm_device *);
extern int nv40_grctx_init(struct drm_device *);
extern void nv40_grctx_fini(struct drm_device *);
extern void nv40_grctx_vals_load(struct drm_device *, struct nouveau_gpuobj *);
+extern void nv40_graph_set_region_tiling(struct drm_device *, int, uint32_t,
+ uint32_t, uint32_t);
/* nv50_graph.c */
extern struct nouveau_pgraph_object_class nv50_graph_grclass[];
diff --git a/drivers/gpu/drm/nouveau/nouveau_reg.h b/drivers/gpu/drm/nouveau/nouveau_reg.h
index fa1b0e7..251f1b3 100644
--- a/drivers/gpu/drm/nouveau/nouveau_reg.h
+++ b/drivers/gpu/drm/nouveau/nouveau_reg.h
@@ -349,19 +349,19 @@
#define NV04_PGRAPH_BLEND 0x00400824
#define NV04_PGRAPH_STORED_FMT 0x00400830
#define NV04_PGRAPH_PATT_COLORRAM 0x00400900
-#define NV40_PGRAPH_TILE0(i) (0x00400900 + (i*16))
-#define NV40_PGRAPH_TLIMIT0(i) (0x00400904 + (i*16))
-#define NV40_PGRAPH_TSIZE0(i) (0x00400908 + (i*16))
-#define NV40_PGRAPH_TSTATUS0(i) (0x0040090C + (i*16))
+#define NV20_PGRAPH_TILE(i) (0x00400900 + (i*16))
+#define NV20_PGRAPH_TLIMIT(i) (0x00400904 + (i*16))
+#define NV20_PGRAPH_TSIZE(i) (0x00400908 + (i*16))
+#define NV20_PGRAPH_TSTATUS(i) (0x0040090C + (i*16))
#define NV10_PGRAPH_TILE(i) (0x00400B00 + (i*16))
#define NV10_PGRAPH_TLIMIT(i) (0x00400B04 + (i*16))
#define NV10_PGRAPH_TSIZE(i) (0x00400B08 + (i*16))
#define NV10_PGRAPH_TSTATUS(i) (0x00400B0C + (i*16))
#define NV04_PGRAPH_U_RAM 0x00400D00
-#define NV47_PGRAPH_TILE0(i) (0x00400D00 + (i*16))
-#define NV47_PGRAPH_TLIMIT0(i) (0x00400D04 + (i*16))
-#define NV47_PGRAPH_TSIZE0(i) (0x00400D08 + (i*16))
-#define NV47_PGRAPH_TSTATUS0(i) (0x00400D0C + (i*16))
+#define NV47_PGRAPH_TILE(i) (0x00400D00 + (i*16))
+#define NV47_PGRAPH_TLIMIT(i) (0x00400D04 + (i*16))
+#define NV47_PGRAPH_TSIZE(i) (0x00400D08 + (i*16))
+#define NV47_PGRAPH_TSTATUS(i) (0x00400D0C + (i*16))
#define NV04_PGRAPH_V_RAM 0x00400D40
#define NV04_PGRAPH_W_RAM 0x00400D80
#define NV10_PGRAPH_COMBINER0_IN_ALPHA 0x00400E40
diff --git a/drivers/gpu/drm/nouveau/nouveau_state.c b/drivers/gpu/drm/nouveau/nouveau_state.c
index 2ed41d3..4342867 100644
--- a/drivers/gpu/drm/nouveau/nouveau_state.c
+++ b/drivers/gpu/drm/nouveau/nouveau_state.c
@@ -100,6 +100,7 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev)
engine->timer.takedown = nv04_timer_takedown;
engine->fb.init = nv10_fb_init;
engine->fb.takedown = nv10_fb_takedown;
+ engine->fb.set_region_tiling = nv10_fb_set_region_tiling;
engine->graph.grclass = nv10_graph_grclass;
engine->graph.init = nv10_graph_init;
engine->graph.takedown = nv10_graph_takedown;
@@ -109,6 +110,7 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev)
engine->graph.fifo_access = nv04_graph_fifo_access;
engine->graph.load_context = nv10_graph_load_context;
engine->graph.unload_context = nv10_graph_unload_context;
+ engine->graph.set_region_tiling = nv10_graph_set_region_tiling;
engine->fifo.channels = 32;
engine->fifo.init = nv10_fifo_init;
engine->fifo.takedown = nouveau_stub_takedown;
@@ -139,6 +141,7 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev)
engine->timer.takedown = nv04_timer_takedown;
engine->fb.init = nv10_fb_init;
engine->fb.takedown = nv10_fb_takedown;
+ engine->fb.set_region_tiling = nv10_fb_set_region_tiling;
engine->graph.grclass = nv20_graph_grclass;
engine->graph.init = nv20_graph_init;
engine->graph.takedown = nv20_graph_takedown;
@@ -148,6 +151,7 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev)
engine->graph.fifo_access = nv04_graph_fifo_access;
engine->graph.load_context = nv20_graph_load_context;
engine->graph.unload_context = nv20_graph_unload_context;
+ engine->graph.set_region_tiling = nv20_graph_set_region_tiling;
engine->fifo.channels = 32;
engine->fifo.init = nv10_fifo_init;
engine->fifo.takedown = nouveau_stub_takedown;
@@ -178,6 +182,7 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev)
engine->timer.takedown = nv04_timer_takedown;
engine->fb.init = nv10_fb_init;
engine->fb.takedown = nv10_fb_takedown;
+ engine->fb.set_region_tiling = nv10_fb_set_region_tiling;
engine->graph.grclass = nv30_graph_grclass;
engine->graph.init = nv30_graph_init;
engine->graph.takedown = nv20_graph_takedown;
@@ -187,6 +192,7 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev)
engine->graph.destroy_context = nv20_graph_destroy_context;
engine->graph.load_context = nv20_graph_load_context;
engine->graph.unload_context = nv20_graph_unload_context;
+ engine->graph.set_region_tiling = nv20_graph_set_region_tiling;
engine->fifo.channels = 32;
engine->fifo.init = nv10_fifo_init;
engine->fifo.takedown = nouveau_stub_takedown;
@@ -218,6 +224,7 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev)
engine->timer.takedown = nv04_timer_takedown;
engine->fb.init = nv40_fb_init;
engine->fb.takedown = nv40_fb_takedown;
+ engine->fb.set_region_tiling = nv40_fb_set_region_tiling;
engine->graph.grclass = nv40_graph_grclass;
engine->graph.init = nv40_graph_init;
engine->graph.takedown = nv40_graph_takedown;
@@ -227,6 +234,7 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev)
engine->graph.destroy_context = nv40_graph_destroy_context;
engine->graph.load_context = nv40_graph_load_context;
engine->graph.unload_context = nv40_graph_unload_context;
+ engine->graph.set_region_tiling = nv40_graph_set_region_tiling;
engine->fifo.channels = 32;
engine->fifo.init = nv40_fifo_init;
engine->fifo.takedown = nouveau_stub_takedown;
diff --git a/drivers/gpu/drm/nouveau/nv10_fb.c b/drivers/gpu/drm/nouveau/nv10_fb.c
index 79e2d10..cc5cda4 100644
--- a/drivers/gpu/drm/nouveau/nv10_fb.c
+++ b/drivers/gpu/drm/nouveau/nv10_fb.c
@@ -3,17 +3,37 @@
#include "nouveau_drv.h"
#include "nouveau_drm.h"
+void
+nv10_fb_set_region_tiling(struct drm_device *dev, int i, uint32_t addr,
+ uint32_t size, uint32_t pitch)
+{
+ struct drm_nouveau_private *dev_priv = dev->dev_private;
+ uint32_t limit = max(1u, addr + size) - 1;
+
+ if (pitch) {
+ if (dev_priv->card_type >= NV_20)
+ addr |= 1;
+ else
+ addr |= 1 << 31;
+ }
+
+ nv_wr32(dev, NV10_PFB_TLIMIT(i), limit);
+ nv_wr32(dev, NV10_PFB_TSIZE(i), pitch);
+ nv_wr32(dev, NV10_PFB_TILE(i), addr);
+}
+
int
nv10_fb_init(struct drm_device *dev)
{
- uint32_t fb_bar_size;
+ struct drm_nouveau_private *dev_priv = dev->dev_private;
+ struct nouveau_fb_engine *pfb = &dev_priv->engine.fb;
int i;
- fb_bar_size = drm_get_resource_len(dev, 0) - 1;
- for (i = 0; i < NV10_PFB_TILE__SIZE; i++) {
- nv_wr32(dev, NV10_PFB_TILE(i), 0);
- nv_wr32(dev, NV10_PFB_TLIMIT(i), fb_bar_size);
- }
+ pfb->num_tiles = NV10_PFB_TILE__SIZE;
+
+ /* Turn all the tiling regions off. */
+ for (i = 0; i < pfb->num_tiles; i++)
+ pfb->set_region_tiling(dev, i, 0, 0, 0);
return 0;
}
diff --git a/drivers/gpu/drm/nouveau/nv10_graph.c b/drivers/gpu/drm/nouveau/nv10_graph.c
index 6bf6804..669ad9d 100644
--- a/drivers/gpu/drm/nouveau/nv10_graph.c
+++ b/drivers/gpu/drm/nouveau/nv10_graph.c
@@ -808,6 +808,20 @@ void nv10_graph_destroy_context(struct nouveau_channel *chan)
chan->pgraph_ctx = NULL;
}
+void
+nv10_graph_set_region_tiling(struct drm_device *dev, int i, uint32_t addr,
+ uint32_t size, uint32_t pitch)
+{
+ uint32_t limit = max(1u, addr + size) - 1;
+
+ if (pitch)
+ addr |= 1 << 31;
+
+ nv_wr32(dev, NV10_PGRAPH_TLIMIT(i), limit);
+ nv_wr32(dev, NV10_PGRAPH_TSIZE(i), pitch);
+ nv_wr32(dev, NV10_PGRAPH_TILE(i), addr);
+}
+
int nv10_graph_init(struct drm_device *dev)
{
struct drm_nouveau_private *dev_priv = dev->dev_private;
@@ -836,17 +850,9 @@ int nv10_graph_init(struct drm_device *dev)
} else
nv_wr32(dev, NV10_PGRAPH_DEBUG_4, 0x00000000);
- /* copy tile info from PFB */
- for (i = 0; i < NV10_PFB_TILE__SIZE; i++) {
- nv_wr32(dev, NV10_PGRAPH_TILE(i),
- nv_rd32(dev, NV10_PFB_TILE(i)));
- nv_wr32(dev, NV10_PGRAPH_TLIMIT(i),
- nv_rd32(dev, NV10_PFB_TLIMIT(i)));
- nv_wr32(dev, NV10_PGRAPH_TSIZE(i),
- nv_rd32(dev, NV10_PFB_TSIZE(i)));
- nv_wr32(dev, NV10_PGRAPH_TSTATUS(i),
- nv_rd32(dev, NV10_PFB_TSTATUS(i)));
- }
+ /* Turn all the tiling regions off. */
+ for (i = 0; i < NV10_PFB_TILE__SIZE; i++)
+ nv10_graph_set_region_tiling(dev, i, 0, 0, 0);
nv_wr32(dev, NV10_PGRAPH_CTX_SWITCH1, 0x00000000);
nv_wr32(dev, NV10_PGRAPH_CTX_SWITCH2, 0x00000000);
diff --git a/drivers/gpu/drm/nouveau/nv20_graph.c b/drivers/gpu/drm/nouveau/nv20_graph.c
index 18ba74f..d6fc0a8 100644
--- a/drivers/gpu/drm/nouveau/nv20_graph.c
+++ b/drivers/gpu/drm/nouveau/nv20_graph.c
@@ -514,6 +514,27 @@ nv20_graph_rdi(struct drm_device *dev)
nouveau_wait_for_idle(dev);
}
+void
+nv20_graph_set_region_tiling(struct drm_device *dev, int i, uint32_t addr,
+ uint32_t size, uint32_t pitch)
+{
+ uint32_t limit = max(1u, addr + size) - 1;
+
+ if (pitch)
+ addr |= 1;
+
+ nv_wr32(dev, NV20_PGRAPH_TLIMIT(i), limit);
+ nv_wr32(dev, NV20_PGRAPH_TSIZE(i), pitch);
+ nv_wr32(dev, NV20_PGRAPH_TILE(i), addr);
+
+ nv_wr32(dev, NV10_PGRAPH_RDI_INDEX, 0x00EA0030 + 4 * i);
+ nv_wr32(dev, NV10_PGRAPH_RDI_DATA, limit);
+ nv_wr32(dev, NV10_PGRAPH_RDI_INDEX, 0x00EA0050 + 4 * i);
+ nv_wr32(dev, NV10_PGRAPH_RDI_DATA, pitch);
+ nv_wr32(dev, NV10_PGRAPH_RDI_INDEX, 0x00EA0010 + 4 * i);
+ nv_wr32(dev, NV10_PGRAPH_RDI_DATA, addr);
+}
+
int
nv20_graph_init(struct drm_device *dev)
{
@@ -572,27 +593,10 @@ nv20_graph_init(struct drm_device *dev)
nv_wr32(dev, NV10_PGRAPH_RDI_DATA , 0x00000030);
}
- /* copy tile info from PFB */
- for (i = 0; i < NV10_PFB_TILE__SIZE; i++) {
- nv_wr32(dev, 0x00400904 + i * 0x10,
- nv_rd32(dev, NV10_PFB_TLIMIT(i)));
- /* which is NV40_PGRAPH_TLIMIT0(i) ?? */
- nv_wr32(dev, NV10_PGRAPH_RDI_INDEX, 0x00EA0030 + i * 4);
- nv_wr32(dev, NV10_PGRAPH_RDI_DATA,
- nv_rd32(dev, NV10_PFB_TLIMIT(i)));
- nv_wr32(dev, 0x00400908 + i * 0x10,
- nv_rd32(dev, NV10_PFB_TSIZE(i)));
- /* which is NV40_PGRAPH_TSIZE0(i) ?? */
- nv_wr32(dev, NV10_PGRAPH_RDI_INDEX, 0x00EA0050 + i * 4);
- nv_wr32(dev, NV10_PGRAPH_RDI_DATA,
- nv_rd32(dev, NV10_PFB_TSIZE(i)));
- nv_wr32(dev, 0x00400900 + i * 0x10,
- nv_rd32(dev, NV10_PFB_TILE(i)));
- /* which is NV40_PGRAPH_TILE0(i) ?? */
- nv_wr32(dev, NV10_PGRAPH_RDI_INDEX, 0x00EA0010 + i * 4);
- nv_wr32(dev, NV10_PGRAPH_RDI_DATA,
- nv_rd32(dev, NV10_PFB_TILE(i)));
- }
+ /* Turn all the tiling regions off. */
+ for (i = 0; i < NV10_PFB_TILE__SIZE; i++)
+ nv20_graph_set_region_tiling(dev, i, 0, 0, 0);
+
for (i = 0; i < 8; i++) {
nv_wr32(dev, 0x400980 + i * 4, nv_rd32(dev, 0x100300 + i * 4));
nv_wr32(dev, NV10_PGRAPH_RDI_INDEX, 0x00EA0090 + i * 4);
@@ -704,18 +708,9 @@ nv30_graph_init(struct drm_device *dev)
nv_wr32(dev, 0x4000c0, 0x00000016);
- /* copy tile info from PFB */
- for (i = 0; i < NV10_PFB_TILE__SIZE; i++) {
- nv_wr32(dev, 0x00400904 + i * 0x10,
- nv_rd32(dev, NV10_PFB_TLIMIT(i)));
- /* which is NV40_PGRAPH_TLIMIT0(i) ?? */
- nv_wr32(dev, 0x00400908 + i * 0x10,
- nv_rd32(dev, NV10_PFB_TSIZE(i)));
- /* which is NV40_PGRAPH_TSIZE0(i) ?? */
- nv_wr32(dev, 0x00400900 + i * 0x10,
- nv_rd32(dev, NV10_PFB_TILE(i)));
- /* which is NV40_PGRAPH_TILE0(i) ?? */
- }
+ /* Turn all the tiling regions off. */
+ for (i = 0; i < NV10_PFB_TILE__SIZE; i++)
+ nv20_graph_set_region_tiling(dev, i, 0, 0, 0);
nv_wr32(dev, NV10_PGRAPH_CTX_CONTROL, 0x10000100);
nv_wr32(dev, NV10_PGRAPH_STATE , 0xFFFFFFFF);
diff --git a/drivers/gpu/drm/nouveau/nv40_fb.c b/drivers/gpu/drm/nouveau/nv40_fb.c
index ca1d271..3cd07d8 100644
--- a/drivers/gpu/drm/nouveau/nv40_fb.c
+++ b/drivers/gpu/drm/nouveau/nv40_fb.c
@@ -3,12 +3,37 @@
#include "nouveau_drv.h"
#include "nouveau_drm.h"
+void
+nv40_fb_set_region_tiling(struct drm_device *dev, int i, uint32_t addr,
+ uint32_t size, uint32_t pitch)
+{
+ struct drm_nouveau_private *dev_priv = dev->dev_private;
+ uint32_t limit = max(1u, addr + size) - 1;
+
+ if (pitch)
+ addr |= 1;
+
+ switch (dev_priv->chipset) {
+ case 0x40:
+ nv_wr32(dev, NV10_PFB_TLIMIT(i), limit);
+ nv_wr32(dev, NV10_PFB_TSIZE(i), pitch);
+ nv_wr32(dev, NV10_PFB_TILE(i), addr);
+ break;
+
+ default:
+ nv_wr32(dev, NV40_PFB_TLIMIT(i), limit);
+ nv_wr32(dev, NV40_PFB_TSIZE(i), pitch);
+ nv_wr32(dev, NV40_PFB_TILE(i), addr);
+ break;
+ }
+}
+
int
nv40_fb_init(struct drm_device *dev)
{
struct drm_nouveau_private *dev_priv = dev->dev_private;
- uint32_t fb_bar_size, tmp;
- int num_tiles;
+ struct nouveau_fb_engine *pfb = &dev_priv->engine.fb;
+ uint32_t tmp;
int i;
/* This is strictly a NV4x register (don't know about NV5x). */
@@ -23,35 +48,23 @@ nv40_fb_init(struct drm_device *dev)
case 0x45:
tmp = nv_rd32(dev, NV10_PFB_CLOSE_PAGE2);
nv_wr32(dev, NV10_PFB_CLOSE_PAGE2, tmp & ~(1 << 15));
- num_tiles = NV10_PFB_TILE__SIZE;
+ pfb->num_tiles = NV10_PFB_TILE__SIZE;
break;
case 0x46: /* G72 */
case 0x47: /* G70 */
case 0x49: /* G71 */
case 0x4b: /* G73 */
case 0x4c: /* C51 (G7X version) */
- num_tiles = NV40_PFB_TILE__SIZE_1;
+ pfb->num_tiles = NV40_PFB_TILE__SIZE_1;
break;
default:
- num_tiles = NV40_PFB_TILE__SIZE_0;
+ pfb->num_tiles = NV40_PFB_TILE__SIZE_0;
break;
}
- fb_bar_size = drm_get_resource_len(dev, 0) - 1;
- switch (dev_priv->chipset) {
- case 0x40:
- for (i = 0; i < num_tiles; i++) {
- nv_wr32(dev, NV10_PFB_TILE(i), 0);
- nv_wr32(dev, NV10_PFB_TLIMIT(i), fb_bar_size);
- }
- break;
- default:
- for (i = 0; i < num_tiles; i++) {
- nv_wr32(dev, NV40_PFB_TILE(i), 0);
- nv_wr32(dev, NV40_PFB_TLIMIT(i), fb_bar_size);
- }
- break;
- }
+ /* Turn all the tiling regions off. */
+ for (i = 0; i < pfb->num_tiles; i++)
+ pfb->set_region_tiling(dev, i, 0, 0, 0);
return 0;
}
diff --git a/drivers/gpu/drm/nouveau/nv40_graph.c b/drivers/gpu/drm/nouveau/nv40_graph.c
index d3e0a2a..01773e3 100644
--- a/drivers/gpu/drm/nouveau/nv40_graph.c
+++ b/drivers/gpu/drm/nouveau/nv40_graph.c
@@ -333,6 +333,48 @@ nv40_grctx_vals_load(struct drm_device *dev, struct nouveau_gpuobj *ctx)
nv_wo32(dev, ctx, cv->data[i].offset, cv->data[i].value);
}
+void
+nv40_graph_set_region_tiling(struct drm_device *dev, int i, uint32_t addr,
+ uint32_t size, uint32_t pitch)
+{
+ struct drm_nouveau_private *dev_priv = dev->dev_private;
+ uint32_t limit = max(1u, addr + size) - 1;
+
+ if (pitch)
+ addr |= 1;
+
+ switch (dev_priv->chipset) {
+ case 0x44:
+ case 0x4a:
+ case 0x4e:
+ nv_wr32(dev, NV20_PGRAPH_TSIZE(i), pitch);
+ nv_wr32(dev, NV20_PGRAPH_TLIMIT(i), limit);
+ nv_wr32(dev, NV20_PGRAPH_TILE(i), addr);
+ break;
+
+ case 0x46:
+ case 0x47:
+ case 0x49:
+ case 0x4b:
+ nv_wr32(dev, NV47_PGRAPH_TSIZE(i), pitch);
+ nv_wr32(dev, NV47_PGRAPH_TLIMIT(i), limit);
+ nv_wr32(dev, NV47_PGRAPH_TILE(i), addr);
+ nv_wr32(dev, NV40_PGRAPH_TSIZE1(i), pitch);
+ nv_wr32(dev, NV40_PGRAPH_TLIMIT1(i), limit);
+ nv_wr32(dev, NV40_PGRAPH_TILE1(i), addr);
+ break;
+
+ default:
+ nv_wr32(dev, NV20_PGRAPH_TSIZE(i), pitch);
+ nv_wr32(dev, NV20_PGRAPH_TLIMIT(i), limit);
+ nv_wr32(dev, NV20_PGRAPH_TILE(i), addr);
+ nv_wr32(dev, NV40_PGRAPH_TSIZE1(i), pitch);
+ nv_wr32(dev, NV40_PGRAPH_TLIMIT1(i), limit);
+ nv_wr32(dev, NV40_PGRAPH_TILE1(i), addr);
+ break;
+ }
+}
+
/*
* G70 0x47
* G71 0x49
@@ -347,7 +389,8 @@ nv40_graph_init(struct drm_device *dev)
{
struct drm_nouveau_private *dev_priv =
(struct drm_nouveau_private *)dev->dev_private;
- uint32_t vramsz, tmp;
+ struct nouveau_fb_engine *pfb = &dev_priv->engine.fb;
+ uint32_t vramsz;
int i, j;
nv_wr32(dev, NV03_PMC_ENABLE, nv_rd32(dev, NV03_PMC_ENABLE) &
@@ -425,74 +468,9 @@ nv40_graph_init(struct drm_device *dev)
nv_wr32(dev, 0x400b38, 0x2ffff800);
nv_wr32(dev, 0x400b3c, 0x00006000);
- /* copy tile info from PFB */
- switch (dev_priv->chipset) {
- case 0x40: /* vanilla NV40 */
- for (i = 0; i < NV10_PFB_TILE__SIZE; i++) {
- tmp = nv_rd32(dev, NV10_PFB_TILE(i));
- nv_wr32(dev, NV40_PGRAPH_TILE0(i), tmp);
- nv_wr32(dev, NV40_PGRAPH_TILE1(i), tmp);
- tmp = nv_rd32(dev, NV10_PFB_TLIMIT(i));
- nv_wr32(dev, NV40_PGRAPH_TLIMIT0(i), tmp);
- nv_wr32(dev, NV40_PGRAPH_TLIMIT1(i), tmp);
- tmp = nv_rd32(dev, NV10_PFB_TSIZE(i));
- nv_wr32(dev, NV40_PGRAPH_TSIZE0(i), tmp);
- nv_wr32(dev, NV40_PGRAPH_TSIZE1(i), tmp);
- tmp = nv_rd32(dev, NV10_PFB_TSTATUS(i));
- nv_wr32(dev, NV40_PGRAPH_TSTATUS0(i), tmp);
- nv_wr32(dev, NV40_PGRAPH_TSTATUS1(i), tmp);
- }
- break;
- case 0x44:
- case 0x4a:
- case 0x4e: /* NV44-based cores don't have 0x406900? */
- for (i = 0; i < NV40_PFB_TILE__SIZE_0; i++) {
- tmp = nv_rd32(dev, NV40_PFB_TILE(i));
- nv_wr32(dev, NV40_PGRAPH_TILE0(i), tmp);
- tmp = nv_rd32(dev, NV40_PFB_TLIMIT(i));
- nv_wr32(dev, NV40_PGRAPH_TLIMIT0(i), tmp);
- tmp = nv_rd32(dev, NV40_PFB_TSIZE(i));
- nv_wr32(dev, NV40_PGRAPH_TSIZE0(i), tmp);
- tmp = nv_rd32(dev, NV40_PFB_TSTATUS(i));
- nv_wr32(dev, NV40_PGRAPH_TSTATUS0(i), tmp);
- }
- break;
- case 0x46:
- case 0x47:
- case 0x49:
- case 0x4b: /* G7X-based cores */
- for (i = 0; i < NV40_PFB_TILE__SIZE_1; i++) {
- tmp = nv_rd32(dev, NV40_PFB_TILE(i));
- nv_wr32(dev, NV47_PGRAPH_TILE0(i), tmp);
- nv_wr32(dev, NV40_PGRAPH_TILE1(i), tmp);
- tmp = nv_rd32(dev, NV40_PFB_TLIMIT(i));
- nv_wr32(dev, NV47_PGRAPH_TLIMIT0(i), tmp);
- nv_wr32(dev, NV40_PGRAPH_TLIMIT1(i), tmp);
- tmp = nv_rd32(dev, NV40_PFB_TSIZE(i));
- nv_wr32(dev, NV47_PGRAPH_TSIZE0(i), tmp);
- nv_wr32(dev, NV40_PGRAPH_TSIZE1(i), tmp);
- tmp = nv_rd32(dev, NV40_PFB_TSTATUS(i));
- nv_wr32(dev, NV47_PGRAPH_TSTATUS0(i), tmp);
- nv_wr32(dev, NV40_PGRAPH_TSTATUS1(i), tmp);
- }
- break;
- default: /* everything else */
- for (i = 0; i < NV40_PFB_TILE__SIZE_0; i++) {
- tmp = nv_rd32(dev, NV40_PFB_TILE(i));
- nv_wr32(dev, NV40_PGRAPH_TILE0(i), tmp);
- nv_wr32(dev, NV40_PGRAPH_TILE1(i), tmp);
- tmp = nv_rd32(dev, NV40_PFB_TLIMIT(i));
- nv_wr32(dev, NV40_PGRAPH_TLIMIT0(i), tmp);
- nv_wr32(dev, NV40_PGRAPH_TLIMIT1(i), tmp);
- tmp = nv_rd32(dev, NV40_PFB_TSIZE(i));
- nv_wr32(dev, NV40_PGRAPH_TSIZE0(i), tmp);
- nv_wr32(dev, NV40_PGRAPH_TSIZE1(i), tmp);
- tmp = nv_rd32(dev, NV40_PFB_TSTATUS(i));
- nv_wr32(dev, NV40_PGRAPH_TSTATUS0(i), tmp);
- nv_wr32(dev, NV40_PGRAPH_TSTATUS1(i), tmp);
- }
- break;
- }
+ /* Turn all the tiling regions off. */
+ for (i = 0; i < pfb->num_tiles; i++)
+ nv40_graph_set_region_tiling(dev, i, 0, 0, 0);
/* begin RAM config */
vramsz = drm_get_resource_len(dev, 0) - 1;
--
1.6.4.4
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCHv2 2/3] drm/nouveau: Make the MM aware of pre-G80 tiling.
[not found] ` <1260556404-13389-2-git-send-email-currojerez-sGOZH3hwPm2sTnJN9+BGXg@public.gmane.org>
@ 2009-12-14 3:10 ` Francisco Jerez
0 siblings, 0 replies; 7+ messages in thread
From: Francisco Jerez @ 2009-12-14 3:10 UTC (permalink / raw)
To: nouveau-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
Signed-off-by: Francisco Jerez <currojerez-sGOZH3hwPm2sTnJN9+BGXg@public.gmane.org>
---
v2: Fix the alignment requirements for some cards, and make
PFIFO stalling more reliable.
drivers/gpu/drm/nouveau/nouveau_bo.c | 221 +++++++++++++++++++++++----------
drivers/gpu/drm/nouveau/nouveau_drv.h | 22 ++++
drivers/gpu/drm/nouveau/nouveau_mem.c | 87 +++++++++++++
3 files changed, 265 insertions(+), 65 deletions(-)
diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
index 320a14b..7bcec6c 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -37,6 +37,7 @@ static void
nouveau_bo_del_ttm(struct ttm_buffer_object *bo)
{
struct drm_nouveau_private *dev_priv = nouveau_bdev(bo->bdev);
+ struct drm_device *dev = dev_priv->dev;
struct nouveau_bo *nvbo = nouveau_bo(bo);
ttm_bo_kunmap(&nvbo->kmap);
@@ -44,12 +45,83 @@ nouveau_bo_del_ttm(struct ttm_buffer_object *bo)
if (unlikely(nvbo->gem))
DRM_ERROR("bo %p still attached to GEM object\n", bo);
+ if (nvbo->tile)
+ nv10_mem_expire_tiling(dev, nvbo->tile, NULL);
+
spin_lock(&dev_priv->ttm.bo_list_lock);
list_del(&nvbo->head);
spin_unlock(&dev_priv->ttm.bo_list_lock);
kfree(nvbo);
}
+static void
+nouveau_bo_fixup_align(struct drm_device *dev,
+ uint32_t tile_mode, uint32_t tile_flags,
+ int *align, int *size)
+{
+ struct drm_nouveau_private *dev_priv = dev->dev_private;
+
+ /*
+ * Some of the tile_flags have a periodic structure of N*4096 bytes,
+ * align to to that as well as the page size. Overallocate memory to
+ * avoid corruption of other buffer objects.
+ */
+ if (dev_priv->card_type == NV_50) {
+ switch (tile_flags) {
+ case 0x1800:
+ case 0x2800:
+ case 0x4800:
+ case 0x7a00:
+ if (dev_priv->chipset >= 0xA0) {
+ /* This is based on high end cards with 448 bits
+ * memory bus, could be different elsewhere.*/
+ *size += 6 * 28672;
+ /* 8 * 28672 is the actual alignment requirement
+ * but we must also align to page size. */
+ *align = 2 * 8 * 28672;
+ } else if (dev_priv->chipset >= 0x90) {
+ *size += 3 * 16384;
+ *align = 12 * 16834;
+ } else {
+ *size += 3 * 8192;
+ /* 12 * 8192 is the actual alignment requirement
+ * but we must also align to page size. */
+ *align = 2 * 12 * 8192;
+ }
+ break;
+ default:
+ break;
+ }
+
+ } else {
+ if (tile_mode) {
+ if (dev_priv->chipset >= 0x40) {
+ *align = 65536;
+ *size = roundup(*size, 64 * tile_mode);
+
+ } else if (dev_priv->chipset >= 0x30) {
+ *align = 32768;
+ *size = roundup(*size, 64 * tile_mode);
+
+ } else if (dev_priv->chipset >= 0x20) {
+ *align = 16384;
+ *size = roundup(*size, 32 * tile_mode);
+
+ } else if (dev_priv->chipset >= 0x10) {
+ *align = 16384;
+ *size = roundup(*size, 16 * tile_mode);
+ }
+ }
+ }
+
+ *size = ALIGN(*size, PAGE_SIZE);
+
+ if (dev_priv->card_type == NV_50) {
+ *size = ALIGN(*size, 65536);
+ *align = max(65536, *align);
+ }
+}
+
int
nouveau_bo_new(struct drm_device *dev, struct nouveau_channel *chan,
int size, int align, uint32_t flags, uint32_t tile_mode,
@@ -70,46 +142,9 @@ nouveau_bo_new(struct drm_device *dev, struct nouveau_channel *chan,
nvbo->tile_mode = tile_mode;
nvbo->tile_flags = tile_flags;
- /*
- * Some of the tile_flags have a periodic structure of N*4096 bytes,
- * align to to that as well as the page size. Overallocate memory to
- * avoid corruption of other buffer objects.
- */
- switch (tile_flags) {
- case 0x1800:
- case 0x2800:
- case 0x4800:
- case 0x7a00:
- if (dev_priv->chipset >= 0xA0) {
- /* This is based on high end cards with 448 bits
- * memory bus, could be different elsewhere.*/
- size += 6 * 28672;
- /* 8 * 28672 is the actual alignment requirement,
- * but we must also align to page size. */
- align = 2 * 8 * 28672;
- } else if (dev_priv->chipset >= 0x90) {
- size += 3 * 16384;
- align = 12 * 16834;
- } else {
- size += 3 * 8192;
- /* 12 * 8192 is the actual alignment requirement,
- * but we must also align to page size. */
- align = 2 * 12 * 8192;
- }
- break;
- default:
- break;
- }
-
+ nouveau_bo_fixup_align(dev, tile_mode, tile_flags, &align, &size);
align >>= PAGE_SHIFT;
- size = (size + (PAGE_SIZE - 1)) & ~(PAGE_SIZE - 1);
- if (dev_priv->card_type == NV_50) {
- size = (size + 65535) & ~65535;
- if (align < (65536 / PAGE_SIZE))
- align = (65536 / PAGE_SIZE);
- }
-
if (flags & TTM_PL_FLAG_VRAM)
nvbo->placements[n++] = TTM_PL_FLAG_VRAM | TTM_PL_MASK_CACHING;
if (flags & TTM_PL_FLAG_TT)
@@ -408,6 +443,7 @@ nouveau_bo_evict_flags(struct ttm_buffer_object *bo, struct ttm_placement *pl)
/* GPU-assisted copy using NV_MEMORY_TO_MEMORY_FORMAT, can access
* TTM_PL_{VRAM,TT} directly.
*/
+
static int
nouveau_bo_move_accel_cleanup(struct nouveau_channel *chan,
struct nouveau_bo *nvbo, bool evict, bool no_wait,
@@ -442,11 +478,12 @@ nouveau_bo_mem_ctxdma(struct nouveau_bo *nvbo, struct nouveau_channel *chan,
}
static int
-nouveau_bo_move_m2mf(struct ttm_buffer_object *bo, int evict, int no_wait,
- struct ttm_mem_reg *old_mem, struct ttm_mem_reg *new_mem)
+nouveau_bo_move_m2mf(struct ttm_buffer_object *bo, int evict, bool intr,
+ int no_wait, struct ttm_mem_reg *new_mem)
{
struct nouveau_bo *nvbo = nouveau_bo(bo);
struct drm_nouveau_private *dev_priv = nouveau_bdev(bo->bdev);
+ struct ttm_mem_reg *old_mem = &bo->mem;
struct nouveau_channel *chan;
uint64_t src_offset, dst_offset;
uint32_t page_count;
@@ -549,7 +586,7 @@ nouveau_bo_move_flipd(struct ttm_buffer_object *bo, bool evict, bool intr,
if (ret)
goto out;
- ret = nouveau_bo_move_m2mf(bo, true, no_wait, &bo->mem, &tmp_mem);
+ ret = nouveau_bo_move_m2mf(bo, true, intr, no_wait, &tmp_mem);
if (ret)
goto out;
@@ -587,7 +624,7 @@ nouveau_bo_move_flips(struct ttm_buffer_object *bo, bool evict, bool intr,
if (ret)
goto out;
- ret = nouveau_bo_move_m2mf(bo, true, no_wait, &bo->mem, new_mem);
+ ret = nouveau_bo_move_m2mf(bo, true, intr, no_wait, new_mem);
if (ret)
goto out;
@@ -602,51 +639,105 @@ out:
}
static int
-nouveau_bo_move(struct ttm_buffer_object *bo, bool evict, bool intr,
- bool no_wait, struct ttm_mem_reg *new_mem)
+nouveau_bo_vm_bind(struct ttm_buffer_object *bo, struct ttm_mem_reg *new_mem,
+ struct nouveau_tile_reg **new_tile)
{
struct drm_nouveau_private *dev_priv = nouveau_bdev(bo->bdev);
- struct nouveau_bo *nvbo = nouveau_bo(bo);
struct drm_device *dev = dev_priv->dev;
- struct ttm_mem_reg *old_mem = &bo->mem;
+ struct nouveau_bo *nvbo = nouveau_bo(bo);
+ uint64_t offset;
int ret;
- if (dev_priv->card_type == NV_50 && new_mem->mem_type == TTM_PL_VRAM &&
- !nvbo->no_vm) {
- uint64_t offset = new_mem->mm_node->start << PAGE_SHIFT;
+ if (nvbo->no_vm || new_mem->mem_type != TTM_PL_VRAM) {
+ /* Nothing to do. */
+ *new_tile = NULL;
+ return 0;
+ }
+
+ offset = new_mem->mm_node->start << PAGE_SHIFT;
+ if (dev_priv->card_type == NV_50) {
ret = nv50_mem_vm_bind_linear(dev,
offset + dev_priv->vm_vram_base,
new_mem->size, nvbo->tile_flags,
offset);
if (ret)
return ret;
+
+ } else if (dev_priv->card_type >= NV_10) {
+ *new_tile = nv10_mem_set_tiling(dev, offset, new_mem->size,
+ nvbo->tile_mode);
}
- if (dev_priv->init_state != NOUVEAU_CARD_INIT_DONE)
- return ttm_bo_move_memcpy(bo, evict, no_wait, new_mem);
+ return 0;
+}
+static void
+nouveau_bo_vm_cleanup(struct ttm_buffer_object *bo,
+ struct nouveau_tile_reg *new_tile,
+ struct nouveau_tile_reg **old_tile)
+{
+ struct drm_nouveau_private *dev_priv = nouveau_bdev(bo->bdev);
+ struct drm_device *dev = dev_priv->dev;
+
+ if (dev_priv->card_type >= NV_10 &&
+ dev_priv->card_type < NV_50) {
+ if (*old_tile)
+ nv10_mem_expire_tiling(dev, *old_tile, bo->sync_obj);
+
+ *old_tile = new_tile;
+ }
+}
+
+static int
+nouveau_bo_move(struct ttm_buffer_object *bo, bool evict, bool intr,
+ bool no_wait, struct ttm_mem_reg *new_mem)
+{
+ struct drm_nouveau_private *dev_priv = nouveau_bdev(bo->bdev);
+ struct nouveau_bo *nvbo = nouveau_bo(bo);
+ struct ttm_mem_reg *old_mem = &bo->mem;
+ struct nouveau_tile_reg *new_tile = NULL;
+ int ret = 0;
+
+ ret = nouveau_bo_vm_bind(bo, new_mem, &new_tile);
+ if (ret)
+ return ret;
+
+ /* Software copy if the card isn't up and running yet. */
+ if (dev_priv->init_state != NOUVEAU_CARD_INIT_DONE) {
+ ret = ttm_bo_move_memcpy(bo, evict, no_wait, new_mem);
+ goto out;
+ }
+
+ /* Fake bo copy. */
if (old_mem->mem_type == TTM_PL_SYSTEM && !bo->ttm) {
BUG_ON(bo->mem.mm_node != NULL);
bo->mem = *new_mem;
new_mem->mm_node = NULL;
- return 0;
+ goto out;
}
- if (new_mem->mem_type == TTM_PL_SYSTEM) {
- if (old_mem->mem_type == TTM_PL_SYSTEM)
- return ttm_bo_move_memcpy(bo, evict, no_wait, new_mem);
- if (nouveau_bo_move_flipd(bo, evict, intr, no_wait, new_mem))
- return ttm_bo_move_memcpy(bo, evict, no_wait, new_mem);
- } else if (old_mem->mem_type == TTM_PL_SYSTEM) {
- if (nouveau_bo_move_flips(bo, evict, intr, no_wait, new_mem))
- return ttm_bo_move_memcpy(bo, evict, no_wait, new_mem);
- } else {
- if (nouveau_bo_move_m2mf(bo, evict, no_wait, old_mem, new_mem))
- return ttm_bo_move_memcpy(bo, evict, no_wait, new_mem);
- }
+ /* Hardware assisted copy. */
+ if (new_mem->mem_type == TTM_PL_SYSTEM)
+ ret = nouveau_bo_move_flipd(bo, evict, intr, no_wait, new_mem);
+ else if (old_mem->mem_type == TTM_PL_SYSTEM)
+ ret = nouveau_bo_move_flips(bo, evict, intr, no_wait, new_mem);
+ else
+ ret = nouveau_bo_move_m2mf(bo, evict, intr, no_wait, new_mem);
- return 0;
+ if (!ret)
+ goto out;
+
+ /* Fallback to software copy. */
+ ret = ttm_bo_move_memcpy(bo, evict, no_wait, new_mem);
+
+out:
+ if (ret)
+ nouveau_bo_vm_cleanup(bo, NULL, &new_tile);
+ else
+ nouveau_bo_vm_cleanup(bo, new_tile, &nvbo->tile);
+
+ return ret;
}
static int
diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h
index 40b4a37..6e12a3d 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drv.h
+++ b/drivers/gpu/drm/nouveau/nouveau_drv.h
@@ -58,11 +58,19 @@ struct nouveau_fpriv {
#define MAX_NUM_DCB_ENTRIES 16
#define NOUVEAU_MAX_CHANNEL_NR 128
+#define NOUVEAU_MAX_TILE_NR 15
#define NV50_VM_MAX_VRAM (2*1024*1024*1024ULL)
#define NV50_VM_BLOCK (512*1024*1024ULL)
#define NV50_VM_VRAM_NR (NV50_VM_MAX_VRAM / NV50_VM_BLOCK)
+struct nouveau_tile_reg {
+ struct nouveau_fence *fence;
+ uint32_t addr;
+ uint32_t size;
+ bool used;
+};
+
struct nouveau_bo {
struct ttm_buffer_object bo;
struct ttm_placement placement;
@@ -82,6 +90,7 @@ struct nouveau_bo {
uint32_t tile_mode;
uint32_t tile_flags;
+ struct nouveau_tile_reg *tile;
struct drm_gem_object *gem;
struct drm_file *cpu_filp;
@@ -554,6 +563,12 @@ struct drm_nouveau_private {
unsigned long sg_handle;
} gart_info;
+ /* nv10-nv40 tiling regions */
+ struct {
+ struct nouveau_tile_reg reg[NOUVEAU_MAX_TILE_NR];
+ spinlock_t lock;
+ } tile;
+
/* G8x/G9x virtual address space */
uint64_t vm_gart_base;
uint64_t vm_gart_size;
@@ -690,6 +705,13 @@ extern void nouveau_mem_release(struct drm_file *, struct mem_block *heap);
extern int nouveau_mem_init(struct drm_device *);
extern int nouveau_mem_init_agp(struct drm_device *);
extern void nouveau_mem_close(struct drm_device *);
+extern struct nouveau_tile_reg *nv10_mem_set_tiling(struct drm_device *dev,
+ uint32_t addr,
+ uint32_t size,
+ uint32_t pitch);
+extern void nv10_mem_expire_tiling(struct drm_device *dev,
+ struct nouveau_tile_reg *tile,
+ struct nouveau_fence *fence);
extern int nv50_mem_vm_bind_linear(struct drm_device *, uint64_t virt,
uint32_t size, uint32_t flags,
uint64_t phys);
diff --git a/drivers/gpu/drm/nouveau/nouveau_mem.c b/drivers/gpu/drm/nouveau/nouveau_mem.c
index 0275571..c0b68e0 100644
--- a/drivers/gpu/drm/nouveau/nouveau_mem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_mem.c
@@ -192,6 +192,92 @@ void nouveau_mem_release(struct drm_file *file_priv, struct mem_block *heap)
}
/*
+ * NV10-NV40 tiling helpers
+ */
+
+static void
+nv10_mem_set_region_tiling(struct drm_device *dev, int i, uint32_t addr,
+ uint32_t size, uint32_t pitch)
+{
+ struct drm_nouveau_private *dev_priv = dev->dev_private;
+ struct nouveau_fifo_engine *pfifo = &dev_priv->engine.fifo;
+ struct nouveau_fb_engine *pfb = &dev_priv->engine.fb;
+ struct nouveau_pgraph_engine *pgraph = &dev_priv->engine.graph;
+ struct nouveau_tile_reg *tile = &dev_priv->tile.reg[i];
+
+ tile->addr = addr;
+ tile->size = size;
+ tile->used = !!pitch;
+ nouveau_fence_unref((void **)&tile->fence);
+
+ if (!pfifo->cache_flush(dev))
+ return;
+
+ pfifo->reassign(dev, false);
+ pfifo->cache_flush(dev);
+ pfifo->cache_pull(dev, false);
+
+ nouveau_wait_for_idle(dev);
+
+ pgraph->set_region_tiling(dev, i, addr, size, pitch);
+ pfb->set_region_tiling(dev, i, addr, size, pitch);
+
+ pfifo->cache_pull(dev, true);
+ pfifo->reassign(dev, true);
+}
+
+struct nouveau_tile_reg *
+nv10_mem_set_tiling(struct drm_device *dev, uint32_t addr, uint32_t size,
+ uint32_t pitch)
+{
+ struct drm_nouveau_private *dev_priv = dev->dev_private;
+ struct nouveau_fb_engine *pfb = &dev_priv->engine.fb;
+ struct nouveau_tile_reg *tile = dev_priv->tile.reg, *found = NULL;
+ int i;
+
+ spin_lock(&dev_priv->tile.lock);
+
+ for (i = 0; i < pfb->num_tiles; i++) {
+ if (tile[i].used)
+ /* Tile region in use. */
+ continue;
+
+ if (tile[i].fence &&
+ !nouveau_fence_signalled(tile[i].fence, NULL))
+ /* Pending tile region. */
+ continue;
+
+ if (max(tile[i].addr, addr) <
+ min(tile[i].addr + tile[i].size, addr + size))
+ /* Kill an intersecting tile region. */
+ nv10_mem_set_region_tiling(dev, i, 0, 0, 0);
+
+ if (pitch && !found) {
+ /* Free tile region. */
+ nv10_mem_set_region_tiling(dev, i, addr, size, pitch);
+ found = &tile[i];
+ }
+ }
+
+ spin_unlock(&dev_priv->tile.lock);
+
+ return found;
+}
+
+void
+nv10_mem_expire_tiling(struct drm_device *dev, struct nouveau_tile_reg *tile,
+ struct nouveau_fence *fence)
+{
+ if (fence) {
+ /* Mark it as pending. */
+ tile->fence = fence;
+ nouveau_fence_ref(fence);
+ }
+
+ tile->used = false;
+}
+
+/*
* NV50 VM helpers
*/
int
@@ -509,6 +595,7 @@ nouveau_mem_init(struct drm_device *dev)
INIT_LIST_HEAD(&dev_priv->ttm.bo_list);
spin_lock_init(&dev_priv->ttm.bo_list_lock);
+ spin_lock_init(&dev_priv->tile.lock);
dev_priv->fb_available_size = nouveau_mem_fb_amount(dev);
--
1.6.4.4
^ permalink raw reply related [flat|nested] 7+ messages in thread
end of thread, other threads:[~2009-12-14 3:10 UTC | newest]
Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2009-12-11 18:33 [PATCH 1/3] drm/nouveau: Pre-G80 tiling support Francisco Jerez
[not found] ` <1260556404-13389-1-git-send-email-currojerez-sGOZH3hwPm2sTnJN9+BGXg@public.gmane.org>
2009-12-11 18:33 ` [PATCH 2/3] drm/nouveau: Make the MM aware of pre-G80 tiling Francisco Jerez
[not found] ` <1260556404-13389-2-git-send-email-currojerez-sGOZH3hwPm2sTnJN9+BGXg@public.gmane.org>
2009-12-14 3:10 ` [PATCHv2 " Francisco Jerez
2009-12-11 18:33 ` [PATCH 3/3] drm/nouveau: Fix up buffer eviction, and evict them to GART, if possible Francisco Jerez
2009-12-11 20:18 ` [PATCH 1/3] drm/nouveau: Pre-G80 tiling support Jimmy Rentz
[not found] ` <87tyvxnrtp.fsf@riseup.net>
[not found] ` <87tyvxnrtp.fsf-sGOZH3hwPm2sTnJN9+BGXg@public.gmane.org>
2009-12-11 21:28 ` Francisco Jerez
2009-12-14 3:03 ` [PATCHv2 " Francisco Jerez
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.