* nv50: wfb patches
@ 2009-03-29 23:54 Maarten Maathuis
[not found] ` <6d4bc9fc0903291654r70e3e6f0pd6f56874a6c361f7-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
0 siblings, 1 reply; 6+ messages in thread
From: Maarten Maathuis @ 2009-03-29 23:54 UTC (permalink / raw)
To: nouveau-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
[-- Attachment #1: Type: text/plain, Size: 607 bytes --]
Not much has changed, except that I've been test-running these patches
for a few weeks now.
I'm actually surprised I've avoided out-of-memory issues.
Patch 2 remains a temporary hack, awaiting a structural fix by darktama.
The whole thing works fine, but XSHM is an issue
(http://stillunknown.livejournal.com/928.html). With it disabled most
apps are fine, although a few issues remain.
As long as rendering isn't too dependent on the software paths (in
pixman), things are fine (better?).
I will be away for some time in a few weeks, so I'm not sure if I will
see the proper solution to patch 2.
Maarten.
[-- Attachment #2: 0001-nv50-implement-wfb.patch --]
[-- Type: text/x-patch, Size: 17719 bytes --]
From 3425f32eb0d5c664cd5a4141812bc002960de795 Mon Sep 17 00:00:00 2001
From: Maarten Maathuis <madman2003@gmail.com>
Date: Sat, 7 Mar 2009 23:49:19 +0100
Subject: [PATCH 1/6] nv50: implement wfb
- Only for sufficiently new xservers and exa_driver_pixmaps.
---
src/nouveau_exa.c | 282 +++++++++++++++++++++++++++++++++++++++++++++++++++--
src/nv50_exa.c | 31 +++++-
src/nv_driver.c | 52 ++++++++--
src/nv_proto.h | 10 ++
src/nv_type.h | 4 +-
5 files changed, 357 insertions(+), 22 deletions(-)
diff --git a/src/nouveau_exa.c b/src/nouveau_exa.c
index b7bcc87..74804ec 100644
--- a/src/nouveau_exa.c
+++ b/src/nouveau_exa.c
@@ -89,7 +89,7 @@ NVAccelDownloadM2MF(PixmapPtr pspix, int x, int y, int w, int h,
if (!linear) {
BEGIN_RING(chan, m2mf, 0x0200, 7);
OUT_RING (chan, 0);
- OUT_RING (chan, 0);
+ OUT_RING (chan, nv50_exa_get_tile_mode(pspix));
OUT_RING (chan, pspix->drawable.width * cpp);
OUT_RING (chan, pspix->drawable.height);
OUT_RING (chan, 1);
@@ -210,7 +210,7 @@ NVAccelUploadM2MF(PixmapPtr pdpix, int x, int y, int w, int h,
if (!linear) {
BEGIN_RING(chan, m2mf, 0x021c, 7);
OUT_RING (chan, 0);
- OUT_RING (chan, 0);
+ OUT_RING (chan, nv50_exa_get_tile_mode(pdpix));
OUT_RING (chan, pdpix->drawable.width * cpp);
OUT_RING (chan, pdpix->drawable.height);
OUT_RING (chan, 1);
@@ -259,7 +259,11 @@ nouveau_exa_mark_sync(ScreenPtr pScreen)
static void
nouveau_exa_wait_marker(ScreenPtr pScreen, int marker)
{
- NVSync(xf86Screens[pScreen->myNum]);
+ ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum];
+ NVPtr pNv = NVPTR(pScrn);
+
+ if (!pNv->exa_driver_pixmaps)
+ NVSync(xf86Screens[pScreen->myNum]);
}
static Bool
@@ -351,17 +355,32 @@ nouveau_exa_modify_pixmap_header(PixmapPtr ppix, int width, int height,
if (!nvpix->bo && nvpix->size) {
uint32_t cpp = ppix->drawable.bitsPerPixel >> 3;
- /* At some point we should just keep 1bpp pixmaps in sysram */
uint32_t flags = NOUVEAU_BO_VRAM;
int ret;
if (pNv->Architecture >= NV_ARCH_50 && cpp) {
- uint32_t aw = (width + 7) & ~7;
- uint32_t ah = (height + 7) & ~7;
+ uint32_t ah;
+ if (height > 47) {
+ ah = (height + 63) & ~63;
+ nvpix->tiling_mode = 5;
+ } else if (height > 23) {
+ ah = (height + 31) & ~31;
+ nvpix->tiling_mode = 4;
+ } else if (height > 11) {
+ ah = (height + 15) & ~15;
+ nvpix->tiling_mode = 3;
+ } else if (height > 5) {
+ ah = (height + 7) & ~7;
+ nvpix->tiling_mode = 2;
+ } else {
+ ah = (height + 3) & ~3;
+ nvpix->tiling_mode = 1;
+ }
flags |= NOUVEAU_BO_TILED;
- devkind = ((aw * cpp) + 63) & ~63;
+ /* This allignment is very important. */
+ devkind = (width * cpp + 63) & ~63;
nvpix->size = devkind * ah;
}
@@ -390,8 +409,11 @@ nouveau_exa_pixmap_is_tiled(PixmapPtr ppix)
NVPtr pNv = NVPTR(pScrn);
if (pNv->exa_driver_pixmaps) {
- if (!nouveau_pixmap_bo(ppix)->tiled)
+ if (!nouveau_pixmap_bo(ppix))
+ return false;
+ if (nouveau_pixmap_bo(ppix)->tiled == 0)
return false;
+ return true;
} else
if (pNv->Architecture < NV_ARCH_50 ||
exaGetPixmapOffset(ppix) < pNv->EXADriverPtr->offScreenBase)
@@ -403,10 +425,12 @@ nouveau_exa_pixmap_is_tiled(PixmapPtr ppix)
static void *
nouveau_exa_pixmap_map(PixmapPtr ppix)
{
+ ScrnInfoPtr pScrn = xf86Screens[ppix->drawable.pScreen->myNum];
+ NVPtr pNv = NVPTR(pScrn);
struct nouveau_bo *bo = nouveau_pixmap_bo(ppix);
unsigned delta = nouveau_pixmap_offset(ppix);
- if (bo->tiled) {
+ if (!pNv->wfb_enabled && bo->tiled) {
struct nouveau_pixmap *nvpix = nouveau_pixmap(ppix);
nvpix->map_refcount++;
@@ -430,9 +454,11 @@ nouveau_exa_pixmap_map(PixmapPtr ppix)
static void
nouveau_exa_pixmap_unmap(PixmapPtr ppix)
{
+ ScrnInfoPtr pScrn = xf86Screens[ppix->drawable.pScreen->myNum];
+ NVPtr pNv = NVPTR(pScrn);
struct nouveau_bo *bo = nouveau_pixmap_bo(ppix);
- if (bo->tiled) {
+ if (!pNv->wfb_enabled && bo->tiled) {
struct nouveau_pixmap *nvpix = nouveau_pixmap(ppix);
if (--nvpix->map_refcount)
@@ -682,3 +708,239 @@ nouveau_exa_init(ScreenPtr pScreen)
pNv->EXADriverPtr = exa;
return TRUE;
}
+
+/* WFB functions. */
+
+static inline FbBits
+nouveau_exa_wfb_read_memory_linear(const void *src, int size)
+{
+ FbBits bits = 0;
+
+ memcpy(&bits, src, size);
+
+ return bits;
+}
+
+static inline void
+nouveau_exa_wfb_write_memory_linear(void *dst, FbBits value, int size)
+{
+ memcpy(dst, &value, size);
+}
+
+#define LINEAR_PITCH (pPixmap->devKind)
+
+/* Wfb related data. */
+static struct {
+ PixmapPtr ppix;
+ bool used;
+ bool tiled;
+ unsigned long start;
+ unsigned long end;
+ uint64_t multiply_factor;
+ uint8_t cpp;
+ unsigned int tile_height;
+ unsigned int num_tiles_width;
+} wfb_pixmaps[6];
+
+/* height: empty, 2, 4, 8, 16, 32, 64 */
+const unsigned int num_tiles[] = { 0, 0, 4, 2, 1, 1, 1 };
+const unsigned int tile_pitch[] = { 0, 3, 5, 6, 6, 6, 6 };
+const unsigned int mask_height_inv[] = {~0, ~1, ~3, ~7, ~15, ~31, ~63 };
+const unsigned int mask_pitch_inv[] = {~0, ~7, ~31, ~63, ~63, ~63, ~63 };
+const unsigned int mask_height[] = {0, 1, 3, 7, 15, 31, 63 };
+const unsigned int mask_pitch[] = {0, 7, 31, 63, 63, 63, 63 };
+
+#define X_REMAINDER (x & mask_pitch[tile_height])
+#define Y_REMAINDER (y & mask_height[tile_height])
+
+/* tile_height and tile_pitch are expressed in powers of two */
+static inline unsigned int
+nouveau_exa_wfb_recurse_offset(unsigned int offset, unsigned int tile_height,
+ unsigned int x, unsigned int y, unsigned int num_tiles_width)
+{
+ offset += (((x & mask_pitch_inv[tile_height]) >> tile_pitch[tile_height]) + (((y & mask_height_inv[tile_height]) >> tile_height) * num_tiles_width)) * (1 << (tile_height + tile_pitch[tile_height]));
+
+ if (tile_height > 1)
+ offset = nouveau_exa_wfb_recurse_offset(offset, tile_height - 1, X_REMAINDER, Y_REMAINDER, num_tiles[tile_height]);
+ else
+ offset += (Y_REMAINDER * (1 << tile_pitch[tile_height])) + X_REMAINDER;
+
+ return offset;
+}
+
+/* Note, we can only expose one read and write function, the linear versions are for internal consumption. */
+static FbBits
+nouveau_exa_wfb_read_memory(const void *src, int size)
+{
+ int i;
+ uint64_t line_x, line_y;
+ unsigned long offset = (unsigned long) src, subpixel_offset;
+ PixmapPtr pPixmap = NULL;
+ FbBits bits = 0;
+ void *new_src;
+
+ /* Find the right pixmap. */
+ for (i = 0; i < 6; i++)
+ if (offset >= wfb_pixmaps[i].start && offset < wfb_pixmaps[i].end) {
+ pPixmap = wfb_pixmaps[i].ppix;
+ break;
+ }
+
+ if (!pPixmap || !wfb_pixmaps[i].tiled)
+ return nouveau_exa_wfb_read_memory_linear(src, size);
+
+ /* Now comes the decoding. */
+ offset -= (unsigned long) pPixmap->devPrivate.ptr;
+ /* Assuming dword alligned offsets. */
+ subpixel_offset = offset & (wfb_pixmaps[i].cpp - 1);
+ offset &= ~(wfb_pixmaps[i].cpp - 1);
+
+ /* Determine the coordinate first. */
+ /* Division is too expensive for large numbers, so we precalculate a multiplication factor. */
+ line_y = (offset * wfb_pixmaps[i].multiply_factor) >> 32;
+ line_x = offset - line_y * LINEAR_PITCH;
+
+ new_src = pPixmap->devPrivate.ptr +
+ nouveau_exa_wfb_recurse_offset(0, wfb_pixmaps[i].tile_height, line_x, line_y, wfb_pixmaps[i].num_tiles_width) +
+ subpixel_offset;
+
+ memcpy(&bits, new_src, size);
+
+ return bits;
+}
+
+static void
+nouveau_exa_wfb_write_memory(void *dst, FbBits value, int size)
+{
+ int i;
+ uint64_t line_x, line_y;
+ unsigned long offset = (unsigned long) dst, subpixel_offset;
+ PixmapPtr pPixmap = NULL;
+ void *new_dst;
+
+ /* Find the right pixmap. */
+ for (i = 0; i < 6; i++)
+ if (offset >= wfb_pixmaps[i].start && offset < wfb_pixmaps[i].end) {
+ pPixmap = wfb_pixmaps[i].ppix;
+ break;
+ }
+
+ if (!pPixmap || !wfb_pixmaps[i].tiled) {
+ nouveau_exa_wfb_write_memory_linear(dst, value, size);
+ return;
+ }
+
+ /* Now comes the decoding. */
+ offset -= (unsigned long) pPixmap->devPrivate.ptr;
+ /* Assuming dword alligned offsets. */
+ subpixel_offset = offset & (wfb_pixmaps[i].cpp - 1);
+ offset &= ~(wfb_pixmaps[i].cpp - 1);
+
+ /* Determine the coordinate first. */
+ /* Division is too expensive for large numbers, so we precalculate a multiplication factor. */
+ line_y = (offset * wfb_pixmaps[i].multiply_factor) >> 32;
+ line_x = offset - line_y * LINEAR_PITCH;
+
+ new_dst = pPixmap->devPrivate.ptr +
+ nouveau_exa_wfb_recurse_offset(0, wfb_pixmaps[i].tile_height, line_x, line_y, wfb_pixmaps[i].num_tiles_width) +
+ subpixel_offset;
+
+ memcpy(new_dst, &value, size);
+}
+
+void
+nouveau_exa_wfb_setup_wrap(ReadMemoryProcPtr *pRead,
+ WriteMemoryProcPtr *pWrite,
+ DrawablePtr pDraw)
+{
+ PixmapPtr pPixmap;
+ struct nouveau_pixmap *nvpix;
+
+ if (!pRead || !pWrite)
+ return;
+
+ pPixmap = NVGetDrawablePixmap(pDraw);
+ if (!pPixmap)
+ return;
+
+ nvpix = nouveau_pixmap(pPixmap);
+
+ int i;
+ for (i = 0; i < 6; i++)
+ if (!wfb_pixmaps[i].used)
+ break;
+
+ if (i == 6) {
+ ErrorF("More than 6 wraps are setup, what the hell is going on?\n");
+ *pRead = NULL;
+ *pWrite = NULL;
+ return;
+ }
+
+ /* We will get a pointer, somewhere in the range of this pixmap. */
+ /* Based on linear representation ofcource. */
+ wfb_pixmaps[i].ppix = pPixmap;
+ wfb_pixmaps[i].start = (unsigned long) pPixmap->devPrivate.ptr;
+ if (!nvpix || !nvpix->bo)
+ wfb_pixmaps[i].end = wfb_pixmaps[i].start;
+ else
+ wfb_pixmaps[i].end = wfb_pixmaps[i].start + nvpix->bo->size;
+ wfb_pixmaps[i].used = true;
+ wfb_pixmaps[i].tiled = nouveau_exa_pixmap_is_tiled(pPixmap);
+ /* Division is too expensive for large numbers, so we precalculate a multiplication factor. */
+ wfb_pixmaps[i].multiply_factor = (0xFFFFFFFF/exaGetPixmapPitch(pPixmap)) + 1;
+ wfb_pixmaps[i].cpp = (pPixmap->drawable.bitsPerPixel >> 3);
+ if (!nvpix) {
+ wfb_pixmaps[i].tile_height = 0;
+ wfb_pixmaps[i].num_tiles_width = 0;
+ } else {
+ wfb_pixmaps[i].tile_height = nvpix->tiling_mode + 1;
+ wfb_pixmaps[i].num_tiles_width = LINEAR_PITCH/(1 << tile_pitch[wfb_pixmaps[i].tile_height]);
+ }
+
+ *pRead = nouveau_exa_wfb_read_memory;
+ *pWrite = nouveau_exa_wfb_write_memory;
+}
+
+void
+nouveau_exa_wfb_finish_wrap(DrawablePtr pDraw)
+{
+ PixmapPtr pPixmap;
+ int i;
+
+ pPixmap = NVGetDrawablePixmap(pDraw);
+ if (!pPixmap)
+ return;
+
+ for (i = 0; i < 6; i++)
+ if (wfb_pixmaps[i].ppix == pPixmap) {
+ wfb_pixmaps[i].ppix = NULL;
+ wfb_pixmaps[i].start = 0;
+ wfb_pixmaps[i].end = 0;
+ wfb_pixmaps[i].used = false;
+ wfb_pixmaps[i].tiled = false;
+ wfb_pixmaps[i].multiply_factor = 0;
+ wfb_pixmaps[i].cpp = 0;
+ wfb_pixmaps[i].tile_height = 0;
+ wfb_pixmaps[i].num_tiles_width = 0;
+ break;
+ }
+}
+
+void
+nouveau_exa_wfb_init()
+{
+ int i;
+
+ for (i = 0; i < 6; i++) {
+ wfb_pixmaps[i].ppix = NULL;
+ wfb_pixmaps[i].start = 0;
+ wfb_pixmaps[i].end = 0;
+ wfb_pixmaps[i].used = false;
+ wfb_pixmaps[i].tiled = false;
+ wfb_pixmaps[i].multiply_factor = 0;
+ wfb_pixmaps[i].cpp = 0;
+ wfb_pixmaps[i].tile_height = 0;
+ wfb_pixmaps[i].num_tiles_width = 0;
+ }
+}
diff --git a/src/nv50_exa.c b/src/nv50_exa.c
index 3831ec3..ac6b6b4 100644
--- a/src/nv50_exa.c
+++ b/src/nv50_exa.c
@@ -72,6 +72,31 @@ NV50EXABlendOp[] = {
/* Add */ { 0, 0, BF( ONE), BF( ONE) },
};
+
+uint32_t
+nv50_exa_get_tile_mode(PixmapPtr ppix)
+{
+ struct nouveau_pixmap *nvpix = nouveau_pixmap(ppix);
+
+ if (!nvpix)
+ return 0x00;
+
+ switch (nvpix->tiling_mode) {
+ case 1: /* pitch 32, height 4 */
+ return 0x00;
+ case 2: /* pitch 64, height 8 */
+ return 0x10;
+ case 3: /* pitch 64, height 16 */
+ return 0x20;
+ case 4: /* pitch 64, height 32 */
+ return 0x30;
+ case 5: /* pitch 64, height 64 */
+ return 0x40;
+ default:
+ return 0x00;
+ }
+}
+
static Bool
NV50EXA2DSurfaceFormat(PixmapPtr ppix, uint32_t *fmt)
{
@@ -128,7 +153,7 @@ NV50EXAAcquireSurface2D(PixmapPtr ppix, int is_src)
BEGIN_RING(chan, eng2d, mthd, 5);
OUT_RING (chan, fmt);
OUT_RING (chan, 0);
- OUT_RING (chan, 0);
+ OUT_RING (chan, nv50_exa_get_tile_mode(ppix));
OUT_RING (chan, 1);
OUT_RING (chan, 0);
}
@@ -457,7 +482,7 @@ NV50EXARenderTarget(PixmapPtr ppix, PicturePtr ppict)
OUT_RELOCh(chan, bo, delta, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
OUT_RELOCl(chan, bo, delta, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
OUT_RING (chan, format);
- OUT_RING (chan, 0);
+ OUT_RING (chan, nv50_exa_get_tile_mode(ppix));
OUT_RING (chan, 0x00000000);
BEGIN_RING(chan, tesla, NV50TCL_RT_HORIZ(0), 2);
OUT_RING (chan, ppix->drawable.width);
@@ -577,7 +602,7 @@ NV50EXATexture(PixmapPtr ppix, PicturePtr ppict, unsigned unit)
NOUVEAU_FALLBACK("invalid picture format, this SHOULD NOT HAPPEN. Expect trouble.\n");
}
OUT_RELOCl(chan, bo, delta, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
- OUT_RING (chan, 0xd0005000);
+ OUT_RING (chan, 0xd0005000 | (nv50_exa_get_tile_mode(ppix) << 18));
OUT_RING (chan, 0x00300000);
OUT_RING (chan, ppix->drawable.width);
OUT_RING (chan, (1 << NV50TIC_0_5_DEPTH_SHIFT) | ppix->drawable.height);
diff --git a/src/nv_driver.c b/src/nv_driver.c
index 4f07836..f056c68 100644
--- a/src/nv_driver.c
+++ b/src/nv_driver.c
@@ -158,6 +158,12 @@ static const char *fbSymbols[] = {
NULL
};
+static const char *wfbSymbols[] = {
+ "wfbPictureInit",
+ "wfbScreenInit",
+ NULL
+};
+
static const char *exaSymbols[] = {
"exaDriverInit",
"exaOffscreenInit",
@@ -279,7 +285,7 @@ nouveauSetup(pointer module, pointer opts, int *errmaj, int *errmin)
* Tell the loader about symbols from other modules that this module
* might refer to.
*/
- LoaderRefSymLists(vgahwSymbols, exaSymbols, fbSymbols,
+ LoaderRefSymLists(vgahwSymbols, exaSymbols, fbSymbols, wfbSymbols,
shadowSymbols, drmSymbols,
i2cSymbols, ddcSymbols, vbeSymbols,
int10Symbols, NULL);
@@ -1521,10 +1527,23 @@ NVPreInit(ScrnInfoPtr pScrn, int flags)
* section.
*/
- if (xf86LoadSubModule(pScrn, "fb") == NULL)
- NVPreInitFail("\n");
+#if XORG_VERSION_CURRENT >= XORG_VERSION_NUMERIC(1,6,99,0,0)
+ if (!pNv->NoAccel && pNv->exa_driver_pixmaps && pNv->Architecture == NV_ARCH_50) {
+ pNv->wfb_enabled = true;
+ nouveau_exa_wfb_init();
+ if (xf86LoadSubModule(pScrn, "wfb") == NULL)
+ NVPreInitFail("\n");
- xf86LoaderReqSymLists(fbSymbols, NULL);
+ xf86LoaderReqSymLists(wfbSymbols, NULL);
+ } else
+#endif
+ {
+ pNv->wfb_enabled = false;
+ if (xf86LoadSubModule(pScrn, "fb") == NULL)
+ NVPreInitFail("\n");
+
+ xf86LoaderReqSymLists(fbSymbols, NULL);
+ }
/* Load EXA if needed */
if (!pNv->NoAccel) {
@@ -2127,9 +2146,19 @@ NVScreenInit(int scrnIndex, ScreenPtr pScreen, int argc, char **argv)
switch (pScrn->bitsPerPixel) {
case 16:
case 32:
- ret = fbScreenInit(pScreen, FBStart, pScrn->virtualX, pScrn->virtualY,
- pScrn->xDpi, pScrn->yDpi,
- displayWidth, pScrn->bitsPerPixel);
+#if XORG_VERSION_CURRENT >= XORG_VERSION_NUMERIC(1,6,99,0,0)
+ if (pNv->wfb_enabled) {
+ ret = wfbScreenInit(pScreen, FBStart, pScrn->virtualX, pScrn->virtualY,
+ pScrn->xDpi, pScrn->yDpi,
+ displayWidth, pScrn->bitsPerPixel,
+ nouveau_exa_wfb_setup_wrap, nouveau_exa_wfb_finish_wrap);
+ } else
+#endif
+ {
+ ret = fbScreenInit(pScreen, FBStart, pScrn->virtualX, pScrn->virtualY,
+ pScrn->xDpi, pScrn->yDpi,
+ displayWidth, pScrn->bitsPerPixel);
+ }
break;
default:
xf86DrvMsg(scrnIndex, X_ERROR,
@@ -2154,7 +2183,14 @@ NVScreenInit(int scrnIndex, ScreenPtr pScreen, int argc, char **argv)
}
}
- fbPictureInit (pScreen, 0, 0);
+#if XORG_VERSION_CURRENT >= XORG_VERSION_NUMERIC(1,6,99,0,0)
+ if (pNv->wfb_enabled) {
+ wfbPictureInit(pScreen, 0, 0);
+ } else
+#endif
+ {
+ fbPictureInit(pScreen, 0, 0);
+ }
xf86SetBlackWhitePixels(pScreen);
diff --git a/src/nv_proto.h b/src/nv_proto.h
index 1d00e1a..762b22e 100644
--- a/src/nv_proto.h
+++ b/src/nv_proto.h
@@ -18,6 +18,10 @@ void NVAccelFree(ScrnInfoPtr pScrn);
/* in nv_driver.c */
Bool NVI2CInit(ScrnInfoPtr pScrn);
+/* We can only include fb.h in normal or wfb mode,
+ * so we have to declare one ourself. */
+extern Bool wfbPictureInit (ScreenPtr pScreen,
+ PictFormatPtr formats, int nformats);
/* in nv_dri.c */
Bool NVDRIScreenInit(ScrnInfoPtr pScrn);
@@ -69,6 +73,11 @@ void NVTakedownDma(ScrnInfoPtr pScrn);
Bool nouveau_exa_init(ScreenPtr pScreen);
Bool nouveau_exa_pixmap_is_onscreen(PixmapPtr pPixmap);
bool nouveau_exa_pixmap_is_tiled(PixmapPtr ppix);
+void nouveau_exa_wfb_setup_wrap(ReadMemoryProcPtr *pRead,
+ WriteMemoryProcPtr *pWrite,
+ DrawablePtr pDraw);
+void nouveau_exa_wfb_finish_wrap(DrawablePtr pDraw);
+void nouveau_exa_wfb_init();
/* in nv_hw.c */
void NVCalcStateExt(ScrnInfoPtr,struct _riva_hw_state *,int,int,int,int,int,int);
@@ -225,6 +234,7 @@ int NV40SetTexturePortAttribute(ScrnInfoPtr, Atom, INT32, pointer);
Bool NVAccelInitNV50TCL(ScrnInfoPtr pScrn);
/* in nv50_exa.c */
+uint32_t nv50_exa_get_tile_mode(PixmapPtr ppix);
Bool NV50EXAPrepareSolid(PixmapPtr, int, Pixel, Pixel);
void NV50EXASolid(PixmapPtr, int, int, int, int);
void NV50EXADoneSolid(PixmapPtr);
diff --git a/src/nv_type.h b/src/nv_type.h
index f03c198..aafaef6 100644
--- a/src/nv_type.h
+++ b/src/nv_type.h
@@ -304,7 +304,8 @@ typedef struct _NVRec {
uint8_t cur_head;
ExaDriverPtr EXADriverPtr;
- Bool exa_driver_pixmaps;
+ Bool exa_driver_pixmaps;
+ bool wfb_enabled;
ScreenBlockHandlerProcPtr BlockHandler;
CloseScreenProcPtr CloseScreen;
/* Cursor */
@@ -477,6 +478,7 @@ struct nouveau_pixmap {
void *linear;
unsigned size;
int map_refcount;
+ int tiling_mode;
};
static inline struct nouveau_pixmap *
--
1.6.2
[-- Attachment #3: 0002-exa-smarter-initial-mapping-of-driver-allocated-pix.patch --]
[-- Type: text/x-patch, Size: 8938 bytes --]
From 78457b975acd680469aa82800588540dd74142be Mon Sep 17 00:00:00 2001
From: Maarten Maathuis <madman2003@gmail.com>
Date: Fri, 13 Mar 2009 09:57:12 +0100
Subject: [PATCH 2/6] exa: smarter initial mapping of driver allocated pixmaps
- Use a software copy until the first accelerated op, then UTS it.
- The path back does not exist.
---
src/nouveau_exa.c | 64 +++++++++++++++++++++++++++++++++++++++++++++++-----
src/nv04_exa.c | 5 ++++
src/nv10_exa.c | 5 ++++
src/nv30_exa.c | 5 ++++
src/nv40_exa.c | 5 ++++
src/nv50_exa.c | 10 ++++++++
src/nv_proto.h | 1 +
src/nv_type.h | 1 +
8 files changed, 89 insertions(+), 7 deletions(-)
diff --git a/src/nouveau_exa.c b/src/nouveau_exa.c
index 74804ec..72e783e 100644
--- a/src/nouveau_exa.c
+++ b/src/nouveau_exa.c
@@ -328,6 +328,9 @@ nouveau_exa_destroy_pixmap(ScreenPtr pScreen, void *priv)
if (!nvpix)
return;
+ if (nvpix->linear)
+ xfree(nvpix->linear);
+
nouveau_bo_ref(NULL, &nvpix->bo);
xfree(nvpix);
}
@@ -422,6 +425,39 @@ nouveau_exa_pixmap_is_tiled(PixmapPtr ppix)
return true;
}
+void
+nouveau_exa_pixmap_prepare_for_accel(PixmapPtr ppix)
+{
+ ScrnInfoPtr pScrn = xf86Screens[ppix->drawable.pScreen->myNum];
+ NVPtr pNv = NVPTR(pScrn);
+ struct nouveau_pixmap *nvpix = nouveau_pixmap(ppix);
+
+ if (!pNv->exa_driver_pixmaps)
+ return;
+
+ if (nvpix->linear) {
+ int cpp = ppix->drawable.bitsPerPixel >> 3;
+ if ((ppix->drawable.width * ppix->drawable.height * cpp) < 16*1024) {
+ if (pNv->Architecture == NV_ARCH_50)
+ NV50EXAUploadSIFC(nvpix->linear, ppix->devKind, ppix, 0, 0, ppix->drawable.width,
+ ppix->drawable.height, cpp);
+ else
+ NV04EXAUploadIFC(pScrn, nvpix->linear, ppix->devKind, ppix, 0, 0, ppix->drawable.width,
+ ppix->drawable.height, cpp);
+ exaMarkSync(ppix->drawable.pScreen);
+ } else {
+ NVAccelUploadM2MF(ppix, 0, 0, ppix->drawable.width,
+ ppix->drawable.height, nvpix->linear,
+ ppix->devKind);
+ }
+
+ xfree(nvpix->linear);
+ nvpix->linear = NULL;
+ }
+
+ nvpix->dirty = true;
+}
+
static void *
nouveau_exa_pixmap_map(PixmapPtr ppix)
{
@@ -429,15 +465,26 @@ nouveau_exa_pixmap_map(PixmapPtr ppix)
NVPtr pNv = NVPTR(pScrn);
struct nouveau_bo *bo = nouveau_pixmap_bo(ppix);
unsigned delta = nouveau_pixmap_offset(ppix);
+ struct nouveau_pixmap *nvpix = nouveau_pixmap(ppix);
- if (!pNv->wfb_enabled && bo->tiled) {
- struct nouveau_pixmap *nvpix = nouveau_pixmap(ppix);
+ /* Attempt to handle first access with more grace.
+ * This handles multiple sw accesses (such as trapezoid rasterisation).
+ * This avoids the first access on potentially uncached memory.
+ */
+ if (!nvpix->dirty) {
+ if (!nvpix->linear)
+ nvpix->linear = xalloc(ppix->devKind * ppix->drawable.height);
+ nouveau_bo_map(bo, NOUVEAU_BO_RDWR);
+ return nvpix->linear;
+ }
+
+ if (!pNv->wfb_enabled && bo->tiled && nvpix && nvpix->dirty) {
nvpix->map_refcount++;
if (nvpix->linear)
return nvpix->linear;
- nvpix->linear = xcalloc(1, ppix->devKind * ppix->drawable.height);
+ nvpix->linear = xalloc(ppix->devKind * ppix->drawable.height);
NVAccelDownloadM2MF(ppix, 0, 0, ppix->drawable.width,
ppix->drawable.height, nvpix->linear,
@@ -457,10 +504,9 @@ nouveau_exa_pixmap_unmap(PixmapPtr ppix)
ScrnInfoPtr pScrn = xf86Screens[ppix->drawable.pScreen->myNum];
NVPtr pNv = NVPTR(pScrn);
struct nouveau_bo *bo = nouveau_pixmap_bo(ppix);
+ struct nouveau_pixmap *nvpix = nouveau_pixmap(ppix);
- if (!pNv->wfb_enabled && bo->tiled) {
- struct nouveau_pixmap *nvpix = nouveau_pixmap(ppix);
-
+ if (!pNv->wfb_enabled && bo->tiled && nvpix && nvpix->dirty) {
if (--nvpix->map_refcount)
return;
@@ -489,6 +535,8 @@ nouveau_exa_download_from_screen(PixmapPtr pspix, int x, int y, int w, int h,
cpp = pspix->drawable.bitsPerPixel >> 3;
offset = (y * src_pitch) + (x * cpp);
+ nouveau_exa_pixmap_prepare_for_accel(pspix);
+
if (pNv->GART) {
if (NVAccelDownloadM2MF(pspix, x, y, w, h, dst, dst_pitch))
return TRUE;
@@ -516,6 +564,8 @@ nouveau_exa_upload_to_screen(PixmapPtr pdpix, int x, int y, int w, int h,
dst_pitch = exaGetPixmapPitch(pdpix);
cpp = pdpix->drawable.bitsPerPixel >> 3;
+ nouveau_exa_pixmap_prepare_for_accel(pdpix);
+
/* try hostdata transfer */
if (w * h * cpp < 16*1024) /* heuristic */
{
@@ -886,7 +936,7 @@ nouveau_exa_wfb_setup_wrap(ReadMemoryProcPtr *pRead,
else
wfb_pixmaps[i].end = wfb_pixmaps[i].start + nvpix->bo->size;
wfb_pixmaps[i].used = true;
- wfb_pixmaps[i].tiled = nouveau_exa_pixmap_is_tiled(pPixmap);
+ wfb_pixmaps[i].tiled = nouveau_exa_pixmap_is_tiled(pPixmap) && !nvpix->linear;
/* Division is too expensive for large numbers, so we precalculate a multiplication factor. */
wfb_pixmaps[i].multiply_factor = (0xFFFFFFFF/exaGetPixmapPitch(pPixmap)) + 1;
wfb_pixmaps[i].cpp = (pPixmap->drawable.bitsPerPixel >> 3);
diff --git a/src/nv04_exa.c b/src/nv04_exa.c
index de5da67..a7e9b16 100644
--- a/src/nv04_exa.c
+++ b/src/nv04_exa.c
@@ -83,6 +83,8 @@ NV04EXAPrepareSolid(PixmapPtr pPixmap, int alu, Pixel planemask, Pixel fg)
unsigned delta = nouveau_pixmap_offset(pPixmap);
unsigned int fmt, pitch, color;
+ nouveau_exa_pixmap_prepare_for_accel(pPixmap);
+
WAIT_RING(chan, 64);
planemask |= ~0 << pPixmap->drawable.bitsPerPixel;
@@ -190,6 +192,9 @@ NV04EXAPrepareCopy(PixmapPtr pSrcPixmap, PixmapPtr pDstPixmap, int dx, int dy,
unsigned dst_delta = nouveau_pixmap_offset(pDstPixmap);
int fmt;
+ nouveau_exa_pixmap_prepare_for_accel(pSrcPixmap);
+ nouveau_exa_pixmap_prepare_for_accel(pDstPixmap);
+
WAIT_RING(chan, 64);
if (pSrcPixmap->drawable.bitsPerPixel !=
diff --git a/src/nv10_exa.c b/src/nv10_exa.c
index 291c2da..bef7d40 100644
--- a/src/nv10_exa.c
+++ b/src/nv10_exa.c
@@ -626,6 +626,11 @@ Bool NV10EXAPrepareComposite(int op,
NVPtr pNv = NVPTR(pScrn);
struct nouveau_channel *chan = pNv->chan;
+ nouveau_exa_pixmap_prepare_for_accel(pSrc);
+ if (pMask)
+ nouveau_exa_pixmap_prepare_for_accel(pMask);
+ nouveau_exa_pixmap_prepare_for_accel(pDst);
+
WAIT_RING(chan, 128);
if (NV10Check_A8plusA8_Feasability(pSrcPicture,pMaskPicture,pDstPicture,op))
diff --git a/src/nv30_exa.c b/src/nv30_exa.c
index d3f83d3..58e5f27 100644
--- a/src/nv30_exa.c
+++ b/src/nv30_exa.c
@@ -447,6 +447,11 @@ NV30EXAPrepareComposite(int op, PicturePtr psPict,
int fpid = NV30EXA_FPID_PASS_COL0;
NV30EXA_STATE;
+ nouveau_exa_pixmap_prepare_for_accel(psPix);
+ if (pmPix)
+ nouveau_exa_pixmap_prepare_for_accel(pmPix);
+ nouveau_exa_pixmap_prepare_for_accel(pdPix);
+
WAIT_RING(chan, 128);
blend = NV30_GetPictOpRec(op);
diff --git a/src/nv40_exa.c b/src/nv40_exa.c
index 68da331..d10d93b 100644
--- a/src/nv40_exa.c
+++ b/src/nv40_exa.c
@@ -406,6 +406,11 @@ NV40EXAPrepareComposite(int op, PicturePtr psPict,
int fpid = NV40EXA_FPID_PASS_COL0;
NV40EXA_STATE;
+ nouveau_exa_pixmap_prepare_for_accel(psPix);
+ if (pmPix)
+ nouveau_exa_pixmap_prepare_for_accel(pmPix);
+ nouveau_exa_pixmap_prepare_for_accel(pdPix);
+
WAIT_RING(chan, 128);
blend = NV40_GetPictOpRec(op);
diff --git a/src/nv50_exa.c b/src/nv50_exa.c
index ac6b6b4..8b3dab7 100644
--- a/src/nv50_exa.c
+++ b/src/nv50_exa.c
@@ -250,6 +250,8 @@ NV50EXAPrepareSolid(PixmapPtr pdpix, int alu, Pixel planemask, Pixel fg)
NV50EXA_LOCALS(pdpix);
uint32_t fmt;
+ nouveau_exa_pixmap_prepare_for_accel(pdpix);
+
WAIT_RING(chan, 64);
if (!NV50EXA2DSurfaceFormat(pdpix, &fmt))
@@ -311,6 +313,9 @@ NV50EXAPrepareCopy(PixmapPtr pspix, PixmapPtr pdpix, int dx, int dy,
{
NV50EXA_LOCALS(pdpix);
+ nouveau_exa_pixmap_prepare_for_accel(pspix);
+ nouveau_exa_pixmap_prepare_for_accel(pdpix);
+
WAIT_RING(chan, 64);
if (!NV50EXAAcquireSurface2D(pspix, 1))
@@ -769,6 +774,11 @@ NV50EXAPrepareComposite(int op,
NV50EXA_LOCALS(pspix);
const unsigned shd_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_RD;
+ nouveau_exa_pixmap_prepare_for_accel(pspix);
+ if (pmpix)
+ nouveau_exa_pixmap_prepare_for_accel(pmpix);
+ nouveau_exa_pixmap_prepare_for_accel(pdpix);
+
WAIT_RING (chan, 128);
BEGIN_RING(chan, eng2d, 0x0110, 1);
OUT_RING (chan, 0);
diff --git a/src/nv_proto.h b/src/nv_proto.h
index 762b22e..7d820b7 100644
--- a/src/nv_proto.h
+++ b/src/nv_proto.h
@@ -73,6 +73,7 @@ void NVTakedownDma(ScrnInfoPtr pScrn);
Bool nouveau_exa_init(ScreenPtr pScreen);
Bool nouveau_exa_pixmap_is_onscreen(PixmapPtr pPixmap);
bool nouveau_exa_pixmap_is_tiled(PixmapPtr ppix);
+void nouveau_exa_pixmap_prepare_for_accel(PixmapPtr ppix);
void nouveau_exa_wfb_setup_wrap(ReadMemoryProcPtr *pRead,
WriteMemoryProcPtr *pWrite,
DrawablePtr pDraw);
diff --git a/src/nv_type.h b/src/nv_type.h
index aafaef6..5cee6b3 100644
--- a/src/nv_type.h
+++ b/src/nv_type.h
@@ -479,6 +479,7 @@ struct nouveau_pixmap {
unsigned size;
int map_refcount;
int tiling_mode;
+ bool dirty;
};
static inline struct nouveau_pixmap *
--
1.6.2
[-- Attachment #4: 0003-nv50-support-NV9X-hw-with-wfb.patch --]
[-- Type: text/x-patch, Size: 5079 bytes --]
From af38223fe187797d2fbd94f59d549b1e039a4a0f Mon Sep 17 00:00:00 2001
From: Maarten Maathuis <madman2003@gmail.com>
Date: Sat, 14 Mar 2009 18:07:31 +0100
Subject: [PATCH 3/6] nv50: support NV9X hw with wfb
- NVAX hardware seems to have the NV5X and NV8X behaviour (based on one sample).
---
src/nouveau_exa.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++----
1 files changed, 46 insertions(+), 4 deletions(-)
diff --git a/src/nouveau_exa.c b/src/nouveau_exa.c
index 72e783e..b8bbb53 100644
--- a/src/nouveau_exa.c
+++ b/src/nouveau_exa.c
@@ -385,6 +385,12 @@ nouveau_exa_modify_pixmap_header(PixmapPtr ppix, int width, int height,
/* This allignment is very important. */
devkind = (width * cpp + 63) & ~63;
nvpix->size = devkind * ah;
+
+ /* A 128 bytes block is potentially moved 6 positions ahead.
+ * It would be very difficult to predict this, so we overallocate.
+ */
+ if (pNv->NVArch >= 0x90 && pNv->NVArch < 0xA0)
+ nvpix->size += 768;
}
ret = nouveau_bo_new(pNv->dev, flags, 0, nvpix->size,
@@ -800,21 +806,51 @@ const unsigned int mask_pitch_inv[] = {~0, ~7, ~31, ~63, ~63, ~63, ~63 };
const unsigned int mask_height[] = {0, 1, 3, 7, 15, 31, 63 };
const unsigned int mask_pitch[] = {0, 7, 31, 63, 63, 63, 63 };
+/* This applies to nv9X hw. They do strange memory rearranging. */
+/* 128 bytes blocks (which correspond to 32x4 tiles) are moved forward and backward in a special pattern. */
+static bool nv90_mode;
+const int pattern[] = { 0, 1, 2, 3, /**/ 0, 1, 2, 3, /**/ 1, 2, 3, /**/ 0, 2, 3, /**/
+ 0, 1, 3, /**/ 0, 1, 2, /**/ 1, 2, 3, /**/ 0, 2, 3, /**/ 0, 1, 3, /**/ 0, 1, 2 /**/};
+const int tile_offset[] = {
+ 0, 0, 0, 0, 0, 0, 0, 0, /* type 0 */
+ 2, 2, 2, 2, 2, 2, -6, -6, /* type 1 */
+ 4, 4, 4, 4, -4, -4, -4, -4, /* type 2 */
+ 6, 6, -2, -2, -2, -2, -2, -2 /* type 3 */
+ };
+
#define X_REMAINDER (x & mask_pitch[tile_height])
#define Y_REMAINDER (y & mask_height[tile_height])
/* tile_height and tile_pitch are expressed in powers of two */
static inline unsigned int
nouveau_exa_wfb_recurse_offset(unsigned int offset, unsigned int tile_height,
- unsigned int x, unsigned int y, unsigned int num_tiles_width)
+ unsigned int x, unsigned int y, unsigned int num_tiles_width, bool first)
{
offset += (((x & mask_pitch_inv[tile_height]) >> tile_pitch[tile_height]) + (((y & mask_height_inv[tile_height]) >> tile_height) * num_tiles_width)) * (1 << (tile_height + tile_pitch[tile_height]));
if (tile_height > 1)
- offset = nouveau_exa_wfb_recurse_offset(offset, tile_height - 1, X_REMAINDER, Y_REMAINDER, num_tiles[tile_height]);
+ offset = nouveau_exa_wfb_recurse_offset(offset, tile_height - 1, X_REMAINDER, Y_REMAINDER, num_tiles[tile_height], false);
else
offset += (Y_REMAINDER * (1 << tile_pitch[tile_height])) + X_REMAINDER;
+ /* Here comes the correction for the unusual memory mapping of NV9X hw. */
+ if (first && nv90_mode) {
+ unsigned int suboffset, suboffset2, suboffset3;
+
+ /* 128 byte blocks within a larger block of 32768 bytes */
+ /* the first block misses the first "0, 1, 2, 3" sequence, so we add an extra 4096 bytes offset. */
+ suboffset = ((offset + 4096) & 0x7FFF) & ~0x7F;
+ /* now we have 256 blocks */
+ suboffset >>= 7;
+
+ /* now we have 32 rows */
+ suboffset2 = suboffset >> 3;
+ /* tile within row */
+ suboffset3 = suboffset & 0x7;
+
+ offset += tile_offset[suboffset3 + 8*pattern[suboffset2]] * 128;
+ }
+
return offset;
}
@@ -851,7 +887,7 @@ nouveau_exa_wfb_read_memory(const void *src, int size)
line_x = offset - line_y * LINEAR_PITCH;
new_src = pPixmap->devPrivate.ptr +
- nouveau_exa_wfb_recurse_offset(0, wfb_pixmaps[i].tile_height, line_x, line_y, wfb_pixmaps[i].num_tiles_width) +
+ nouveau_exa_wfb_recurse_offset(0, wfb_pixmaps[i].tile_height, line_x, line_y, wfb_pixmaps[i].num_tiles_width, true) +
subpixel_offset;
memcpy(&bits, new_src, size);
@@ -892,7 +928,7 @@ nouveau_exa_wfb_write_memory(void *dst, FbBits value, int size)
line_x = offset - line_y * LINEAR_PITCH;
new_dst = pPixmap->devPrivate.ptr +
- nouveau_exa_wfb_recurse_offset(0, wfb_pixmaps[i].tile_height, line_x, line_y, wfb_pixmaps[i].num_tiles_width) +
+ nouveau_exa_wfb_recurse_offset(0, wfb_pixmaps[i].tile_height, line_x, line_y, wfb_pixmaps[i].num_tiles_width, true) +
subpixel_offset;
memcpy(new_dst, &value, size);
@@ -905,6 +941,8 @@ nouveau_exa_wfb_setup_wrap(ReadMemoryProcPtr *pRead,
{
PixmapPtr pPixmap;
struct nouveau_pixmap *nvpix;
+ ScrnInfoPtr pScrn = xf86Screens[pDraw->pScreen->myNum];
+ NVPtr pNv = NVPTR(pScrn);
if (!pRead || !pWrite)
return;
@@ -947,6 +985,10 @@ nouveau_exa_wfb_setup_wrap(ReadMemoryProcPtr *pRead,
wfb_pixmaps[i].tile_height = nvpix->tiling_mode + 1;
wfb_pixmaps[i].num_tiles_width = LINEAR_PITCH/(1 << tile_pitch[wfb_pixmaps[i].tile_height]);
}
+ if (pNv->NVArch >= 0x90 && pNv->NVArch < 0xA0)
+ nv90_mode = true;
+ else
+ nv90_mode = false;
*pRead = nouveau_exa_wfb_read_memory;
*pWrite = nouveau_exa_wfb_write_memory;
--
1.6.2
[-- Attachment #5: 0004-exa-don-t-put-cpp-0-pixmaps-in-vram.patch --]
[-- Type: text/x-patch, Size: 884 bytes --]
From 882b51660c3ff04e3c55f23e5eb112c0ebe23773 Mon Sep 17 00:00:00 2001
From: Maarten Maathuis <madman2003@gmail.com>
Date: Sat, 14 Mar 2009 19:38:09 +0100
Subject: [PATCH 4/6] exa: don't put cpp == 0 pixmaps in vram
---
src/nouveau_exa.c | 6 +++++-
1 files changed, 5 insertions(+), 1 deletions(-)
diff --git a/src/nouveau_exa.c b/src/nouveau_exa.c
index b8bbb53..4cbcf50 100644
--- a/src/nouveau_exa.c
+++ b/src/nouveau_exa.c
@@ -358,9 +358,13 @@ nouveau_exa_modify_pixmap_header(PixmapPtr ppix, int width, int height,
if (!nvpix->bo && nvpix->size) {
uint32_t cpp = ppix->drawable.bitsPerPixel >> 3;
- uint32_t flags = NOUVEAU_BO_VRAM;
+ uint32_t flags = 0;
int ret;
+ /* Let's not waste vram on useless pixmaps. */
+ if (cpp)
+ flags |= NOUVEAU_BO_VRAM;
+
if (pNv->Architecture >= NV_ARCH_50 && cpp) {
uint32_t ah;
if (height > 47) {
--
1.6.2
[-- Attachment #6: 0005-xv-some-fixes.patch --]
[-- Type: text/x-patch, Size: 2053 bytes --]
From e51057b34064fb36d4d4873a85a07ff3d46e752a Mon Sep 17 00:00:00 2001
From: Maarten Maathuis <madman2003@gmail.com>
Date: Sat, 14 Mar 2009 23:26:46 +0100
Subject: [PATCH 5/6] xv: some fixes
---
src/nouveau_xv.c | 24 +++++++++++++-----------
src/nv50_xv.c | 2 +-
2 files changed, 14 insertions(+), 12 deletions(-)
diff --git a/src/nouveau_xv.c b/src/nouveau_xv.c
index 246647b..33e49c5 100644
--- a/src/nouveau_xv.c
+++ b/src/nouveau_xv.c
@@ -718,10 +718,8 @@ NV_calculate_pitches_and_mem_size(NVPtr pNv, int action_flags, int *srcPitch,
{
int tmp;
- if (pNv->Architecture >= NV_ARCH_50) {
- npixels = (npixels + 7) & ~7;
- nlines = (nlines + 7) & ~7;
- }
+ if (pNv->Architecture >= NV_ARCH_50)
+ nlines = (nlines + 3) & ~3;
if (action_flags & IS_YV12) {
*srcPitch = (width + 3) & ~3; /* of luma */
@@ -1252,13 +1250,17 @@ CPU_copy:
exaMoveInPixmap(ppix);
/* check if it made it offscreen */
-#if NOUVEAU_EXA_PIXMAPS
- if (!pNv->EXADriverPtr->PixmapIsOffscreen(ppix))
-#else
- if (exaGetPixmapOffset(ppix) >= pNv->EXADriverPtr->memorySize)
-#endif
- /* we lost, insufficient space probably */
- return BadAlloc;
+ if (pNv->EXADriverPtr->PixmapIsOffscreen) {
+ if (!pNv->EXADriverPtr->PixmapIsOffscreen(ppix)) {
+ /* we lost, insufficient space probably */
+ return BadAlloc;
+ }
+ } else {
+ if (exaGetPixmapOffset(ppix) >= pNv->EXADriverPtr->memorySize) {
+ /* we lost, insufficient space probably */
+ return BadAlloc;
+ }
+ }
ExaOffscreenMarkUsed(ppix);
diff --git a/src/nv50_xv.c b/src/nv50_xv.c
index 9601326..df35b1a 100644
--- a/src/nv50_xv.c
+++ b/src/nv50_xv.c
@@ -76,7 +76,7 @@ nv50_xv_state_emit(PixmapPtr ppix, int id, struct nouveau_bo *src,
case 24: OUT_RING (chan, NV50TCL_RT_FORMAT_24BPP); break;
case 16: OUT_RING (chan, NV50TCL_RT_FORMAT_16BPP); break;
}
- OUT_RING (chan, 0);
+ OUT_RING (chan, nv50_exa_get_tile_mode(ppix));
OUT_RING (chan, 0);
BEGIN_RING(chan, tesla, NV50TCL_RT_HORIZ(0), 2);
OUT_RING (chan, ppix->drawable.width);
--
1.6.2
[-- Attachment #7: 0006-NV50-add-NV84-wfb-support.patch --]
[-- Type: text/x-patch, Size: 5869 bytes --]
From 13f16b41f7d8c703a8d5eec78ed7e64588e014cf Mon Sep 17 00:00:00 2001
From: Maarten Maathuis <madman2003@gmail.com>
Date: Wed, 18 Mar 2009 09:36:51 +0100
Subject: [PATCH 6/6] NV50: add NV84 wfb support
---
src/nouveau_exa.c | 58 +++++++++++++++++++++++++++++++++++++++-------------
src/nv_driver.c | 2 +-
src/nv_proto.h | 2 +-
3 files changed, 45 insertions(+), 17 deletions(-)
diff --git a/src/nouveau_exa.c b/src/nouveau_exa.c
index 4cbcf50..43d9945 100644
--- a/src/nouveau_exa.c
+++ b/src/nouveau_exa.c
@@ -390,11 +390,13 @@ nouveau_exa_modify_pixmap_header(PixmapPtr ppix, int width, int height,
devkind = (width * cpp + 63) & ~63;
nvpix->size = devkind * ah;
- /* A 128 bytes block is potentially moved 6 positions ahead.
+ /* A 128 bytes block is potentially moved 6/2 positions ahead.
* It would be very difficult to predict this, so we overallocate.
*/
if (pNv->NVArch >= 0x90 && pNv->NVArch < 0xA0)
nvpix->size += 768;
+ else if (pNv->NVArch == 0x84)
+ nvpix->size += 256;
}
ret = nouveau_bo_new(pNv->dev, flags, 0, nvpix->size,
@@ -810,16 +812,20 @@ const unsigned int mask_pitch_inv[] = {~0, ~7, ~31, ~63, ~63, ~63, ~63 };
const unsigned int mask_height[] = {0, 1, 3, 7, 15, 31, 63 };
const unsigned int mask_pitch[] = {0, 7, 31, 63, 63, 63, 63 };
-/* This applies to nv9X hw. They do strange memory rearranging. */
+/* Some cards do strange memory rearrangement. */
/* 128 bytes blocks (which correspond to 32x4 tiles) are moved forward and backward in a special pattern. */
-static bool nv90_mode;
-const int pattern[] = { 0, 1, 2, 3, /**/ 0, 1, 2, 3, /**/ 1, 2, 3, /**/ 0, 2, 3, /**/
+static bool nv9x_mode;
+static bool nv84_mode;
+const int pattern_nv9x[] = { 0, 1, 2, 3, /**/ 0, 1, 2, 3, /**/ 1, 2, 3, /**/ 0, 2, 3, /**/
0, 1, 3, /**/ 0, 1, 2, /**/ 1, 2, 3, /**/ 0, 2, 3, /**/ 0, 1, 3, /**/ 0, 1, 2 /**/};
+const int pattern_nv84[] = { 4, /**/ 5, 5, /**/ 4, 5, /**/ 4, 4, /**/ 5, 5, /**/ 4, 4, /**/ 5, 4, /**/ 5, 5, /**/ 4 };
const int tile_offset[] = {
0, 0, 0, 0, 0, 0, 0, 0, /* type 0 */
2, 2, 2, 2, 2, 2, -6, -6, /* type 1 */
4, 4, 4, 4, -4, -4, -4, -4, /* type 2 */
- 6, 6, -2, -2, -2, -2, -2, -2 /* type 3 */
+ 6, 6, -2, -2, -2, -2, -2, -2, /* type 3 */
+ 0, 0, 0, 0, 2, 2, -2, -2, /* type 4 */
+ 2, 2, -2, -2, 0, 0, 0, 0, /* type 5 */
};
#define X_REMAINDER (x & mask_pitch[tile_height])
@@ -837,8 +843,24 @@ nouveau_exa_wfb_recurse_offset(unsigned int offset, unsigned int tile_height,
else
offset += (Y_REMAINDER * (1 << tile_pitch[tile_height])) + X_REMAINDER;
- /* Here comes the correction for the unusual memory mapping of NV9X hw. */
- if (first && nv90_mode) {
+ /* Here comes the correction for the unusual memory mapping of some hw. */
+ if (first && nv84_mode) {
+ unsigned int suboffset, suboffset2, suboffset3;
+
+ /* 128 byte blocks within a larger block of 16384 bytes */
+ suboffset = (offset & 0x3FFF) & ~0x7F;
+ /* now we have 128 blocks */
+ suboffset >>= 7;
+
+ /* now we have 16 rows */
+ suboffset2 = suboffset >> 3;
+ /* tile within row */
+ suboffset3 = suboffset & 0x7;
+
+ offset += tile_offset[suboffset3 + 8*pattern_nv84[suboffset2]] * 128;
+ }
+
+ if (first && nv9x_mode) {
unsigned int suboffset, suboffset2, suboffset3;
/* 128 byte blocks within a larger block of 32768 bytes */
@@ -852,7 +874,7 @@ nouveau_exa_wfb_recurse_offset(unsigned int offset, unsigned int tile_height,
/* tile within row */
suboffset3 = suboffset & 0x7;
- offset += tile_offset[suboffset3 + 8*pattern[suboffset2]] * 128;
+ offset += tile_offset[suboffset3 + 8*pattern_nv9x[suboffset2]] * 128;
}
return offset;
@@ -945,8 +967,6 @@ nouveau_exa_wfb_setup_wrap(ReadMemoryProcPtr *pRead,
{
PixmapPtr pPixmap;
struct nouveau_pixmap *nvpix;
- ScrnInfoPtr pScrn = xf86Screens[pDraw->pScreen->myNum];
- NVPtr pNv = NVPTR(pScrn);
if (!pRead || !pWrite)
return;
@@ -989,10 +1009,6 @@ nouveau_exa_wfb_setup_wrap(ReadMemoryProcPtr *pRead,
wfb_pixmaps[i].tile_height = nvpix->tiling_mode + 1;
wfb_pixmaps[i].num_tiles_width = LINEAR_PITCH/(1 << tile_pitch[wfb_pixmaps[i].tile_height]);
}
- if (pNv->NVArch >= 0x90 && pNv->NVArch < 0xA0)
- nv90_mode = true;
- else
- nv90_mode = false;
*pRead = nouveau_exa_wfb_read_memory;
*pWrite = nouveau_exa_wfb_write_memory;
@@ -1024,10 +1040,22 @@ nouveau_exa_wfb_finish_wrap(DrawablePtr pDraw)
}
void
-nouveau_exa_wfb_init()
+nouveau_exa_wfb_init(ScrnInfoPtr pScrn)
{
+ NVPtr pNv = NVPTR(pScrn);
int i;
+ if (pNv->NVArch >= 0x90 && pNv->NVArch < 0xA0) {
+ nv9x_mode = true;
+ nv84_mode = false;
+ } else if (pNv->NVArch == 0x84) {
+ nv9x_mode = false;
+ nv84_mode = true;
+ } else {
+ nv9x_mode = false;
+ nv84_mode = false;
+ }
+
for (i = 0; i < 6; i++) {
wfb_pixmaps[i].ppix = NULL;
wfb_pixmaps[i].start = 0;
diff --git a/src/nv_driver.c b/src/nv_driver.c
index f056c68..b66e510 100644
--- a/src/nv_driver.c
+++ b/src/nv_driver.c
@@ -1530,7 +1530,7 @@ NVPreInit(ScrnInfoPtr pScrn, int flags)
#if XORG_VERSION_CURRENT >= XORG_VERSION_NUMERIC(1,6,99,0,0)
if (!pNv->NoAccel && pNv->exa_driver_pixmaps && pNv->Architecture == NV_ARCH_50) {
pNv->wfb_enabled = true;
- nouveau_exa_wfb_init();
+ nouveau_exa_wfb_init(pScrn);
if (xf86LoadSubModule(pScrn, "wfb") == NULL)
NVPreInitFail("\n");
diff --git a/src/nv_proto.h b/src/nv_proto.h
index 7d820b7..44c8355 100644
--- a/src/nv_proto.h
+++ b/src/nv_proto.h
@@ -78,7 +78,7 @@ void nouveau_exa_wfb_setup_wrap(ReadMemoryProcPtr *pRead,
WriteMemoryProcPtr *pWrite,
DrawablePtr pDraw);
void nouveau_exa_wfb_finish_wrap(DrawablePtr pDraw);
-void nouveau_exa_wfb_init();
+void nouveau_exa_wfb_init(ScrnInfoPtr pScrn);
/* in nv_hw.c */
void NVCalcStateExt(ScrnInfoPtr,struct _riva_hw_state *,int,int,int,int,int,int);
--
1.6.2
[-- Attachment #8: Type: text/plain, Size: 181 bytes --]
_______________________________________________
Nouveau mailing list
Nouveau-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
http://lists.freedesktop.org/mailman/listinfo/nouveau
^ permalink raw reply related [flat|nested] 6+ messages in thread
* Re: nv50: wfb patches
[not found] ` <6d4bc9fc0903291654r70e3e6f0pd6f56874a6c361f7-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2009-03-30 3:54 ` Younes Manton
[not found] ` <586c2acd0903292054r11841a3v7750ac27ea9a6963-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
0 siblings, 1 reply; 6+ messages in thread
From: Younes Manton @ 2009-03-30 3:54 UTC (permalink / raw)
To: Maarten Maathuis
Cc: nouveau-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
2009/3/29 Maarten Maathuis <madman2003-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>:
> The whole thing works fine, but XSHM is an issue
> (http://stillunknown.livejournal.com/928.html). With it disabled most
> apps are fine, although a few issues remain.
Just out of curiosity, why can't you alloc a pixmap you can
accelerate and just keep it sync'd with the xshm pixmap, track dirty
rects, etc? Also why can't you allocate a linear pixmap on software
fallbacks, copy+untile the original to the linear, let the fallback
complete, and copy+tile back to the original? Might be cheaper than
having to trap and transform every pixel access.
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: nv50: wfb patches
[not found] ` <586c2acd0903292054r11841a3v7750ac27ea9a6963-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2009-03-30 10:20 ` Maarten Maathuis
[not found] ` <6d4bc9fc0903300320t7ab80b83k499c7aaf397303ea-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
0 siblings, 1 reply; 6+ messages in thread
From: Maarten Maathuis @ 2009-03-30 10:20 UTC (permalink / raw)
To: Younes Manton; +Cc: nouveau-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
XSHM could probably be fixed, but that would have to be done on a
larger scope (as in for everything, not just exa). This could
negatively impact those with no acceleration at all.
About the untiling + copy approach, this is what classic exa does, and
you are practically forced to keep a software copy around always.
Otherwise you get excessive copies. I know that reading from uncached
memory isn't so great right now, but maybe in the future we could also
use the GART.
wfb is just a "simple" way of handling things; exa's migration
schemes aren't easy.
From a memory waste point of view, wfb is the way to go, and the fact
that nvidia use it says something.
And imo, the things that hit really hard on software fallbacks just
need to be fixed.
Anyway you're welcome to try something else and compare.
Maarten.
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: nv50: wfb patches
[not found] ` <6d4bc9fc0903300320t7ab80b83k499c7aaf397303ea-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2009-04-10 19:31 ` Maarten Maathuis
[not found] ` <6d4bc9fc0904101231l4373cc92h1f32b93dec9cc8b8-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
0 siblings, 1 reply; 6+ messages in thread
From: Maarten Maathuis @ 2009-04-10 19:31 UTC (permalink / raw)
To: nouveau-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
[-- Attachment #1: Type: text/plain, Size: 575 bytes --]
Updated patches, with one performance improvement: tiled pixmaps are
not checked for when all pending pixmaps are linear. This reduces the
overhead to the point that you are doing small memcopies instead of a
large one.
I think I solved the XSHM problem, simply by accelerating it
(http://cgit.freedesktop.org/xorg/xserver/commit/?id=1b5758bef0840c6614244e321790231b3c9477c9).
I'll be fading to the background for a few months pretty soon, so
questions that involve access to my main machine (running nouveau,
xorg-git, etc) should be asked soon (less than a week).
Maarten.
[-- Attachment #2: 0001-nv50-implement-wfb.patch --]
[-- Type: text/x-patch, Size: 17725 bytes --]
From fcf136164422dd4ddc13bda621aef0c73bd2ea7c Mon Sep 17 00:00:00 2001
From: Maarten Maathuis <madman2003@gmail.com>
Date: Sat, 7 Mar 2009 23:49:19 +0100
Subject: [PATCH 1/8] nv50: implement wfb
- Only for sufficiently new xserver's and exa_driver_pixmaps.
---
src/nouveau_exa.c | 282 +++++++++++++++++++++++++++++++++++++++++++++++++++--
src/nv50_exa.c | 31 +++++-
src/nv_driver.c | 52 ++++++++--
src/nv_proto.h | 10 ++
src/nv_type.h | 4 +-
5 files changed, 357 insertions(+), 22 deletions(-)
diff --git a/src/nouveau_exa.c b/src/nouveau_exa.c
index b7bcc87..74804ec 100644
--- a/src/nouveau_exa.c
+++ b/src/nouveau_exa.c
@@ -89,7 +89,7 @@ NVAccelDownloadM2MF(PixmapPtr pspix, int x, int y, int w, int h,
if (!linear) {
BEGIN_RING(chan, m2mf, 0x0200, 7);
OUT_RING (chan, 0);
- OUT_RING (chan, 0);
+ OUT_RING (chan, nv50_exa_get_tile_mode(pspix));
OUT_RING (chan, pspix->drawable.width * cpp);
OUT_RING (chan, pspix->drawable.height);
OUT_RING (chan, 1);
@@ -210,7 +210,7 @@ NVAccelUploadM2MF(PixmapPtr pdpix, int x, int y, int w, int h,
if (!linear) {
BEGIN_RING(chan, m2mf, 0x021c, 7);
OUT_RING (chan, 0);
- OUT_RING (chan, 0);
+ OUT_RING (chan, nv50_exa_get_tile_mode(pdpix));
OUT_RING (chan, pdpix->drawable.width * cpp);
OUT_RING (chan, pdpix->drawable.height);
OUT_RING (chan, 1);
@@ -259,7 +259,11 @@ nouveau_exa_mark_sync(ScreenPtr pScreen)
static void
nouveau_exa_wait_marker(ScreenPtr pScreen, int marker)
{
- NVSync(xf86Screens[pScreen->myNum]);
+ ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum];
+ NVPtr pNv = NVPTR(pScrn);
+
+ if (!pNv->exa_driver_pixmaps)
+ NVSync(xf86Screens[pScreen->myNum]);
}
static Bool
@@ -351,17 +355,32 @@ nouveau_exa_modify_pixmap_header(PixmapPtr ppix, int width, int height,
if (!nvpix->bo && nvpix->size) {
uint32_t cpp = ppix->drawable.bitsPerPixel >> 3;
- /* At some point we should just keep 1bpp pixmaps in sysram */
uint32_t flags = NOUVEAU_BO_VRAM;
int ret;
if (pNv->Architecture >= NV_ARCH_50 && cpp) {
- uint32_t aw = (width + 7) & ~7;
- uint32_t ah = (height + 7) & ~7;
+ uint32_t ah;
+ if (height > 47) {
+ ah = (height + 63) & ~63;
+ nvpix->tiling_mode = 5;
+ } else if (height > 23) {
+ ah = (height + 31) & ~31;
+ nvpix->tiling_mode = 4;
+ } else if (height > 11) {
+ ah = (height + 15) & ~15;
+ nvpix->tiling_mode = 3;
+ } else if (height > 5) {
+ ah = (height + 7) & ~7;
+ nvpix->tiling_mode = 2;
+ } else {
+ ah = (height + 3) & ~3;
+ nvpix->tiling_mode = 1;
+ }
flags |= NOUVEAU_BO_TILED;
- devkind = ((aw * cpp) + 63) & ~63;
+ /* This allignment is very important. */
+ devkind = (width * cpp + 63) & ~63;
nvpix->size = devkind * ah;
}
@@ -390,8 +409,11 @@ nouveau_exa_pixmap_is_tiled(PixmapPtr ppix)
NVPtr pNv = NVPTR(pScrn);
if (pNv->exa_driver_pixmaps) {
- if (!nouveau_pixmap_bo(ppix)->tiled)
+ if (!nouveau_pixmap_bo(ppix))
+ return false;
+ if (nouveau_pixmap_bo(ppix)->tiled == 0)
return false;
+ return true;
} else
if (pNv->Architecture < NV_ARCH_50 ||
exaGetPixmapOffset(ppix) < pNv->EXADriverPtr->offScreenBase)
@@ -403,10 +425,12 @@ nouveau_exa_pixmap_is_tiled(PixmapPtr ppix)
static void *
nouveau_exa_pixmap_map(PixmapPtr ppix)
{
+ ScrnInfoPtr pScrn = xf86Screens[ppix->drawable.pScreen->myNum];
+ NVPtr pNv = NVPTR(pScrn);
struct nouveau_bo *bo = nouveau_pixmap_bo(ppix);
unsigned delta = nouveau_pixmap_offset(ppix);
- if (bo->tiled) {
+ if (!pNv->wfb_enabled && bo->tiled) {
struct nouveau_pixmap *nvpix = nouveau_pixmap(ppix);
nvpix->map_refcount++;
@@ -430,9 +454,11 @@ nouveau_exa_pixmap_map(PixmapPtr ppix)
static void
nouveau_exa_pixmap_unmap(PixmapPtr ppix)
{
+ ScrnInfoPtr pScrn = xf86Screens[ppix->drawable.pScreen->myNum];
+ NVPtr pNv = NVPTR(pScrn);
struct nouveau_bo *bo = nouveau_pixmap_bo(ppix);
- if (bo->tiled) {
+ if (!pNv->wfb_enabled && bo->tiled) {
struct nouveau_pixmap *nvpix = nouveau_pixmap(ppix);
if (--nvpix->map_refcount)
@@ -682,3 +708,239 @@ nouveau_exa_init(ScreenPtr pScreen)
pNv->EXADriverPtr = exa;
return TRUE;
}
+
+/* WFB functions. */
+
+static inline FbBits
+nouveau_exa_wfb_read_memory_linear(const void *src, int size)
+{
+ FbBits bits = 0;
+
+ memcpy(&bits, src, size);
+
+ return bits;
+}
+
+static inline void
+nouveau_exa_wfb_write_memory_linear(void *dst, FbBits value, int size)
+{
+ memcpy(dst, &value, size);
+}
+
+#define LINEAR_PITCH (pPixmap->devKind)
+
+/* Wfb related data. */
+static struct {
+ PixmapPtr ppix;
+ bool used;
+ bool tiled;
+ unsigned long start;
+ unsigned long end;
+ uint64_t multiply_factor;
+ uint8_t cpp;
+ unsigned int tile_height;
+ unsigned int num_tiles_width;
+} wfb_pixmaps[6];
+
+/* height: empty, 2, 4, 8, 16, 32, 64 */
+const unsigned int num_tiles[] = { 0, 0, 4, 2, 1, 1, 1 };
+const unsigned int tile_pitch[] = { 0, 3, 5, 6, 6, 6, 6 };
+const unsigned int mask_height_inv[] = {~0, ~1, ~3, ~7, ~15, ~31, ~63 };
+const unsigned int mask_pitch_inv[] = {~0, ~7, ~31, ~63, ~63, ~63, ~63 };
+const unsigned int mask_height[] = {0, 1, 3, 7, 15, 31, 63 };
+const unsigned int mask_pitch[] = {0, 7, 31, 63, 63, 63, 63 };
+
+#define X_REMAINDER (x & mask_pitch[tile_height])
+#define Y_REMAINDER (y & mask_height[tile_height])
+
+/* tile_height and tile_pitch are expressed in powers of two */
+static inline unsigned int
+nouveau_exa_wfb_recurse_offset(unsigned int offset, unsigned int tile_height,
+ unsigned int x, unsigned int y, unsigned int num_tiles_width)
+{
+ offset += (((x & mask_pitch_inv[tile_height]) >> tile_pitch[tile_height]) + (((y & mask_height_inv[tile_height]) >> tile_height) * num_tiles_width)) * (1 << (tile_height + tile_pitch[tile_height]));
+
+ if (tile_height > 1)
+ offset = nouveau_exa_wfb_recurse_offset(offset, tile_height - 1, X_REMAINDER, Y_REMAINDER, num_tiles[tile_height]);
+ else
+ offset += (Y_REMAINDER * (1 << tile_pitch[tile_height])) + X_REMAINDER;
+
+ return offset;
+}
+
+/* Note, we can only expose one read and write function, the linear versions are for internal consumption. */
+static FbBits
+nouveau_exa_wfb_read_memory(const void *src, int size)
+{
+ int i;
+ uint64_t line_x, line_y;
+ unsigned long offset = (unsigned long) src, subpixel_offset;
+ PixmapPtr pPixmap = NULL;
+ FbBits bits = 0;
+ void *new_src;
+
+ /* Find the right pixmap. */
+ for (i = 0; i < 6; i++)
+ if (offset >= wfb_pixmaps[i].start && offset < wfb_pixmaps[i].end) {
+ pPixmap = wfb_pixmaps[i].ppix;
+ break;
+ }
+
+ if (!pPixmap || !wfb_pixmaps[i].tiled)
+ return nouveau_exa_wfb_read_memory_linear(src, size);
+
+ /* Now comes the decoding. */
+ offset -= (unsigned long) pPixmap->devPrivate.ptr;
+ /* Assuming dword alligned offsets. */
+ subpixel_offset = offset & (wfb_pixmaps[i].cpp - 1);
+ offset &= ~(wfb_pixmaps[i].cpp - 1);
+
+ /* Determine the coordinate first. */
+ /* Division is too expensive for large numbers, so we precalculate a multiplication factor. */
+ line_y = (offset * wfb_pixmaps[i].multiply_factor) >> 32;
+ line_x = offset - line_y * LINEAR_PITCH;
+
+ new_src = pPixmap->devPrivate.ptr +
+ nouveau_exa_wfb_recurse_offset(0, wfb_pixmaps[i].tile_height, line_x, line_y, wfb_pixmaps[i].num_tiles_width) +
+ subpixel_offset;
+
+ memcpy(&bits, new_src, size);
+
+ return bits;
+}
+
+static void
+nouveau_exa_wfb_write_memory(void *dst, FbBits value, int size)
+{
+ int i;
+ uint64_t line_x, line_y;
+ unsigned long offset = (unsigned long) dst, subpixel_offset;
+ PixmapPtr pPixmap = NULL;
+ void *new_dst;
+
+ /* Find the right pixmap. */
+ for (i = 0; i < 6; i++)
+ if (offset >= wfb_pixmaps[i].start && offset < wfb_pixmaps[i].end) {
+ pPixmap = wfb_pixmaps[i].ppix;
+ break;
+ }
+
+ if (!pPixmap || !wfb_pixmaps[i].tiled) {
+ nouveau_exa_wfb_write_memory_linear(dst, value, size);
+ return;
+ }
+
+ /* Now comes the decoding. */
+ offset -= (unsigned long) pPixmap->devPrivate.ptr;
+ /* Assuming dword alligned offsets. */
+ subpixel_offset = offset & (wfb_pixmaps[i].cpp - 1);
+ offset &= ~(wfb_pixmaps[i].cpp - 1);
+
+ /* Determine the coordinate first. */
+ /* Division is too expensive for large numbers, so we precalculate a multiplication factor. */
+ line_y = (offset * wfb_pixmaps[i].multiply_factor) >> 32;
+ line_x = offset - line_y * LINEAR_PITCH;
+
+ new_dst = pPixmap->devPrivate.ptr +
+ nouveau_exa_wfb_recurse_offset(0, wfb_pixmaps[i].tile_height, line_x, line_y, wfb_pixmaps[i].num_tiles_width) +
+ subpixel_offset;
+
+ memcpy(new_dst, &value, size);
+}
+
+void
+nouveau_exa_wfb_setup_wrap(ReadMemoryProcPtr *pRead,
+ WriteMemoryProcPtr *pWrite,
+ DrawablePtr pDraw)
+{
+ PixmapPtr pPixmap;
+ struct nouveau_pixmap *nvpix;
+
+ if (!pRead || !pWrite)
+ return;
+
+ pPixmap = NVGetDrawablePixmap(pDraw);
+ if (!pPixmap)
+ return;
+
+ nvpix = nouveau_pixmap(pPixmap);
+
+ int i;
+ for (i = 0; i < 6; i++)
+ if (!wfb_pixmaps[i].used)
+ break;
+
+ if (i == 6) {
+ ErrorF("More than 6 wraps are setup, what the hell is going on?\n");
+ *pRead = NULL;
+ *pWrite = NULL;
+ return;
+ }
+
+ /* We will get a pointer, somewhere in the range of this pixmap. */
+ /* Based on linear representation ofcource. */
+ wfb_pixmaps[i].ppix = pPixmap;
+ wfb_pixmaps[i].start = (unsigned long) pPixmap->devPrivate.ptr;
+ if (!nvpix || !nvpix->bo)
+ wfb_pixmaps[i].end = wfb_pixmaps[i].start;
+ else
+ wfb_pixmaps[i].end = wfb_pixmaps[i].start + nvpix->bo->size;
+ wfb_pixmaps[i].used = true;
+ wfb_pixmaps[i].tiled = nouveau_exa_pixmap_is_tiled(pPixmap);
+ /* Division is too expensive for large numbers, so we precalculate a multiplication factor. */
+ wfb_pixmaps[i].multiply_factor = (0xFFFFFFFF/exaGetPixmapPitch(pPixmap)) + 1;
+ wfb_pixmaps[i].cpp = (pPixmap->drawable.bitsPerPixel >> 3);
+ if (!nvpix) {
+ wfb_pixmaps[i].tile_height = 0;
+ wfb_pixmaps[i].num_tiles_width = 0;
+ } else {
+ wfb_pixmaps[i].tile_height = nvpix->tiling_mode + 1;
+ wfb_pixmaps[i].num_tiles_width = LINEAR_PITCH/(1 << tile_pitch[wfb_pixmaps[i].tile_height]);
+ }
+
+ *pRead = nouveau_exa_wfb_read_memory;
+ *pWrite = nouveau_exa_wfb_write_memory;
+}
+
+void
+nouveau_exa_wfb_finish_wrap(DrawablePtr pDraw)
+{
+ PixmapPtr pPixmap;
+ int i;
+
+ pPixmap = NVGetDrawablePixmap(pDraw);
+ if (!pPixmap)
+ return;
+
+ for (i = 0; i < 6; i++)
+ if (wfb_pixmaps[i].ppix == pPixmap) {
+ wfb_pixmaps[i].ppix = NULL;
+ wfb_pixmaps[i].start = 0;
+ wfb_pixmaps[i].end = 0;
+ wfb_pixmaps[i].used = false;
+ wfb_pixmaps[i].tiled = false;
+ wfb_pixmaps[i].multiply_factor = 0;
+ wfb_pixmaps[i].cpp = 0;
+ wfb_pixmaps[i].tile_height = 0;
+ wfb_pixmaps[i].num_tiles_width = 0;
+ break;
+ }
+}
+
+void
+nouveau_exa_wfb_init()
+{
+ int i;
+
+ for (i = 0; i < 6; i++) {
+ wfb_pixmaps[i].ppix = NULL;
+ wfb_pixmaps[i].start = 0;
+ wfb_pixmaps[i].end = 0;
+ wfb_pixmaps[i].used = false;
+ wfb_pixmaps[i].tiled = false;
+ wfb_pixmaps[i].multiply_factor = 0;
+ wfb_pixmaps[i].cpp = 0;
+ wfb_pixmaps[i].tile_height = 0;
+ wfb_pixmaps[i].num_tiles_width = 0;
+ }
+}
diff --git a/src/nv50_exa.c b/src/nv50_exa.c
index 3831ec3..ac6b6b4 100644
--- a/src/nv50_exa.c
+++ b/src/nv50_exa.c
@@ -72,6 +72,31 @@ NV50EXABlendOp[] = {
/* Add */ { 0, 0, BF( ONE), BF( ONE) },
};
+
+uint32_t
+nv50_exa_get_tile_mode(PixmapPtr ppix)
+{
+ struct nouveau_pixmap *nvpix = nouveau_pixmap(ppix);
+
+ if (!nvpix)
+ return 0x00;
+
+ switch (nvpix->tiling_mode) {
+ case 1: /* pitch 32, height 4 */
+ return 0x00;
+ case 2: /* pitch 64, height 8 */
+ return 0x10;
+ case 3: /* pitch 64, height 16 */
+ return 0x20;
+ case 4: /* pitch 64, height 32 */
+ return 0x30;
+ case 5: /* pitch 64, height 64 */
+ return 0x40;
+ default:
+ return 0x00;
+ }
+}
+
static Bool
NV50EXA2DSurfaceFormat(PixmapPtr ppix, uint32_t *fmt)
{
@@ -128,7 +153,7 @@ NV50EXAAcquireSurface2D(PixmapPtr ppix, int is_src)
BEGIN_RING(chan, eng2d, mthd, 5);
OUT_RING (chan, fmt);
OUT_RING (chan, 0);
- OUT_RING (chan, 0);
+ OUT_RING (chan, nv50_exa_get_tile_mode(ppix));
OUT_RING (chan, 1);
OUT_RING (chan, 0);
}
@@ -457,7 +482,7 @@ NV50EXARenderTarget(PixmapPtr ppix, PicturePtr ppict)
OUT_RELOCh(chan, bo, delta, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
OUT_RELOCl(chan, bo, delta, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
OUT_RING (chan, format);
- OUT_RING (chan, 0);
+ OUT_RING (chan, nv50_exa_get_tile_mode(ppix));
OUT_RING (chan, 0x00000000);
BEGIN_RING(chan, tesla, NV50TCL_RT_HORIZ(0), 2);
OUT_RING (chan, ppix->drawable.width);
@@ -577,7 +602,7 @@ NV50EXATexture(PixmapPtr ppix, PicturePtr ppict, unsigned unit)
NOUVEAU_FALLBACK("invalid picture format, this SHOULD NOT HAPPEN. Expect trouble.\n");
}
OUT_RELOCl(chan, bo, delta, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
- OUT_RING (chan, 0xd0005000);
+ OUT_RING (chan, 0xd0005000 | (nv50_exa_get_tile_mode(ppix) << 18));
OUT_RING (chan, 0x00300000);
OUT_RING (chan, ppix->drawable.width);
OUT_RING (chan, (1 << NV50TIC_0_5_DEPTH_SHIFT) | ppix->drawable.height);
diff --git a/src/nv_driver.c b/src/nv_driver.c
index 92232dd..f10400e 100644
--- a/src/nv_driver.c
+++ b/src/nv_driver.c
@@ -158,6 +158,12 @@ static const char *fbSymbols[] = {
NULL
};
+static const char *wfbSymbols[] = {
+ "wfbPictureInit",
+ "wfbScreenInit",
+ NULL
+};
+
static const char *exaSymbols[] = {
"exaDriverInit",
"exaOffscreenInit",
@@ -279,7 +285,7 @@ nouveauSetup(pointer module, pointer opts, int *errmaj, int *errmin)
* Tell the loader about symbols from other modules that this module
* might refer to.
*/
- LoaderRefSymLists(vgahwSymbols, exaSymbols, fbSymbols,
+ LoaderRefSymLists(vgahwSymbols, exaSymbols, fbSymbols, wfbSymbols,
shadowSymbols, drmSymbols,
i2cSymbols, ddcSymbols, vbeSymbols,
int10Symbols, NULL);
@@ -1524,10 +1530,23 @@ NVPreInit(ScrnInfoPtr pScrn, int flags)
* section.
*/
- if (xf86LoadSubModule(pScrn, "fb") == NULL)
- NVPreInitFail("\n");
+#if XORG_VERSION_CURRENT >= XORG_VERSION_NUMERIC(1,6,99,0,0)
+ if (!pNv->NoAccel && pNv->exa_driver_pixmaps && pNv->Architecture == NV_ARCH_50) {
+ pNv->wfb_enabled = true;
+ nouveau_exa_wfb_init();
+ if (xf86LoadSubModule(pScrn, "wfb") == NULL)
+ NVPreInitFail("\n");
- xf86LoaderReqSymLists(fbSymbols, NULL);
+ xf86LoaderReqSymLists(wfbSymbols, NULL);
+ } else
+#endif
+ {
+ pNv->wfb_enabled = false;
+ if (xf86LoadSubModule(pScrn, "fb") == NULL)
+ NVPreInitFail("\n");
+
+ xf86LoaderReqSymLists(fbSymbols, NULL);
+ }
/* Load EXA if needed */
if (!pNv->NoAccel) {
@@ -2130,9 +2149,19 @@ NVScreenInit(int scrnIndex, ScreenPtr pScreen, int argc, char **argv)
switch (pScrn->bitsPerPixel) {
case 16:
case 32:
- ret = fbScreenInit(pScreen, FBStart, pScrn->virtualX, pScrn->virtualY,
- pScrn->xDpi, pScrn->yDpi,
- displayWidth, pScrn->bitsPerPixel);
+#if XORG_VERSION_CURRENT >= XORG_VERSION_NUMERIC(1,6,99,0,0)
+ if (pNv->wfb_enabled) {
+ ret = wfbScreenInit(pScreen, FBStart, pScrn->virtualX, pScrn->virtualY,
+ pScrn->xDpi, pScrn->yDpi,
+ displayWidth, pScrn->bitsPerPixel,
+ nouveau_exa_wfb_setup_wrap, nouveau_exa_wfb_finish_wrap);
+ } else
+#endif
+ {
+ ret = fbScreenInit(pScreen, FBStart, pScrn->virtualX, pScrn->virtualY,
+ pScrn->xDpi, pScrn->yDpi,
+ displayWidth, pScrn->bitsPerPixel);
+ }
break;
default:
xf86DrvMsg(scrnIndex, X_ERROR,
@@ -2157,7 +2186,14 @@ NVScreenInit(int scrnIndex, ScreenPtr pScreen, int argc, char **argv)
}
}
- fbPictureInit (pScreen, 0, 0);
+#if XORG_VERSION_CURRENT >= XORG_VERSION_NUMERIC(1,6,99,0,0)
+ if (pNv->wfb_enabled) {
+ wfbPictureInit(pScreen, 0, 0);
+ } else
+#endif
+ {
+ fbPictureInit(pScreen, 0, 0);
+ }
xf86SetBlackWhitePixels(pScreen);
diff --git a/src/nv_proto.h b/src/nv_proto.h
index 43fa62e..2ded43d 100644
--- a/src/nv_proto.h
+++ b/src/nv_proto.h
@@ -21,6 +21,10 @@ void NVAccelFree(ScrnInfoPtr pScrn);
/* in nv_driver.c */
Bool NVI2CInit(ScrnInfoPtr pScrn);
+/* We can only include fb.h in normal or wfb mode,
+ * so we have to declare one ourself. */
+extern Bool wfbPictureInit (ScreenPtr pScreen,
+ PictFormatPtr formats, int nformats);
/* in nv_dri.c */
Bool NVDRIScreenInit(ScrnInfoPtr pScrn);
@@ -72,6 +76,11 @@ void NVTakedownDma(ScrnInfoPtr pScrn);
Bool nouveau_exa_init(ScreenPtr pScreen);
Bool nouveau_exa_pixmap_is_onscreen(PixmapPtr pPixmap);
bool nouveau_exa_pixmap_is_tiled(PixmapPtr ppix);
+void nouveau_exa_wfb_setup_wrap(ReadMemoryProcPtr *pRead,
+ WriteMemoryProcPtr *pWrite,
+ DrawablePtr pDraw);
+void nouveau_exa_wfb_finish_wrap(DrawablePtr pDraw);
+void nouveau_exa_wfb_init();
/* in nv_hw.c */
void NVCalcStateExt(ScrnInfoPtr,struct nouveau_mode_state *,int,int,int,int,int,int);
@@ -207,6 +216,7 @@ int NV40SetTexturePortAttribute(ScrnInfoPtr, Atom, INT32, pointer);
Bool NVAccelInitNV50TCL(ScrnInfoPtr pScrn);
/* in nv50_exa.c */
+uint32_t nv50_exa_get_tile_mode(PixmapPtr ppix);
Bool NV50EXAPrepareSolid(PixmapPtr, int, Pixel, Pixel);
void NV50EXASolid(PixmapPtr, int, int, int, int);
void NV50EXADoneSolid(PixmapPtr);
diff --git a/src/nv_type.h b/src/nv_type.h
index 2ec4fba..7d34c8b 100644
--- a/src/nv_type.h
+++ b/src/nv_type.h
@@ -301,7 +301,8 @@ typedef struct _NVRec {
uint8_t cur_head;
ExaDriverPtr EXADriverPtr;
- Bool exa_driver_pixmaps;
+ Bool exa_driver_pixmaps;
+ bool wfb_enabled;
ScreenBlockHandlerProcPtr BlockHandler;
CloseScreenProcPtr CloseScreen;
/* Cursor */
@@ -463,6 +464,7 @@ struct nouveau_pixmap {
void *linear;
unsigned size;
int map_refcount;
+ int tiling_mode;
};
static inline struct nouveau_pixmap *
--
1.6.2.2
[-- Attachment #3: 0002-exa-smarter-initial-mapping-of-driver-allocated-pix.patch --]
[-- Type: text/x-patch, Size: 8940 bytes --]
From 34e967c0fe675f3943d7ecd5ff4321919d8c78bf Mon Sep 17 00:00:00 2001
From: Maarten Maathuis <madman2003@gmail.com>
Date: Fri, 13 Mar 2009 09:57:12 +0100
Subject: [PATCH 2/8] exa: smarter initial mapping of driver allocated pixmaps
- Use a software copy until the first accelerated op, then UTS it.
- The path back does not exist.
---
src/nouveau_exa.c | 64 +++++++++++++++++++++++++++++++++++++++++++++++-----
src/nv04_exa.c | 5 ++++
src/nv10_exa.c | 5 ++++
src/nv30_exa.c | 5 ++++
src/nv40_exa.c | 5 ++++
src/nv50_exa.c | 10 ++++++++
src/nv_proto.h | 1 +
src/nv_type.h | 1 +
8 files changed, 89 insertions(+), 7 deletions(-)
diff --git a/src/nouveau_exa.c b/src/nouveau_exa.c
index 74804ec..72e783e 100644
--- a/src/nouveau_exa.c
+++ b/src/nouveau_exa.c
@@ -328,6 +328,9 @@ nouveau_exa_destroy_pixmap(ScreenPtr pScreen, void *priv)
if (!nvpix)
return;
+ if (nvpix->linear)
+ xfree(nvpix->linear);
+
nouveau_bo_ref(NULL, &nvpix->bo);
xfree(nvpix);
}
@@ -422,6 +425,39 @@ nouveau_exa_pixmap_is_tiled(PixmapPtr ppix)
return true;
}
+void
+nouveau_exa_pixmap_prepare_for_accel(PixmapPtr ppix)
+{
+ ScrnInfoPtr pScrn = xf86Screens[ppix->drawable.pScreen->myNum];
+ NVPtr pNv = NVPTR(pScrn);
+ struct nouveau_pixmap *nvpix = nouveau_pixmap(ppix);
+
+ if (!pNv->exa_driver_pixmaps)
+ return;
+
+ if (nvpix->linear) {
+ int cpp = ppix->drawable.bitsPerPixel >> 3;
+ if ((ppix->drawable.width * ppix->drawable.height * cpp) < 16*1024) {
+ if (pNv->Architecture == NV_ARCH_50)
+ NV50EXAUploadSIFC(nvpix->linear, ppix->devKind, ppix, 0, 0, ppix->drawable.width,
+ ppix->drawable.height, cpp);
+ else
+ NV04EXAUploadIFC(pScrn, nvpix->linear, ppix->devKind, ppix, 0, 0, ppix->drawable.width,
+ ppix->drawable.height, cpp);
+ exaMarkSync(ppix->drawable.pScreen);
+ } else {
+ NVAccelUploadM2MF(ppix, 0, 0, ppix->drawable.width,
+ ppix->drawable.height, nvpix->linear,
+ ppix->devKind);
+ }
+
+ xfree(nvpix->linear);
+ nvpix->linear = NULL;
+ }
+
+ nvpix->dirty = true;
+}
+
static void *
nouveau_exa_pixmap_map(PixmapPtr ppix)
{
@@ -429,15 +465,26 @@ nouveau_exa_pixmap_map(PixmapPtr ppix)
NVPtr pNv = NVPTR(pScrn);
struct nouveau_bo *bo = nouveau_pixmap_bo(ppix);
unsigned delta = nouveau_pixmap_offset(ppix);
+ struct nouveau_pixmap *nvpix = nouveau_pixmap(ppix);
- if (!pNv->wfb_enabled && bo->tiled) {
- struct nouveau_pixmap *nvpix = nouveau_pixmap(ppix);
+ /* Attempt to handle first access with more grace.
+ * This handles multiple sw accesses (such as trapezoid rasterisation).
+ * This avoids the first access on potentially uncached memory.
+ */
+ if (!nvpix->dirty) {
+ if (!nvpix->linear)
+ nvpix->linear = xalloc(ppix->devKind * ppix->drawable.height);
+ nouveau_bo_map(bo, NOUVEAU_BO_RDWR);
+ return nvpix->linear;
+ }
+
+ if (!pNv->wfb_enabled && bo->tiled && nvpix && nvpix->dirty) {
nvpix->map_refcount++;
if (nvpix->linear)
return nvpix->linear;
- nvpix->linear = xcalloc(1, ppix->devKind * ppix->drawable.height);
+ nvpix->linear = xalloc(ppix->devKind * ppix->drawable.height);
NVAccelDownloadM2MF(ppix, 0, 0, ppix->drawable.width,
ppix->drawable.height, nvpix->linear,
@@ -457,10 +504,9 @@ nouveau_exa_pixmap_unmap(PixmapPtr ppix)
ScrnInfoPtr pScrn = xf86Screens[ppix->drawable.pScreen->myNum];
NVPtr pNv = NVPTR(pScrn);
struct nouveau_bo *bo = nouveau_pixmap_bo(ppix);
+ struct nouveau_pixmap *nvpix = nouveau_pixmap(ppix);
- if (!pNv->wfb_enabled && bo->tiled) {
- struct nouveau_pixmap *nvpix = nouveau_pixmap(ppix);
-
+ if (!pNv->wfb_enabled && bo->tiled && nvpix && nvpix->dirty) {
if (--nvpix->map_refcount)
return;
@@ -489,6 +535,8 @@ nouveau_exa_download_from_screen(PixmapPtr pspix, int x, int y, int w, int h,
cpp = pspix->drawable.bitsPerPixel >> 3;
offset = (y * src_pitch) + (x * cpp);
+ nouveau_exa_pixmap_prepare_for_accel(pspix);
+
if (pNv->GART) {
if (NVAccelDownloadM2MF(pspix, x, y, w, h, dst, dst_pitch))
return TRUE;
@@ -516,6 +564,8 @@ nouveau_exa_upload_to_screen(PixmapPtr pdpix, int x, int y, int w, int h,
dst_pitch = exaGetPixmapPitch(pdpix);
cpp = pdpix->drawable.bitsPerPixel >> 3;
+ nouveau_exa_pixmap_prepare_for_accel(pdpix);
+
/* try hostdata transfer */
if (w * h * cpp < 16*1024) /* heuristic */
{
@@ -886,7 +936,7 @@ nouveau_exa_wfb_setup_wrap(ReadMemoryProcPtr *pRead,
else
wfb_pixmaps[i].end = wfb_pixmaps[i].start + nvpix->bo->size;
wfb_pixmaps[i].used = true;
- wfb_pixmaps[i].tiled = nouveau_exa_pixmap_is_tiled(pPixmap);
+ wfb_pixmaps[i].tiled = nouveau_exa_pixmap_is_tiled(pPixmap) && !nvpix->linear;
/* Division is too expensive for large numbers, so we precalculate a multiplication factor. */
wfb_pixmaps[i].multiply_factor = (0xFFFFFFFF/exaGetPixmapPitch(pPixmap)) + 1;
wfb_pixmaps[i].cpp = (pPixmap->drawable.bitsPerPixel >> 3);
diff --git a/src/nv04_exa.c b/src/nv04_exa.c
index de5da67..a7e9b16 100644
--- a/src/nv04_exa.c
+++ b/src/nv04_exa.c
@@ -83,6 +83,8 @@ NV04EXAPrepareSolid(PixmapPtr pPixmap, int alu, Pixel planemask, Pixel fg)
unsigned delta = nouveau_pixmap_offset(pPixmap);
unsigned int fmt, pitch, color;
+ nouveau_exa_pixmap_prepare_for_accel(pPixmap);
+
WAIT_RING(chan, 64);
planemask |= ~0 << pPixmap->drawable.bitsPerPixel;
@@ -190,6 +192,9 @@ NV04EXAPrepareCopy(PixmapPtr pSrcPixmap, PixmapPtr pDstPixmap, int dx, int dy,
unsigned dst_delta = nouveau_pixmap_offset(pDstPixmap);
int fmt;
+ nouveau_exa_pixmap_prepare_for_accel(pSrcPixmap);
+ nouveau_exa_pixmap_prepare_for_accel(pDstPixmap);
+
WAIT_RING(chan, 64);
if (pSrcPixmap->drawable.bitsPerPixel !=
diff --git a/src/nv10_exa.c b/src/nv10_exa.c
index 291c2da..bef7d40 100644
--- a/src/nv10_exa.c
+++ b/src/nv10_exa.c
@@ -626,6 +626,11 @@ Bool NV10EXAPrepareComposite(int op,
NVPtr pNv = NVPTR(pScrn);
struct nouveau_channel *chan = pNv->chan;
+ nouveau_exa_pixmap_prepare_for_accel(pSrc);
+ if (pMask)
+ nouveau_exa_pixmap_prepare_for_accel(pMask);
+ nouveau_exa_pixmap_prepare_for_accel(pDst);
+
WAIT_RING(chan, 128);
if (NV10Check_A8plusA8_Feasability(pSrcPicture,pMaskPicture,pDstPicture,op))
diff --git a/src/nv30_exa.c b/src/nv30_exa.c
index d3f83d3..58e5f27 100644
--- a/src/nv30_exa.c
+++ b/src/nv30_exa.c
@@ -447,6 +447,11 @@ NV30EXAPrepareComposite(int op, PicturePtr psPict,
int fpid = NV30EXA_FPID_PASS_COL0;
NV30EXA_STATE;
+ nouveau_exa_pixmap_prepare_for_accel(psPix);
+ if (pmPix)
+ nouveau_exa_pixmap_prepare_for_accel(pmPix);
+ nouveau_exa_pixmap_prepare_for_accel(pdPix);
+
WAIT_RING(chan, 128);
blend = NV30_GetPictOpRec(op);
diff --git a/src/nv40_exa.c b/src/nv40_exa.c
index 68da331..d10d93b 100644
--- a/src/nv40_exa.c
+++ b/src/nv40_exa.c
@@ -406,6 +406,11 @@ NV40EXAPrepareComposite(int op, PicturePtr psPict,
int fpid = NV40EXA_FPID_PASS_COL0;
NV40EXA_STATE;
+ nouveau_exa_pixmap_prepare_for_accel(psPix);
+ if (pmPix)
+ nouveau_exa_pixmap_prepare_for_accel(pmPix);
+ nouveau_exa_pixmap_prepare_for_accel(pdPix);
+
WAIT_RING(chan, 128);
blend = NV40_GetPictOpRec(op);
diff --git a/src/nv50_exa.c b/src/nv50_exa.c
index ac6b6b4..8b3dab7 100644
--- a/src/nv50_exa.c
+++ b/src/nv50_exa.c
@@ -250,6 +250,8 @@ NV50EXAPrepareSolid(PixmapPtr pdpix, int alu, Pixel planemask, Pixel fg)
NV50EXA_LOCALS(pdpix);
uint32_t fmt;
+ nouveau_exa_pixmap_prepare_for_accel(pdpix);
+
WAIT_RING(chan, 64);
if (!NV50EXA2DSurfaceFormat(pdpix, &fmt))
@@ -311,6 +313,9 @@ NV50EXAPrepareCopy(PixmapPtr pspix, PixmapPtr pdpix, int dx, int dy,
{
NV50EXA_LOCALS(pdpix);
+ nouveau_exa_pixmap_prepare_for_accel(pspix);
+ nouveau_exa_pixmap_prepare_for_accel(pdpix);
+
WAIT_RING(chan, 64);
if (!NV50EXAAcquireSurface2D(pspix, 1))
@@ -769,6 +774,11 @@ NV50EXAPrepareComposite(int op,
NV50EXA_LOCALS(pspix);
const unsigned shd_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_RD;
+ nouveau_exa_pixmap_prepare_for_accel(pspix);
+ if (pmpix)
+ nouveau_exa_pixmap_prepare_for_accel(pmpix);
+ nouveau_exa_pixmap_prepare_for_accel(pdpix);
+
WAIT_RING (chan, 128);
BEGIN_RING(chan, eng2d, 0x0110, 1);
OUT_RING (chan, 0);
diff --git a/src/nv_proto.h b/src/nv_proto.h
index 2ded43d..b9fca74 100644
--- a/src/nv_proto.h
+++ b/src/nv_proto.h
@@ -76,6 +76,7 @@ void NVTakedownDma(ScrnInfoPtr pScrn);
Bool nouveau_exa_init(ScreenPtr pScreen);
Bool nouveau_exa_pixmap_is_onscreen(PixmapPtr pPixmap);
bool nouveau_exa_pixmap_is_tiled(PixmapPtr ppix);
+void nouveau_exa_pixmap_prepare_for_accel(PixmapPtr ppix);
void nouveau_exa_wfb_setup_wrap(ReadMemoryProcPtr *pRead,
WriteMemoryProcPtr *pWrite,
DrawablePtr pDraw);
diff --git a/src/nv_type.h b/src/nv_type.h
index 7d34c8b..9b336a1 100644
--- a/src/nv_type.h
+++ b/src/nv_type.h
@@ -465,6 +465,7 @@ struct nouveau_pixmap {
unsigned size;
int map_refcount;
int tiling_mode;
+ bool dirty;
};
static inline struct nouveau_pixmap *
--
1.6.2.2
[-- Attachment #4: 0003-nv50-support-NV9X-hw-with-wfb.patch --]
[-- Type: text/x-patch, Size: 5081 bytes --]
From 3ed01bedd6d7fcb6e5493b17ff8a5d5bd3f23321 Mon Sep 17 00:00:00 2001
From: Maarten Maathuis <madman2003@gmail.com>
Date: Sat, 14 Mar 2009 18:07:31 +0100
Subject: [PATCH 3/8] nv50: support NV9X hw with wfb
- NVAX hardware seems to have the NV5X and NV8X behaviour (based on one sample).
---
src/nouveau_exa.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++----
1 files changed, 46 insertions(+), 4 deletions(-)
diff --git a/src/nouveau_exa.c b/src/nouveau_exa.c
index 72e783e..b8bbb53 100644
--- a/src/nouveau_exa.c
+++ b/src/nouveau_exa.c
@@ -385,6 +385,12 @@ nouveau_exa_modify_pixmap_header(PixmapPtr ppix, int width, int height,
/* This allignment is very important. */
devkind = (width * cpp + 63) & ~63;
nvpix->size = devkind * ah;
+
+ /* A 128 bytes block is potentially moved 6 positions ahead.
+ * It would be very difficult to predict this, so we overallocate.
+ */
+ if (pNv->NVArch >= 0x90 && pNv->NVArch < 0xA0)
+ nvpix->size += 768;
}
ret = nouveau_bo_new(pNv->dev, flags, 0, nvpix->size,
@@ -800,21 +806,51 @@ const unsigned int mask_pitch_inv[] = {~0, ~7, ~31, ~63, ~63, ~63, ~63 };
const unsigned int mask_height[] = {0, 1, 3, 7, 15, 31, 63 };
const unsigned int mask_pitch[] = {0, 7, 31, 63, 63, 63, 63 };
+/* This applies to nv9X hw. They do strange memory rearranging. */
+/* 128 bytes blocks (which correspond to 32x4 tiles) are moved forward and backward in a special pattern. */
+static bool nv90_mode;
+const int pattern[] = { 0, 1, 2, 3, /**/ 0, 1, 2, 3, /**/ 1, 2, 3, /**/ 0, 2, 3, /**/
+ 0, 1, 3, /**/ 0, 1, 2, /**/ 1, 2, 3, /**/ 0, 2, 3, /**/ 0, 1, 3, /**/ 0, 1, 2 /**/};
+const int tile_offset[] = {
+ 0, 0, 0, 0, 0, 0, 0, 0, /* type 0 */
+ 2, 2, 2, 2, 2, 2, -6, -6, /* type 1 */
+ 4, 4, 4, 4, -4, -4, -4, -4, /* type 2 */
+ 6, 6, -2, -2, -2, -2, -2, -2 /* type 3 */
+ };
+
#define X_REMAINDER (x & mask_pitch[tile_height])
#define Y_REMAINDER (y & mask_height[tile_height])
/* tile_height and tile_pitch are expressed in powers of two */
static inline unsigned int
nouveau_exa_wfb_recurse_offset(unsigned int offset, unsigned int tile_height,
- unsigned int x, unsigned int y, unsigned int num_tiles_width)
+ unsigned int x, unsigned int y, unsigned int num_tiles_width, bool first)
{
offset += (((x & mask_pitch_inv[tile_height]) >> tile_pitch[tile_height]) + (((y & mask_height_inv[tile_height]) >> tile_height) * num_tiles_width)) * (1 << (tile_height + tile_pitch[tile_height]));
if (tile_height > 1)
- offset = nouveau_exa_wfb_recurse_offset(offset, tile_height - 1, X_REMAINDER, Y_REMAINDER, num_tiles[tile_height]);
+ offset = nouveau_exa_wfb_recurse_offset(offset, tile_height - 1, X_REMAINDER, Y_REMAINDER, num_tiles[tile_height], false);
else
offset += (Y_REMAINDER * (1 << tile_pitch[tile_height])) + X_REMAINDER;
+ /* Here comes the correction for the unusual memory mapping of NV9X hw. */
+ if (first && nv90_mode) {
+ unsigned int suboffset, suboffset2, suboffset3;
+
+ /* 128 byte blocks within a larger block of 32768 bytes */
+ /* the first block misses the first "0, 1, 2, 3" sequence, so we add an extra 4096 bytes offset. */
+ suboffset = ((offset + 4096) & 0x7FFF) & ~0x7F;
+ /* now we have 256 blocks */
+ suboffset >>= 7;
+
+ /* now we have 32 rows */
+ suboffset2 = suboffset >> 3;
+ /* tile within row */
+ suboffset3 = suboffset & 0x7;
+
+ offset += tile_offset[suboffset3 + 8*pattern[suboffset2]] * 128;
+ }
+
return offset;
}
@@ -851,7 +887,7 @@ nouveau_exa_wfb_read_memory(const void *src, int size)
line_x = offset - line_y * LINEAR_PITCH;
new_src = pPixmap->devPrivate.ptr +
- nouveau_exa_wfb_recurse_offset(0, wfb_pixmaps[i].tile_height, line_x, line_y, wfb_pixmaps[i].num_tiles_width) +
+ nouveau_exa_wfb_recurse_offset(0, wfb_pixmaps[i].tile_height, line_x, line_y, wfb_pixmaps[i].num_tiles_width, true) +
subpixel_offset;
memcpy(&bits, new_src, size);
@@ -892,7 +928,7 @@ nouveau_exa_wfb_write_memory(void *dst, FbBits value, int size)
line_x = offset - line_y * LINEAR_PITCH;
new_dst = pPixmap->devPrivate.ptr +
- nouveau_exa_wfb_recurse_offset(0, wfb_pixmaps[i].tile_height, line_x, line_y, wfb_pixmaps[i].num_tiles_width) +
+ nouveau_exa_wfb_recurse_offset(0, wfb_pixmaps[i].tile_height, line_x, line_y, wfb_pixmaps[i].num_tiles_width, true) +
subpixel_offset;
memcpy(new_dst, &value, size);
@@ -905,6 +941,8 @@ nouveau_exa_wfb_setup_wrap(ReadMemoryProcPtr *pRead,
{
PixmapPtr pPixmap;
struct nouveau_pixmap *nvpix;
+ ScrnInfoPtr pScrn = xf86Screens[pDraw->pScreen->myNum];
+ NVPtr pNv = NVPTR(pScrn);
if (!pRead || !pWrite)
return;
@@ -947,6 +985,10 @@ nouveau_exa_wfb_setup_wrap(ReadMemoryProcPtr *pRead,
wfb_pixmaps[i].tile_height = nvpix->tiling_mode + 1;
wfb_pixmaps[i].num_tiles_width = LINEAR_PITCH/(1 << tile_pitch[wfb_pixmaps[i].tile_height]);
}
+ if (pNv->NVArch >= 0x90 && pNv->NVArch < 0xA0)
+ nv90_mode = true;
+ else
+ nv90_mode = false;
*pRead = nouveau_exa_wfb_read_memory;
*pWrite = nouveau_exa_wfb_write_memory;
--
1.6.2.2
[-- Attachment #5: 0004-exa-don-t-put-cpp-0-pixmaps-in-vram.patch --]
[-- Type: text/x-patch, Size: 886 bytes --]
From 43c489e315fc0169d5595578bd529753c9b9214b Mon Sep 17 00:00:00 2001
From: Maarten Maathuis <madman2003@gmail.com>
Date: Sat, 14 Mar 2009 19:38:09 +0100
Subject: [PATCH 4/8] exa: don't put cpp == 0 pixmaps in vram
---
src/nouveau_exa.c | 6 +++++-
1 files changed, 5 insertions(+), 1 deletions(-)
diff --git a/src/nouveau_exa.c b/src/nouveau_exa.c
index b8bbb53..4cbcf50 100644
--- a/src/nouveau_exa.c
+++ b/src/nouveau_exa.c
@@ -358,9 +358,13 @@ nouveau_exa_modify_pixmap_header(PixmapPtr ppix, int width, int height,
if (!nvpix->bo && nvpix->size) {
uint32_t cpp = ppix->drawable.bitsPerPixel >> 3;
- uint32_t flags = NOUVEAU_BO_VRAM;
+ uint32_t flags = 0;
int ret;
+ /* Let's not waste vram on useless pixmaps. */
+ if (cpp)
+ flags |= NOUVEAU_BO_VRAM;
+
if (pNv->Architecture >= NV_ARCH_50 && cpp) {
uint32_t ah;
if (height > 47) {
--
1.6.2.2
[-- Attachment #6: 0005-xv-some-fixes.patch --]
[-- Type: text/x-patch, Size: 2055 bytes --]
From c3cd97c3a3ab018938085cfb67832c2b670d0667 Mon Sep 17 00:00:00 2001
From: Maarten Maathuis <madman2003@gmail.com>
Date: Sat, 14 Mar 2009 23:26:46 +0100
Subject: [PATCH 5/8] xv: some fixes
---
src/nouveau_xv.c | 24 +++++++++++++-----------
src/nv50_xv.c | 2 +-
2 files changed, 14 insertions(+), 12 deletions(-)
diff --git a/src/nouveau_xv.c b/src/nouveau_xv.c
index 1a39c09..c99e1e8 100644
--- a/src/nouveau_xv.c
+++ b/src/nouveau_xv.c
@@ -718,10 +718,8 @@ NV_calculate_pitches_and_mem_size(NVPtr pNv, int action_flags, int *srcPitch,
{
int tmp;
- if (pNv->Architecture >= NV_ARCH_50) {
- npixels = (npixels + 7) & ~7;
- nlines = (nlines + 7) & ~7;
- }
+ if (pNv->Architecture >= NV_ARCH_50)
+ nlines = (nlines + 3) & ~3;
if (action_flags & IS_YV12) {
*srcPitch = (width + 3) & ~3; /* of luma */
@@ -1252,13 +1250,17 @@ CPU_copy:
exaMoveInPixmap(ppix);
/* check if it made it offscreen */
-#if NOUVEAU_EXA_PIXMAPS
- if (!pNv->EXADriverPtr->PixmapIsOffscreen(ppix))
-#else
- if (exaGetPixmapOffset(ppix) >= pNv->EXADriverPtr->memorySize)
-#endif
- /* we lost, insufficient space probably */
- return BadAlloc;
+ if (pNv->EXADriverPtr->PixmapIsOffscreen) {
+ if (!pNv->EXADriverPtr->PixmapIsOffscreen(ppix)) {
+ /* we lost, insufficient space probably */
+ return BadAlloc;
+ }
+ } else {
+ if (exaGetPixmapOffset(ppix) >= pNv->EXADriverPtr->memorySize) {
+ /* we lost, insufficient space probably */
+ return BadAlloc;
+ }
+ }
ExaOffscreenMarkUsed(ppix);
diff --git a/src/nv50_xv.c b/src/nv50_xv.c
index 9601326..df35b1a 100644
--- a/src/nv50_xv.c
+++ b/src/nv50_xv.c
@@ -76,7 +76,7 @@ nv50_xv_state_emit(PixmapPtr ppix, int id, struct nouveau_bo *src,
case 24: OUT_RING (chan, NV50TCL_RT_FORMAT_24BPP); break;
case 16: OUT_RING (chan, NV50TCL_RT_FORMAT_16BPP); break;
}
- OUT_RING (chan, 0);
+ OUT_RING (chan, nv50_exa_get_tile_mode(ppix));
OUT_RING (chan, 0);
BEGIN_RING(chan, tesla, NV50TCL_RT_HORIZ(0), 2);
OUT_RING (chan, ppix->drawable.width);
--
1.6.2.2
[-- Attachment #7: 0006-NV50-add-NV84-wfb-support.patch --]
[-- Type: text/x-patch, Size: 5875 bytes --]
From 0171e3254516428a6904ad5a9467ea11f125f995 Mon Sep 17 00:00:00 2001
From: Maarten Maathuis <madman2003@gmail.com>
Date: Wed, 18 Mar 2009 09:36:51 +0100
Subject: [PATCH 6/8] NV50: add NV84 wfb support
---
src/nouveau_exa.c | 58 +++++++++++++++++++++++++++++++++++++++-------------
src/nv_driver.c | 2 +-
src/nv_proto.h | 2 +-
3 files changed, 45 insertions(+), 17 deletions(-)
diff --git a/src/nouveau_exa.c b/src/nouveau_exa.c
index 4cbcf50..43d9945 100644
--- a/src/nouveau_exa.c
+++ b/src/nouveau_exa.c
@@ -390,11 +390,13 @@ nouveau_exa_modify_pixmap_header(PixmapPtr ppix, int width, int height,
devkind = (width * cpp + 63) & ~63;
nvpix->size = devkind * ah;
- /* A 128 bytes block is potentially moved 6 positions ahead.
+ /* A 128 bytes block is potentially moved 6/2 positions ahead.
* It would be very difficult to predict this, so we overallocate.
*/
if (pNv->NVArch >= 0x90 && pNv->NVArch < 0xA0)
nvpix->size += 768;
+ else if (pNv->NVArch == 0x84)
+ nvpix->size += 256;
}
ret = nouveau_bo_new(pNv->dev, flags, 0, nvpix->size,
@@ -810,16 +812,20 @@ const unsigned int mask_pitch_inv[] = {~0, ~7, ~31, ~63, ~63, ~63, ~63 };
const unsigned int mask_height[] = {0, 1, 3, 7, 15, 31, 63 };
const unsigned int mask_pitch[] = {0, 7, 31, 63, 63, 63, 63 };
-/* This applies to nv9X hw. They do strange memory rearranging. */
+/* Some cards do strange memory rearrangement. */
/* 128 bytes blocks (which correspond to 32x4 tiles) are moved forward and backward in a special pattern. */
-static bool nv90_mode;
-const int pattern[] = { 0, 1, 2, 3, /**/ 0, 1, 2, 3, /**/ 1, 2, 3, /**/ 0, 2, 3, /**/
+static bool nv9x_mode;
+static bool nv84_mode;
+const int pattern_nv9x[] = { 0, 1, 2, 3, /**/ 0, 1, 2, 3, /**/ 1, 2, 3, /**/ 0, 2, 3, /**/
0, 1, 3, /**/ 0, 1, 2, /**/ 1, 2, 3, /**/ 0, 2, 3, /**/ 0, 1, 3, /**/ 0, 1, 2 /**/};
+const int pattern_nv84[] = { 4, /**/ 5, 5, /**/ 4, 5, /**/ 4, 4, /**/ 5, 5, /**/ 4, 4, /**/ 5, 4, /**/ 5, 5, /**/ 4 };
const int tile_offset[] = {
0, 0, 0, 0, 0, 0, 0, 0, /* type 0 */
2, 2, 2, 2, 2, 2, -6, -6, /* type 1 */
4, 4, 4, 4, -4, -4, -4, -4, /* type 2 */
- 6, 6, -2, -2, -2, -2, -2, -2 /* type 3 */
+ 6, 6, -2, -2, -2, -2, -2, -2, /* type 3 */
+ 0, 0, 0, 0, 2, 2, -2, -2, /* type 4 */
+ 2, 2, -2, -2, 0, 0, 0, 0, /* type 5 */
};
#define X_REMAINDER (x & mask_pitch[tile_height])
@@ -837,8 +843,24 @@ nouveau_exa_wfb_recurse_offset(unsigned int offset, unsigned int tile_height,
else
offset += (Y_REMAINDER * (1 << tile_pitch[tile_height])) + X_REMAINDER;
- /* Here comes the correction for the unusual memory mapping of NV9X hw. */
- if (first && nv90_mode) {
+ /* Here comes the correction for the unusual memory mapping of some hw. */
+ if (first && nv84_mode) {
+ unsigned int suboffset, suboffset2, suboffset3;
+
+ /* 128 byte blocks within a larger block of 16384 bytes */
+ suboffset = (offset & 0x3FFF) & ~0x7F;
+ /* now we have 128 blocks */
+ suboffset >>= 7;
+
+ /* now we have 16 rows */
+ suboffset2 = suboffset >> 3;
+ /* tile within row */
+ suboffset3 = suboffset & 0x7;
+
+ offset += tile_offset[suboffset3 + 8*pattern_nv84[suboffset2]] * 128;
+ }
+
+ if (first && nv9x_mode) {
unsigned int suboffset, suboffset2, suboffset3;
/* 128 byte blocks within a larger block of 32768 bytes */
@@ -852,7 +874,7 @@ nouveau_exa_wfb_recurse_offset(unsigned int offset, unsigned int tile_height,
/* tile within row */
suboffset3 = suboffset & 0x7;
- offset += tile_offset[suboffset3 + 8*pattern[suboffset2]] * 128;
+ offset += tile_offset[suboffset3 + 8*pattern_nv9x[suboffset2]] * 128;
}
return offset;
@@ -945,8 +967,6 @@ nouveau_exa_wfb_setup_wrap(ReadMemoryProcPtr *pRead,
{
PixmapPtr pPixmap;
struct nouveau_pixmap *nvpix;
- ScrnInfoPtr pScrn = xf86Screens[pDraw->pScreen->myNum];
- NVPtr pNv = NVPTR(pScrn);
if (!pRead || !pWrite)
return;
@@ -989,10 +1009,6 @@ nouveau_exa_wfb_setup_wrap(ReadMemoryProcPtr *pRead,
wfb_pixmaps[i].tile_height = nvpix->tiling_mode + 1;
wfb_pixmaps[i].num_tiles_width = LINEAR_PITCH/(1 << tile_pitch[wfb_pixmaps[i].tile_height]);
}
- if (pNv->NVArch >= 0x90 && pNv->NVArch < 0xA0)
- nv90_mode = true;
- else
- nv90_mode = false;
*pRead = nouveau_exa_wfb_read_memory;
*pWrite = nouveau_exa_wfb_write_memory;
@@ -1024,10 +1040,22 @@ nouveau_exa_wfb_finish_wrap(DrawablePtr pDraw)
}
void
-nouveau_exa_wfb_init()
+nouveau_exa_wfb_init(ScrnInfoPtr pScrn)
{
+ NVPtr pNv = NVPTR(pScrn);
int i;
+ if (pNv->NVArch >= 0x90 && pNv->NVArch < 0xA0) {
+ nv9x_mode = true;
+ nv84_mode = false;
+ } else if (pNv->NVArch == 0x84) {
+ nv9x_mode = false;
+ nv84_mode = true;
+ } else {
+ nv9x_mode = false;
+ nv84_mode = false;
+ }
+
for (i = 0; i < 6; i++) {
wfb_pixmaps[i].ppix = NULL;
wfb_pixmaps[i].start = 0;
diff --git a/src/nv_driver.c b/src/nv_driver.c
index f10400e..6aa31c0 100644
--- a/src/nv_driver.c
+++ b/src/nv_driver.c
@@ -1533,7 +1533,7 @@ NVPreInit(ScrnInfoPtr pScrn, int flags)
#if XORG_VERSION_CURRENT >= XORG_VERSION_NUMERIC(1,6,99,0,0)
if (!pNv->NoAccel && pNv->exa_driver_pixmaps && pNv->Architecture == NV_ARCH_50) {
pNv->wfb_enabled = true;
- nouveau_exa_wfb_init();
+ nouveau_exa_wfb_init(pScrn);
if (xf86LoadSubModule(pScrn, "wfb") == NULL)
NVPreInitFail("\n");
diff --git a/src/nv_proto.h b/src/nv_proto.h
index b9fca74..1c93a4b 100644
--- a/src/nv_proto.h
+++ b/src/nv_proto.h
@@ -81,7 +81,7 @@ void nouveau_exa_wfb_setup_wrap(ReadMemoryProcPtr *pRead,
WriteMemoryProcPtr *pWrite,
DrawablePtr pDraw);
void nouveau_exa_wfb_finish_wrap(DrawablePtr pDraw);
-void nouveau_exa_wfb_init();
+void nouveau_exa_wfb_init(ScrnInfoPtr pScrn);
/* in nv_hw.c */
void NVCalcStateExt(ScrnInfoPtr,struct nouveau_mode_state *,int,int,int,int,int,int);
--
1.6.2.2
[-- Attachment #8: 0007-exa-failing-to-create-a-pixmap-is-fatal.patch --]
[-- Type: text/x-patch, Size: 931 bytes --]
From df101754213904c6f1889fe7c7f3739f8890617b Mon Sep 17 00:00:00 2001
From: Maarten Maathuis <madman2003@gmail.com>
Date: Mon, 6 Apr 2009 00:18:04 +0200
Subject: [PATCH 7/8] exa: failing to create a pixmap is fatal
---
src/nouveau_exa.c | 7 ++-----
1 files changed, 2 insertions(+), 5 deletions(-)
diff --git a/src/nouveau_exa.c b/src/nouveau_exa.c
index 43d9945..dc6bd62 100644
--- a/src/nouveau_exa.c
+++ b/src/nouveau_exa.c
@@ -401,11 +401,8 @@ nouveau_exa_modify_pixmap_header(PixmapPtr ppix, int width, int height,
ret = nouveau_bo_new(pNv->dev, flags, 0, nvpix->size,
&nvpix->bo);
- if (ret) {
- xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
- "Failed pixmap creation: %d\n", ret);
- return FALSE;
- }
+ if (ret)
+ FatalError("Failed pixmap creation: %d\n", ret);
/* We don't want devPrivate.ptr set at all. */
miModifyPixmapHeader(ppix, width, height, depth, bpp, devkind,
--
1.6.2.2
[-- Attachment #9: 0008-exa-avoid-overhead-when-there-are-no-tiled-pixmaps.patch --]
[-- Type: text/x-patch, Size: 1092 bytes --]
From da450f5a73fc35e796b26ef489a5c86ca63e2fca Mon Sep 17 00:00:00 2001
From: Maarten Maathuis <madman2003@gmail.com>
Date: Thu, 9 Apr 2009 19:51:27 +0200
Subject: [PATCH 8/8] exa: avoid overhead when there are no tiled pixmaps wrapped
---
src/nouveau_exa.c | 14 ++++++++++++--
1 files changed, 12 insertions(+), 2 deletions(-)
diff --git a/src/nouveau_exa.c b/src/nouveau_exa.c
index dc6bd62..d4a18df 100644
--- a/src/nouveau_exa.c
+++ b/src/nouveau_exa.c
@@ -1007,8 +1007,18 @@ nouveau_exa_wfb_setup_wrap(ReadMemoryProcPtr *pRead,
wfb_pixmaps[i].num_tiles_width = LINEAR_PITCH/(1 << tile_pitch[wfb_pixmaps[i].tile_height]);
}
- *pRead = nouveau_exa_wfb_read_memory;
- *pWrite = nouveau_exa_wfb_write_memory;
+ for (i = 0; i < 6; i++)
+ if (wfb_pixmaps[i].used && wfb_pixmaps[i].tiled)
+ break;
+
+ /* Do we have a tiled pixmap pending? */
+ if (i < 6) {
+ *pRead = nouveau_exa_wfb_read_memory;
+ *pWrite = nouveau_exa_wfb_write_memory;
+ } else {
+ *pRead = nouveau_exa_wfb_read_memory_linear;
+ *pWrite = nouveau_exa_wfb_write_memory_linear;
+ }
}
void
--
1.6.2.2
[-- Attachment #10: Type: text/plain, Size: 181 bytes --]
_______________________________________________
Nouveau mailing list
Nouveau-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
http://lists.freedesktop.org/mailman/listinfo/nouveau
^ permalink raw reply related [flat|nested] 6+ messages in thread
* Re: nv50: wfb patches
[not found] ` <6d4bc9fc0904101231l4373cc92h1f32b93dec9cc8b8-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2009-04-12 22:25 ` Hervé Cauwelier
[not found] ` <49E26A53.6010004-GANU6spQydw@public.gmane.org>
0 siblings, 1 reply; 6+ messages in thread
From: Hervé Cauwelier @ 2009-04-12 22:25 UTC (permalink / raw)
To: Maarten Maathuis
Cc: nouveau-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
Is there some option to pass? I have xserver 1.6, drm 2.4.9 and nouveau
master with your patches, but I see no progress in gnome-terminal
refresh when maximising the window or switching desktops. It still takes
like a couple of seconds where the CPU is at 100 %.
Some report from dmesg:
[drm] Initialized drm 1.1.0 20060810
nouveau 0000:01:00.0: PCI INT A -> GSI 16 (level, low) -> IRQ 16
nouveau 0000:01:00.0: setting latency timer to 64
[drm] Detected an NV50 generation card (0x086700a2)
[drm] Initialized nouveau 0.0.12 libdrm-2.4.9 on minor 0
[drm] Allocating FIFO number 1
[drm] nouveau_fifo_alloc: initialised FIFO 1
[drm] Allocating FIFO number 2
[drm] nouveau_fifo_alloc: initialised FIFO 2
And Xorg.0.log: http://pastebin.ca/1390029
Regards
Maarten Maathuis a écrit :
> Updated patches, with one performance improvement. Not checking for
> tiled pixmaps when all pending pixmaps are linear. This reduces the
> overhead to the point that you are doing small memcopies instead of a
> large one.
>
> I think i solved the XSHM problem, simply by accelerating it
> (http://cgit.freedesktop.org/xorg/xserver/commit/?id=1b5758bef0840c6614244e321790231b3c9477c9).
>
> I'll be fading to the background for a few months pretty soon, so
> questions that involve access to my main machine (running nouveau,
> xorg-git, etc) should be asked soon (less than a week).
>
> Maarten.
>
>
> ------------------------------------------------------------------------
>
> _______________________________________________
> Nouveau mailing list
> Nouveau@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/nouveau
_______________________________________________
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: nv50: wfb patches
[not found] ` <49E26A53.6010004-GANU6spQydw@public.gmane.org>
@ 2009-04-12 22:39 ` Maarten Maathuis
0 siblings, 0 replies; 6+ messages in thread
From: Maarten Maathuis @ 2009-04-12 22:39 UTC (permalink / raw)
To: Hervé Cauwelier
Cc: nouveau-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
Two things:
- wfb is not supposed to solve that problem, it's only supposed to
solve the problem of software access to tiled pixmaps
- wfb+exa requires something newer than xorg-server-1.6.0
The problem of getting the hw to do scanout from tiled memory still
exists (darktama tried to replicate what the blob does without success
as far as I know). The only workaround is some kind of shadow
fb, which is more of a political issue.
Maarten.
2009/4/13 Hervé Cauwelier <herve.cauwelier-GANU6spQydw@public.gmane.org>:
> Is there some option to pass? I have xserver 1.6, drm 2.4.9 and nouveau
> master with your patches, but I see no progress in gnome-terminal
> refresh when maximising the window or switching desktops. It still takes
> like a couple of seconds where the CPU is at 100 %.
>
> Some report from dmesg:
>
> [drm] Initialized drm 1.1.0 20060810
> nouveau 0000:01:00.0: PCI INT A -> GSI 16 (level, low) -> IRQ 16
> nouveau 0000:01:00.0: setting latency timer to 64
> [drm] Detected an NV50 generation card (0x086700a2)
> [drm] Initialized nouveau 0.0.12 libdrm-2.4.9 on minor 0
> [drm] Allocating FIFO number 1
> [drm] nouveau_fifo_alloc: initialised FIFO 1
> [drm] Allocating FIFO number 2
> [drm] nouveau_fifo_alloc: initialised FIFO 2
>
> And Xorg.0.log: http://pastebin.ca/1390029
>
> Regards
>
> Maarten Maathuis a écrit :
>> Updated patches, with one performance improvement. Not checking for
>> tiled pixmaps when all pending pixmaps are linear. This reduces the
>> overhead to the point that you are doing small memcopies instead of a
>> large one.
>>
>> I think i solved the XSHM problem, simply by accelerating it
>> (http://cgit.freedesktop.org/xorg/xserver/commit/?id=1b5758bef0840c6614244e321790231b3c9477c9).
>>
>> I'll be fading to the background for a few months pretty soon, so
>> questions that involve access to my main machine (running nouveau,
>> xorg-git, etc) should be asked soon (less than a week).
>>
>> Maarten.
>>
>>
>> ------------------------------------------------------------------------
>>
>> _______________________________________________
>> Nouveau mailing list
>> Nouveau-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
>> http://lists.freedesktop.org/mailman/listinfo/nouveau
>
>
^ permalink raw reply [flat|nested] 6+ messages in thread
end of thread, other threads:[~2009-04-12 22:39 UTC | newest]
Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2009-03-29 23:54 nv50: wfb patches Maarten Maathuis
[not found] ` <6d4bc9fc0903291654r70e3e6f0pd6f56874a6c361f7-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2009-03-30 3:54 ` Younes Manton
[not found] ` <586c2acd0903292054r11841a3v7750ac27ea9a6963-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2009-03-30 10:20 ` Maarten Maathuis
[not found] ` <6d4bc9fc0903300320t7ab80b83k499c7aaf397303ea-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2009-04-10 19:31 ` Maarten Maathuis
[not found] ` <6d4bc9fc0904101231l4373cc92h1f32b93dec9cc8b8-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2009-04-12 22:25 ` Hervé Cauwelier
[not found] ` <49E26A53.6010004-GANU6spQydw@public.gmane.org>
2009-04-12 22:39 ` Maarten Maathuis
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.