All of lore.kernel.org
 help / color / mirror / Atom feed
From: Maarten Maathuis <madman2003-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
To: nouveau-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
Subject: Re: preliminary nv50 wfb patch
Date: Sat, 7 Mar 2009 16:04:50 +0100	[thread overview]
Message-ID: <6d4bc9fc0903070704u7eaadeb1v18ea65be45af5aaf@mail.gmail.com> (raw)
In-Reply-To: <6d4bc9fc0903070520q6418197vce462940290e47c5-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>

[-- Attachment #1: Type: text/plain, Size: 1069 bytes --]

This one has been optimised further; all usage of modulus has been removed.

There are still optimisations to do (like using hostdata transfers for
the first pixmap access), but this is getting closer to usable.

Maarten.

On Sat, Mar 7, 2009 at 2:20 PM, Maarten Maathuis <madman2003-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org> wrote:
> This version is a bit faster by avoiding the need for (SUB)TILE_WIDTH.
>
> On Sat, Mar 7, 2009 at 1:59 PM, Maarten Maathuis <madman2003-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org> wrote:
>> This patch will only work with Option "EXAPixmaps" "1", and will
>> prevent classic exa from working.
>>
>> Occasionally a pixmap fails to map, but that's not related to this patch.
>>
>> I haven't done any hardcore optimisations, but suggestions are
>> of course appreciated.
>>
>> In my experience some benchmarks suck now (gtkperf, for one, had its
>> performance halved), and qt4 is definitely faster than gtk (gtk has
>> some odd rendering scenarios).
>>
>> This is by no means finished, but I wanted to share what I have.
>>
>> Maarten.
>>
>

[-- Attachment #2: wfb_test17.patch --]
[-- Type: application/octet-stream, Size: 9999 bytes --]

diff --git a/src/nouveau_exa.c b/src/nouveau_exa.c
index 93fc3c5..d1e37b0 100644
--- a/src/nouveau_exa.c
+++ b/src/nouveau_exa.c
@@ -351,17 +351,16 @@ nouveau_exa_modify_pixmap_header(PixmapPtr ppix, int width, int height,
 
 	if (!nvpix->bo && nvpix->size) {
 		uint32_t cpp = ppix->drawable.bitsPerPixel >> 3;
-		/* At some point we should just keep 1bpp pixmaps in sysram */
 		uint32_t flags = NOUVEAU_BO_VRAM;
 		int ret;
 
 		if (pNv->Architecture >= NV_ARCH_50 && cpp) {
-			uint32_t aw = (width + 7) & ~7;
-			uint32_t ah = (height + 7) & ~7;
+			uint32_t ah = (height + 3) & ~3;
 
 			flags |= NOUVEAU_BO_TILED;
 
-			devkind = ((aw * cpp) + 63) & ~63;
+			/* This alligment is very important. */
+			devkind = (width * cpp + 63) & ~63;
 			nvpix->size = devkind * ah;
 		}
 
@@ -390,8 +389,11 @@ nouveau_exa_pixmap_is_tiled(PixmapPtr ppix)
 	NVPtr pNv = NVPTR(pScrn);
 
 	if (pNv->exa_driver_pixmaps) {
-		if (!nouveau_pixmap_bo(ppix)->tiled)
+		if (!nouveau_pixmap_bo(ppix))
 			return false;
+		if (nouveau_pixmap_bo(ppix)->tiled == 0)
+			return false;
+		return true;
 	} else
 	if (pNv->Architecture < NV_ARCH_50 ||
 	    exaGetPixmapOffset(ppix) < pNv->EXADriverPtr->offScreenBase)
@@ -406,7 +408,7 @@ nouveau_exa_pixmap_map(PixmapPtr ppix)
 	struct nouveau_bo *bo = nouveau_pixmap_bo(ppix);
 	unsigned delta = nouveau_pixmap_offset(ppix);
 
-	if (bo->tiled) {
+	if (0 && bo->tiled) {
 		struct nouveau_pixmap *nvpix = nouveau_pixmap(ppix);
 
 		nvpix->linear = xcalloc(1, ppix->devKind * ppix->drawable.height);
@@ -428,7 +430,7 @@ nouveau_exa_pixmap_unmap(PixmapPtr ppix)
 {
 	struct nouveau_bo *bo = nouveau_pixmap_bo(ppix);
 
-	if (bo->tiled) {
+	if (0 && bo->tiled) {
 		struct nouveau_pixmap *nvpix = nouveau_pixmap(ppix);
 
 		NVAccelUploadM2MF(ppix, 0, 0, ppix->drawable.width,
@@ -674,3 +676,197 @@ nouveau_exa_init(ScreenPtr pScreen)
 	pNv->EXADriverPtr = exa;
 	return TRUE;
 }
+
+/* WFB functions. */
+
+static FbBits
+nouveau_exa_wfb_read_memory_linear(const void *src, int size)
+{
+	/* Untranslated read: copy size bytes from src into a zeroed FbBits. */
+	FbBits value = 0;
+
+	memcpy(&value, src, size);
+	return value;
+}
+
+static void
+nouveau_exa_wfb_write_memory_linear(void *dst, FbBits value, int size)
+{
+	memcpy(dst, &value, size); /* untranslated write: the first size bytes of value go to dst */
+}
+
+#define TILE_PITCH 32 /* bytes per tile row */
+#define TILE_HEIGHT 4 /* rows per tile */
+#define SUBTILE_PITCH 8 /* bytes per subtile row */
+#define SUBTILE_HEIGHT 2 /* rows per subtile */
+
+#define LINEAR_PITCH (pPixmap->devKind) /* expands to use a local variable named pPixmap */
+#define NUM_TILES_WIDTH (LINEAR_PITCH/TILE_PITCH) /* tiles per row of tiles */
+
+static NVPtr last_wfb_pNv = NULL; /* set by nouveau_exa_wfb_setup_wrap; NULL until then */
+
+/* Only one read and one write function can be exposed, so this one handles tiled pixmaps itself; the linear versions are for internal consumption. */
+static FbBits
+nouveau_exa_wfb_read_memory(const void *src, int size)
+{
+	int i, line_x, line_y, tile_x, tile_y, subtile_x, subtile_y;
+	unsigned long offset = (unsigned long) src, subpixel_offset;
+	PixmapPtr pPixmap = NULL;
+	FbBits bits = 0;
+	void *new_src;
+
+	if (!last_wfb_pNv) /* no screen recorded yet, treat src as linear */
+		return nouveau_exa_wfb_read_memory_linear(src, size);
+
+	/* Find the tracked pixmap whose [start, end) address range contains src. */
+	for (i = 0; i < 6; i++)
+		if (offset >= last_wfb_pNv->wfb_pixmaps[i].start && offset < last_wfb_pNv->wfb_pixmaps[i].end) {
+			pPixmap = last_wfb_pNv->wfb_pixmaps[i].ppix;
+			break;
+		}
+
+	if (!pPixmap || !last_wfb_pNv->wfb_pixmaps[i].tiled) /* untracked address or linear pixmap */
+		return nouveau_exa_wfb_read_memory_linear(src, size);
+
+	/* Now comes the decoding: make offset relative to the start of the pixmap. */
+	offset -= (unsigned long) pPixmap->devPrivate.ptr;
+	/* Assuming dword aligned offsets; split off the byte position inside the pixel. */
+	subpixel_offset = offset & ((pPixmap->drawable.bitsPerPixel >> 3) - 1);
+	offset -= subpixel_offset;
+
+	/* Determine the coordinate first: byte column and row, then the tile and the subtile they fall in. */
+	line_y = offset/LINEAR_PITCH;
+	line_x = offset - line_y * LINEAR_PITCH;
+	tile_x = line_x/TILE_PITCH;
+	tile_y = line_y/TILE_HEIGHT;
+	subtile_x = (line_x - tile_x * TILE_PITCH)/SUBTILE_PITCH;
+	subtile_y = (line_y - tile_y * TILE_HEIGHT)/SUBTILE_HEIGHT;
+
+	new_src = pPixmap->devPrivate.ptr + /* terms below: tile, subtile, row in subtile, byte in row, byte in pixel */
+		(((tile_y * NUM_TILES_WIDTH) + tile_x) * (TILE_HEIGHT * TILE_PITCH)) +
+		(((subtile_y * (TILE_PITCH/SUBTILE_PITCH)) + subtile_x) * (SUBTILE_HEIGHT * SUBTILE_PITCH)) +
+		((line_y - tile_y * TILE_HEIGHT - subtile_y * SUBTILE_HEIGHT) * SUBTILE_PITCH) + 
+		(line_x - tile_x * TILE_PITCH - subtile_x * SUBTILE_PITCH) +
+		subpixel_offset;
+
+	memcpy(&bits, new_src, size); /* bits started at 0, so bytes past size stay zero */
+
+	return bits;
+}
+
+static void
+nouveau_exa_wfb_write_memory(void *dst, FbBits value, int size)
+{
+	int i, line_x, line_y, tile_x, tile_y, subtile_x, subtile_y;
+	unsigned long offset = (unsigned long) dst, subpixel_offset;
+	PixmapPtr pPixmap = NULL;
+	void *new_dst;
+	/* Store size bytes of value at dst; if dst lies inside a tracked tiled pixmap, the linear address is first converted to its tiled location. */
+	if (!last_wfb_pNv) {
+		nouveau_exa_wfb_write_memory_linear(dst, value, size);
+		return;
+	}
+
+	/* Find the tracked pixmap whose [start, end) address range contains dst. */
+	for (i = 0; i < 6; i++)
+		if (offset >= last_wfb_pNv->wfb_pixmaps[i].start && offset < last_wfb_pNv->wfb_pixmaps[i].end) {
+			pPixmap = last_wfb_pNv->wfb_pixmaps[i].ppix;
+			break;
+		}
+
+	if (!pPixmap || !last_wfb_pNv->wfb_pixmaps[i].tiled) { /* untracked address or linear pixmap */
+		nouveau_exa_wfb_write_memory_linear(dst, value, size);
+		return;
+	}
+
+	/* Now comes the decoding: make offset relative to the start of the pixmap. */
+	offset -= (unsigned long) pPixmap->devPrivate.ptr;
+	/* Assuming dword aligned offsets; split off the byte position inside the pixel. */
+	subpixel_offset = offset & ((pPixmap->drawable.bitsPerPixel >> 3) - 1);
+	offset -= subpixel_offset;
+
+	/* Determine the coordinate first: byte column and row, then the tile and the subtile they fall in. */
+	line_y = offset/LINEAR_PITCH;
+	line_x = offset - line_y * LINEAR_PITCH;
+	tile_x = line_x/TILE_PITCH;
+	tile_y = line_y/TILE_HEIGHT;
+	subtile_x = (line_x - tile_x * TILE_PITCH)/SUBTILE_PITCH;
+	subtile_y = (line_y - tile_y * TILE_HEIGHT)/SUBTILE_HEIGHT;
+
+	new_dst = pPixmap->devPrivate.ptr + /* terms below: tile, subtile, row in subtile, byte in row, byte in pixel */
+		(((tile_y * NUM_TILES_WIDTH) + tile_x) * (TILE_HEIGHT * TILE_PITCH)) +
+		(((subtile_y * (TILE_PITCH/SUBTILE_PITCH)) + subtile_x) * (SUBTILE_HEIGHT * SUBTILE_PITCH)) +
+		((line_y - tile_y * TILE_HEIGHT - subtile_y * SUBTILE_HEIGHT) * SUBTILE_PITCH) + 
+		(line_x - tile_x * TILE_PITCH - subtile_x * SUBTILE_PITCH) +
+		subpixel_offset;
+
+	memcpy(new_dst, &value, size);
+}
+
+void
+nouveau_exa_wfb_setup_wrap(ReadMemoryProcPtr *pRead,
+				WriteMemoryProcPtr *pWrite,
+				DrawablePtr pDraw)
+{
+	ScrnInfoPtr pScrn = xf86Screens[pDraw->pScreen->myNum];
+	NVPtr pNv = NVPTR(pScrn);
+	PixmapPtr pPixmap;
+	/* Record pDraw's pixmap in a free wfb_pixmaps slot and hand back the tile-aware read/write hooks through pRead and pWrite. */
+	if (!pRead || !pWrite)
+		return;
+
+	pPixmap = NVGetDrawablePixmap(pDraw);
+	if (!pPixmap)
+		return;
+
+	int i; /* ends up as the index of the first unused slot, or 6 if all are taken */
+	for (i = 0; i < 6; i++)
+		if (!pNv->wfb_pixmaps[i].used)
+			break;
+
+	if (i == 6) { /* table full: report it and hand back NULL hooks */
+		ErrorF("More than 6 wraps are setup, what the hell is going on?\n");
+		*pRead = NULL;
+		*pWrite = NULL;
+		return;
+	}
+
+	/* We will get a pointer, somewhere in the range of this pixmap. */
+	/* Based on linear representation of course; the height is rounded up to a multiple of 4 lines. */
+	pNv->wfb_pixmaps[i].ppix = pPixmap;
+	pNv->wfb_pixmaps[i].start = (unsigned long) pPixmap->devPrivate.ptr;
+	pNv->wfb_pixmaps[i].end = pNv->wfb_pixmaps[i].start + exaGetPixmapPitch(pPixmap) * ((pPixmap->drawable.height + 3) & ~3);
+	pNv->wfb_pixmaps[i].used = true;
+	if (nouveau_exa_pixmap_is_tiled(pPixmap))
+		pNv->wfb_pixmaps[i].tiled = true;
+	else
+		pNv->wfb_pixmaps[i].tiled = false;
+
+	*pRead = nouveau_exa_wfb_read_memory;
+	*pWrite = nouveau_exa_wfb_write_memory;
+
+	last_wfb_pNv = pNv; /* the read/write hooks get no screen argument, so remember it here */
+}
+
+void
+nouveau_exa_wfb_finish_wrap(DrawablePtr pDraw)
+{
+	ScrnInfoPtr pScrn = xf86Screens[pDraw->pScreen->myNum];
+	NVPtr pNv = NVPTR(pScrn);
+	PixmapPtr pPixmap;
+	int i;
+	/* Clear the first wfb_pixmaps slot that tracks pDraw's pixmap, making it free for reuse. */
+	pPixmap = NVGetDrawablePixmap(pDraw);
+	if (!pPixmap)
+		return;
+
+	for (i = 0; i < 6; i++) /* wfb_pixmaps[] has 6 entries; a larger bound reads past the array */
+		if (pNv->wfb_pixmaps[i].ppix == pPixmap) {
+			pNv->wfb_pixmaps[i].ppix = NULL;
+			pNv->wfb_pixmaps[i].start = 0;
+			pNv->wfb_pixmaps[i].end = 0;
+			pNv->wfb_pixmaps[i].used = false;
+			pNv->wfb_pixmaps[i].tiled = false;
+			break;
+		}
+}
diff --git a/src/nv_driver.c b/src/nv_driver.c
index d7e8025..bdea2b6 100644
--- a/src/nv_driver.c
+++ b/src/nv_driver.c
@@ -1510,7 +1510,7 @@ NVPreInit(ScrnInfoPtr pScrn, int flags)
 	 * section.
 	 */
 
-	if (xf86LoadSubModule(pScrn, "fb") == NULL)
+	if (xf86LoadSubModule(pScrn, "wfb") == NULL)
 		NVPreInitFail("\n");
 
 	xf86LoaderReqSymLists(fbSymbols, NULL);
@@ -2154,9 +2154,10 @@ NVScreenInit(int scrnIndex, ScreenPtr pScreen, int argc, char **argv)
 	switch (pScrn->bitsPerPixel) {
 		case 16:
 		case 32:
-			ret = fbScreenInit(pScreen, FBStart, pScrn->virtualX, pScrn->virtualY,
+			ret = wfbScreenInit(pScreen, FBStart, pScrn->virtualX, pScrn->virtualY,
 				pScrn->xDpi, pScrn->yDpi,
-				displayWidth, pScrn->bitsPerPixel);
+				displayWidth, pScrn->bitsPerPixel,
+				nouveau_exa_wfb_setup_wrap, nouveau_exa_wfb_finish_wrap);
 			break;
 		default:
 			xf86DrvMsg(scrnIndex, X_ERROR,
@@ -2181,7 +2182,7 @@ NVScreenInit(int scrnIndex, ScreenPtr pScreen, int argc, char **argv)
 		}
 	}
 
-	fbPictureInit (pScreen, 0, 0);
+	wfbPictureInit (pScreen, 0, 0);
 
 	xf86SetBlackWhitePixels(pScreen);
 
diff --git a/src/nv_include.h b/src/nv_include.h
index be004ef..f78cda0 100644
--- a/src/nv_include.h
+++ b/src/nv_include.h
@@ -47,6 +47,7 @@
 #include "dixstruct.h"
 #include "scrnintstr.h"
 
+#define FB_ACCESS_WRAPPER 1
 #include "fb.h"
 
 #include "xf86cmap.h"
diff --git a/src/nv_proto.h b/src/nv_proto.h
index 28605a8..5933ddd 100644
--- a/src/nv_proto.h
+++ b/src/nv_proto.h
@@ -65,6 +65,10 @@ void  NVTakedownDma(ScrnInfoPtr pScrn);
 Bool nouveau_exa_init(ScreenPtr pScreen);
 Bool nouveau_exa_pixmap_is_onscreen(PixmapPtr pPixmap);
 bool nouveau_exa_pixmap_is_tiled(PixmapPtr ppix);
+void nouveau_exa_wfb_setup_wrap(ReadMemoryProcPtr *pRead,
+				WriteMemoryProcPtr *pWrite,
+				DrawablePtr pDraw);
+void nouveau_exa_wfb_finish_wrap(DrawablePtr pDraw);
 
 /* in nv_hw.c */
 void NVCalcStateExt(ScrnInfoPtr,struct _riva_hw_state *,int,int,int,int,int,int);
diff --git a/src/nv_type.h b/src/nv_type.h
index 78a7802..24b628f 100644
--- a/src/nv_type.h
+++ b/src/nv_type.h
@@ -404,6 +404,15 @@ typedef struct _NVRec {
 	unsigned point_x, point_y;
 	unsigned width_in, width_out;
 	unsigned height_in, height_out;
+
+	/* Wfb related data. */
+	struct {
+		PixmapPtr ppix;
+		bool used;
+		bool tiled;
+		unsigned long start;
+		unsigned long end;
+	} wfb_pixmaps[6];
 } NVRec;
 
 #define NVPTR(p) ((NVPtr)((p)->driverPrivate))

[-- Attachment #3: Type: text/plain, Size: 181 bytes --]

_______________________________________________
Nouveau mailing list
Nouveau-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
http://lists.freedesktop.org/mailman/listinfo/nouveau

      parent reply	other threads:[~2009-03-07 15:04 UTC|newest]

Thread overview: 3+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-03-07 12:59 preliminary nv50 wfb patch Maarten Maathuis
     [not found] ` <6d4bc9fc0903070459i3b9a0732n3163972cc1c687bf-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2009-03-07 13:20   ` Maarten Maathuis
     [not found]     ` <6d4bc9fc0903070520q6418197vce462940290e47c5-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2009-03-07 15:04       ` Maarten Maathuis [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=6d4bc9fc0903070704u7eaadeb1v18ea65be45af5aaf@mail.gmail.com \
    --to=madman2003-re5jqeeqqe8avxtiumwx3w@public.gmane.org \
    --cc=nouveau-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.