Re: [patch] generic framebuffer support, cleanups and buglets with BPP < 32 (fwd)

All of lore.kernel.org
 help / color / mirror / Atom feed

From: Ulrich Eckhardt <eckhardt@satorlaser.com>
To: linux-fbdev-devel@lists.sourceforge.net
Subject: Re: [patch] generic framebuffer support, cleanups and buglets with BPP < 32 (fwd)
Date: Wed, 16 Feb 2005 15:06:45 +0100	[thread overview]
Message-ID: <200502161506.45868.eckhardt@satorlaser.com> (raw)
In-Reply-To: <200502162155.53719.adaplas@hotpop.com>

[-- Attachment #1: Type: text/plain, Size: 954 bytes --]

Antonino A. Daplas wrote:
> Patch is linewrapped.  Please resend.  Text attachment is okay.

Ok, resending. I'll repeat the description so it all is in one place. Here we 
go...


Changes:
 * removed trailing whitespace
 * use helper function for common bitwise operations
 * fixed several cases where either the wrong mask was used for bitops
   or the mask was computed falsely, messing up <32 BPP support
 * added self-tests for bitcpy_rev() algorithm in module-init function
 * no need to use explicitly sized integers in some cases
 * added a few comments where things weren't obvious to me

I tested these changes on an Au1100 based board, using the patches to its 
framebuffer code that were posted here by Christian Pellegrin[1]. I had this 
code working both on a 16BPP color LCD and a 4BPP monochrome one.

Uli

[1] http://www.linux-mips.org/archives/linux-mips/2005-01/msg00095.html
What happened to those, btw? They at least Work For Me(tm).

[-- Attachment #2: cfb-2002-02-15.4.patch --]
[-- Type: text/x-diff, Size: 29283 bytes --]

Index: cfbcopyarea.c
===================================================================
RCS file: /home/cvs/linux/drivers/video/cfbcopyarea.c,v
retrieving revision 1.10
diff -u -r1.10 cfbcopyarea.c
--- cfbcopyarea.c	12 Oct 2004 01:45:47 -0000	1.10
+++ cfbcopyarea.c	15 Feb 2005 13:28:02 -0000
@@ -8,18 +8,22 @@
  *  more details.
  *
  * NOTES:
- * 
- *  This is for cfb packed pixels. Iplan and such are incorporated in the 
+ *
+ *  This is for cfb packed pixels. Iplan and such are incorporated in the
  *  drivers that need them.
- * 
+ *
  *  FIXME
- *  The code for 24 bit is horrible. It copies byte by byte size instead of 
- *  longs like the other sizes. Needs to be optimized. 
  *
- *  Also need to add code to deal with cards endians that are different than 
+ *  Also need to add code to deal with cards endians that are different than
  *  the native cpu endians. I also need to deal with MSB position in the word.
- *  
+ *
+ *  The two functions or copying forward and backward could be split up like
+ *  the ones for filling, i.e. in aligned and unaligned versions. This would
+ *  help moving some redundant computations and branches out of the loop, too.
  */
+
+
+
 #include <linux/config.h>
 #include <linux/module.h>
 #include <linux/kernel.h>
@@ -32,53 +36,63 @@
 #define LONG_MASK  (BITS_PER_LONG - 1)
 
 #if BITS_PER_LONG == 32
-#define FB_WRITEL fb_writel
-#define FB_READL  fb_readl
-#define SHIFT_PER_LONG 5
-#define BYTES_PER_LONG 4
+#  define FB_WRITEL fb_writel
+#  define FB_READL  fb_readl
+#  define SHIFT_PER_LONG 5
+#  define BYTES_PER_LONG 4
 #else
-#define FB_WRITEL fb_writeq
-#define FB_READL  fb_readq
-#define SHIFT_PER_LONG 6
-#define BYTES_PER_LONG 8
+#  define FB_WRITEL fb_writeq
+#  define FB_READL  fb_readq
+#  define SHIFT_PER_LONG 6
+#  define BYTES_PER_LONG 8
 #endif
 
-static void bitcpy(unsigned long __iomem *dst, int dst_idx,
-		   const unsigned long __iomem *src, int src_idx,
-		   unsigned long n)
+    /*
+     *  Compose two values, using a bitmask as decision value
+     *  This is equivalent to (a & mask) | (b & ~mask)
+     */
+
+static inline unsigned long
+comp(unsigned long a, unsigned long b, unsigned long mask)
+{
+    return ((a ^ b) & mask) ^ b;
+}
+
+    /*
+     *  Generic bitwise copy algorithm
+     */
+
+static void
+bitcpy(unsigned long __iomem *dst, int dst_idx,
+       const unsigned long __iomem *src, int src_idx,
+       unsigned n)
 {
 	unsigned long first, last;
-	int shift = dst_idx-src_idx, left, right;
-	unsigned long d0, d1;
-	int m;
-	
-	if (!n)
-		return;
-	
-	shift = dst_idx-src_idx;
+	int const shift = dst_idx-src_idx;
+	int left, right;
+
 	first = ~0UL >> dst_idx;
 	last = ~(~0UL >> ((dst_idx+n) % BITS_PER_LONG));
-	
+
 	if (!shift) {
 		// Same alignment for source and dest
-		
+
 		if (dst_idx+n <= BITS_PER_LONG) {
 			// Single word
 			if (last)
 				first &= last;
-			FB_WRITEL((FB_READL(src) & first) | (FB_READL(dst) & ~first), dst);
+			FB_WRITEL( comp( FB_READL(src), FB_READL(dst), first), dst);
 		} else {
 			// Multiple destination words
+
 			// Leading bits
-			if (first) {
-				
-				FB_WRITEL((FB_READL(src) & first) | 
-					  (FB_READL(dst) & ~first), dst);
+			if (first != ~0UL) {
+				FB_WRITEL( comp( FB_READL(src), FB_READL(dst), first), dst);
 				dst++;
 				src++;
 				n -= BITS_PER_LONG-dst_idx;
 			}
-			
+
 			// Main chunk
 			n /= BITS_PER_LONG;
 			while (n >= 8) {
@@ -94,55 +108,58 @@
 			}
 			while (n--)
 				FB_WRITEL(FB_READL(src++), dst++);
+
 			// Trailing bits
 			if (last)
-				FB_WRITEL((FB_READL(src) & last) | (FB_READL(dst) & ~last), dst);
+				FB_WRITEL( comp( FB_READL(src), FB_READL(dst), last), dst);
 		}
 	} else {
+		unsigned long d0, d1;
+		int m;
 		// Different alignment for source and dest
-		
+
 		right = shift & (BITS_PER_LONG-1);
 		left = -shift & (BITS_PER_LONG-1);
-		
+
 		if (dst_idx+n <= BITS_PER_LONG) {
 			// Single destination word
 			if (last)
 				first &= last;
 			if (shift > 0) {
 				// Single source word
-				FB_WRITEL(((FB_READL(src) >> right) & first) | 
-					  (FB_READL(dst) & ~first), dst);
+				FB_WRITEL( comp( FB_READL(src) >> right, FB_READL(dst), first), dst);
 			} else if (src_idx+n <= BITS_PER_LONG) {
 				// Single source word
-				FB_WRITEL(((FB_READL(src) << left) & first) | 
-					  (FB_READL(dst) & ~first), dst);
+				FB_WRITEL( comp(FB_READL(src) << left, FB_READL(dst), first), dst);
 			} else {
 				// 2 source words
 				d0 = FB_READL(src++);
 				d1 = FB_READL(src);
-				FB_WRITEL(((d0<<left | d1>>right) & first) | 
-					  (FB_READL(dst) & ~first), dst);
+				FB_WRITEL( comp(d0<<left | d1>>right, FB_READL(dst), first), dst);
 			}
 		} else {
 			// Multiple destination words
+			/** We must always remember the last value read, because in case
+			SRC and DST overlap bitwise (e.g. when moving just one pixel in
+			1bpp), we always collect one full long for DST and that might
+			overlap with the current long from SRC. We store this value in
+			'd0'. */
 			d0 = FB_READL(src++);
 			// Leading bits
 			if (shift > 0) {
 				// Single source word
-				FB_WRITEL(((d0 >> right) & first) | 
-					  (FB_READL(dst) & ~first), dst);
+				FB_WRITEL( comp(d0 >> right, FB_READL(dst), first), dst);
 				dst++;
 				n -= BITS_PER_LONG-dst_idx;
 			} else {
 				// 2 source words
 				d1 = FB_READL(src++);
-				FB_WRITEL(((d0<<left | d1>>right) & first) | 
-					  (FB_READL(dst) & ~first), dst);
+				FB_WRITEL( comp(d0<<left | d1>>right, FB_READL(dst), first), dst);
 				d0 = d1;
 				dst++;
 				n -= BITS_PER_LONG-dst_idx;
 			}
-			
+
 			// Main chunk
 			m = n % BITS_PER_LONG;
 			n /= BITS_PER_LONG;
@@ -166,37 +183,34 @@
 				FB_WRITEL(d0 << left | d1 >> right, dst++);
 				d0 = d1;
 			}
-			
+
 			// Trailing bits
 			if (last) {
 				if (m <= right) {
 					// Single source word
-					FB_WRITEL(((d0 << left) & last) | 
-						  (FB_READL(dst) & ~last), 
-						  dst);
+					FB_WRITEL( comp(d0 << left, FB_READL(dst), last), dst);
 				} else {
 					// 2 source words
 					d1 = FB_READL(src);
-					FB_WRITEL(((d0<<left | d1>>right) & 
-						   last) | (FB_READL(dst) & 
-							    ~last), dst);
+					FB_WRITEL( comp(d0<<left | d1>>right, FB_READL(dst), last), dst);
 				}
 			}
 		}
 	}
 }
 
-static void bitcpy_rev(unsigned long __iomem *dst, int dst_idx,
-		       const unsigned long __iomem *src, int src_idx, unsigned long n)
+    /*
+     *  Generic bitwise copy algorithm, operating backward
+     */
+
+static void
+bitcpy_rev(unsigned long __iomem *dst, int dst_idx,
+           const unsigned long __iomem *src, int src_idx,
+           unsigned n)
 {
 	unsigned long first, last;
-	int shift = dst_idx-src_idx, left, right;
-	unsigned long d0, d1;
-	int m;
-	
-	if (!n)
-		return;
-	
+	int shift;
+
 	dst += (n-1)/BITS_PER_LONG;
 	src += (n-1)/BITS_PER_LONG;
 	if ((n-1) % BITS_PER_LONG) {
@@ -207,29 +221,31 @@
 		src += src_idx >> SHIFT_PER_LONG;
 		src_idx &= BITS_PER_LONG-1;
 	}
-	
+
 	shift = dst_idx-src_idx;
+
 	first = ~0UL << (BITS_PER_LONG-1-dst_idx);
 	last = ~(~0UL << (BITS_PER_LONG-1-((dst_idx-n) % BITS_PER_LONG)));
-	
+
 	if (!shift) {
 		// Same alignment for source and dest
-		
+
 		if ((unsigned long)dst_idx+1 >= n) {
 			// Single word
 			if (last)
 				first &= last;
-			FB_WRITEL((FB_READL(src) & first) | (FB_READL(dst) & ~first), dst); 
+			FB_WRITEL( comp( FB_READL(src), FB_READL(dst), first), dst);
 		} else {
 			// Multiple destination words
+
 			// Leading bits
-			if (first) {
-				FB_WRITEL((FB_READL(src) & first) | (FB_READL(dst) & ~first), dst); 
+			if (first != ~0UL) {
+				FB_WRITEL( comp( FB_READL(src), FB_READL(dst), first), dst);
 				dst--;
 				src--;
 				n -= dst_idx+1;
 			}
-			
+
 			// Main chunk
 			n /= BITS_PER_LONG;
 			while (n >= 8) {
@@ -245,56 +261,55 @@
 			}
 			while (n--)
 				FB_WRITEL(FB_READL(src--), dst--);
-			
+
 			// Trailing bits
 			if (last)
-				FB_WRITEL((FB_READL(src) & last) | (FB_READL(dst) & ~last), dst);
+				FB_WRITEL( comp( FB_READL(src), FB_READL(dst), last), dst);
 		}
 	} else {
 		// Different alignment for source and dest
-		
-		right = shift & (BITS_PER_LONG-1);
-		left = -shift & (BITS_PER_LONG-1);
-		
+
+		int const left = -shift & (BITS_PER_LONG-1);
+		int const right = shift & (BITS_PER_LONG-1);
+
 		if ((unsigned long)dst_idx+1 >= n) {
 			// Single destination word
 			if (last)
 				first &= last;
 			if (shift < 0) {
 				// Single source word
-				FB_WRITEL((FB_READL(src) << left & first) | 
-					  (FB_READL(dst) & ~first), dst);
+				FB_WRITEL( comp( FB_READL(src)<<left, FB_READL(dst), first), dst);
 			} else if (1+(unsigned long)src_idx >= n) {
 				// Single source word
-				FB_WRITEL(((FB_READL(src) >> right) & first) | 
-					  (FB_READL(dst) & ~first), dst);
+				FB_WRITEL( comp( FB_READL(src)>>right, FB_READL(dst), first), dst);
 			} else {
 				// 2 source words
-				d0 = FB_READL(src--);
-				d1 = FB_READL(src);
-				FB_WRITEL(((d0>>right | d1<<left) & first) | 
-					  (FB_READL(dst) & ~first), dst);
+				FB_WRITEL( comp( (FB_READL(src)>>right | FB_READL(src-1)<<left), FB_READL(dst), first), dst);
 			}
 		} else {
 			// Multiple destination words
+			/** We must always remember the last value read, because in case
+			SRC and DST overlap bitwise (e.g. when moving just one pixel in
+			1bpp), we always collect one full long for DST and that might
+			overlap with the current long from SRC. We store this value in
+			'd0'. */
+			unsigned long d0, d1;
+			int m;
+
 			d0 = FB_READL(src--);
 			// Leading bits
 			if (shift < 0) {
 				// Single source word
-				FB_WRITEL(((d0 << left) & first) | 
-					  (FB_READL(dst) & ~first), dst);
-				dst--;
-				n -= dst_idx+1;
+				FB_WRITEL( comp( (d0 << left), FB_READL(dst), first), dst);
 			} else {
 				// 2 source words
 				d1 = FB_READL(src--);
-				FB_WRITEL(((d0>>right | d1<<left) & first) | 
-					  (FB_READL(dst) & ~first), dst);
+				FB_WRITEL( comp( (d0>>right | d1<<left), FB_READL(dst), first), dst);
 				d0 = d1;
-				dst--;
-				n -= dst_idx+1;
 			}
-			
+			dst--;
+			n -= dst_idx+1;
+
 			// Main chunk
 			m = n % BITS_PER_LONG;
 			n /= BITS_PER_LONG;
@@ -318,20 +333,16 @@
 				FB_WRITEL(d0 >> right | d1 << left, dst--);
 				d0 = d1;
 			}
-			
+
 			// Trailing bits
 			if (last) {
 				if (m <= left) {
 					// Single source word
-					FB_WRITEL(((d0 >> right) & last) | 
-						  (FB_READL(dst) & ~last), 
-						  dst);
+					FB_WRITEL( comp(d0 >> right, FB_READL(dst), last), dst);
 				} else {
 					// 2 source words
 					d1 = FB_READL(src);
-					FB_WRITEL(((d0>>right | d1<<left) & 
-						   last) | (FB_READL(dst) & 
-							    ~last), dst);
+					FB_WRITEL( comp(d0>>right | d1<<left, FB_READL(dst), last), dst);
 				}
 			}
 		}
@@ -342,8 +353,8 @@
 {
 	u32 dx = area->dx, dy = area->dy, sx = area->sx, sy = area->sy;
 	u32 height = area->height, width = area->width;
-	int x2, y2, old_dx, old_dy, vxres, vyres;
-	unsigned long next_line = p->fix.line_length;
+	int x2, y2, vxres, vyres;
+	unsigned long const bits_per_line = p->fix.line_length*8u;
 	int dst_idx = 0, src_idx = 0, rev_copy = 0;
 	unsigned long __iomem *dst = NULL, *src = NULL;
 
@@ -352,20 +363,16 @@
 
 	/* We want rotation but lack hardware to do it for us. */
 	if (!p->fbops->fb_rotate && p->var.rotate) {
-	}	
-	
+	}
+
 	vxres = p->var.xres_virtual;
 	vyres = p->var.yres_virtual;
 
-	if (area->dx > vxres || area->sx > vxres || 
+	if (area->dx > vxres || area->sx > vxres ||
 	    area->dy > vyres || area->sy > vyres)
 		return;
 
-	/* clip the destination */
-	old_dx = area->dx;
-	old_dy = area->dy;
-
-	/*
+	/* clip the destination
 	 * We could use hardware clipping but on many cards you get around
 	 * hardware clipping by writing to framebuffer directly.
 	 */
@@ -378,9 +385,13 @@
 	width = x2 - dx;
 	height = y2 - dy;
 
+	if ((width==0)
+	  ||(height==0))
+		return;
+
 	/* update sx1,sy1 */
-	sx += (dx - old_dx);
-	sy += (dy - old_dy);
+	sx += (dx - area->dx);
+	sy += (dy - area->dy);
 
 	/* the source must be completely inside the virtual screen */
 	if (sx < 0 || sy < 0 ||
@@ -388,45 +399,118 @@
 	    (sy + height) > vyres)
 		return;
 
-	if ((dy == sy && dx > sx) ||	
-	    (dy > sy)) { 
+	/* if the beginning of the target area might overlap with the end of
+	the source area, be have to copy the area reverse. */
+	if ((dy == sy && dx > sx) ||
+	    (dy > sy)) {
 		dy += height;
 		sy += height;
 		rev_copy = 1;
 	}
 
-	dst = src = (unsigned long __iomem *)((unsigned long)p->screen_base & 
+	// split the base of the framebuffer into a long-aligned address and the
+	// index of the first bit
+	dst = src = (unsigned long __iomem *)((unsigned long)p->screen_base &
 				      ~(BYTES_PER_LONG-1));
-	dst_idx = src_idx = (unsigned long)p->screen_base & (BYTES_PER_LONG-1);
-	dst_idx += dy*next_line*8 + dx*p->var.bits_per_pixel;
-	src_idx += sy*next_line*8 + sx*p->var.bits_per_pixel;
-	
+	dst_idx = src_idx = 8*((unsigned long)p->screen_base & (BYTES_PER_LONG-1));
+	// add offset of source and target area
+	dst_idx += dy*bits_per_line + dx*p->var.bits_per_pixel;
+	src_idx += sy*bits_per_line + sx*p->var.bits_per_pixel;
+
 	if (p->fbops->fb_sync)
 		p->fbops->fb_sync(p);
+
 	if (rev_copy) {
 		while (height--) {
-			dst_idx -= next_line*8;
-			src_idx -= next_line*8;
+			dst_idx -= bits_per_line;
+			src_idx -= bits_per_line;
 			dst += dst_idx >> SHIFT_PER_LONG;
-			dst_idx &= (BYTES_PER_LONG-1);
+			dst_idx &= LONG_MASK;
 			src += src_idx >> SHIFT_PER_LONG;
-			src_idx &= (BYTES_PER_LONG-1);
-			bitcpy_rev(dst, dst_idx, src, src_idx, 
+			src_idx &= LONG_MASK;
+			bitcpy_rev(dst, dst_idx, src, src_idx,
 				   width*p->var.bits_per_pixel);
-		}	
+		}
 	} else {
 		while (height--) {
 			dst += dst_idx >> SHIFT_PER_LONG;
-			dst_idx &= (BYTES_PER_LONG-1);
+			dst_idx &= LONG_MASK;
 			src += src_idx >> SHIFT_PER_LONG;
-			src_idx &= (BYTES_PER_LONG-1);
-			bitcpy(dst, dst_idx, src, src_idx, 
+			src_idx &= LONG_MASK;
+			bitcpy(dst, dst_idx, src, src_idx,
 			       width*p->var.bits_per_pixel);
-			dst_idx += next_line*8;
-			src_idx += next_line*8;
-		}	
+			dst_idx += bits_per_line;
+			src_idx += bits_per_line;
+		}
+	}
+}
+#undef CFB_DEBUG
+#ifdef CFB_DEBUG
+/** all this init-function does is to perform a few unittests.
+The idea it always to invoke the function to test on a predefined bitmap and
+compare the results to the expected output.
+TODO:
+ - this currently only tests bitcpy_rev, as that was the only one giving me trouble
+ - this assumes 32 bit longs
+ - not sure about endianess, I only tested this on a 32 bit MIPS little endian system
+ - could reuse testcases to test forward copying, too, just reverse the operation
+*/
+int __init cfb_copyarea_init(void)
+{
+	char const* comment = 0;
+	printk( KERN_INFO "cfb_copyarea_init()\n");
+	{
+		comment = "copy a single u32, source and target u32-aligned";
+		u32 tmp[] =          { 0xaaaaaaaau, 0x55555555u, 0xffffffffu, 0x00000000u };
+		u32 const expect[] = { 0xaaaaaaaau, 0xaaaaaaaau, 0xffffffffu, 0x00000000u };
+
+		bitcpy_rev( tmp, 0, tmp+1, 0, 32);
+
+		if( 0!=memcmp( expect, tmp, sizeof tmp))
+			goto error;
+	}
+
+	{
+		comment = "copy a single u32, source u32-aligned";
+		u32 tmp[] =          { 0x11112222u, 0x33334444u, 0x55556666u, 0x77778888u };
+		u32 const expect[] = { 0x11112222u, 0x22224444u, 0x55551111u, 0x77778888u };
+
+		bitcpy_rev( tmp, 0, tmp+1, 16, 32);
+
+		if( 0!=memcmp( expect, tmp, sizeof tmp))
+			goto error;
+	}
+
+	{
+		comment = "copy a single u32, target u32-aligned";
+		u32 tmp[] =          { 0x11112222u, 0x33334444u, 0x55556666u, 0x77778888u };
+		u32 const expect[] = { 0x11112222u, 0x33334444u, 0x44441111u, 0x77778888u };
+
+		bitcpy_rev( tmp, 16, tmp+2, 0, 32);
+
+		if( 0!=memcmp( expect, tmp, sizeof tmp))
+			goto error;
 	}
+
+	{
+		comment = "copy two u32, source and target u32-aligned";
+		u32 tmp[] =          { 0xaaaaaaaau, 0x55555555u, 0xffffffffu, 0x00000000u };
+		u32 const expect[] = { 0xaaaaaaaau, 0xaaaaaaaau, 0x55555555u, 0x00000000u };
+
+		bitcpy_rev( tmp, 0, tmp+1, 0, 64);
+
+		if( 0!=memcmp( expect, tmp, sizeof tmp))
+			goto error;
+	}
+
+	return 0;
+
+error:
+	printk( KERN_ERR " framebuffer self-test(%s) failed\n", comment);
+	return -1;
 }
+module_init(cfb_copyarea_init);
+#endif
 
 EXPORT_SYMBOL(cfb_copyarea);
 
Index: cfbfillrect.c
===================================================================
RCS file: /home/cvs/linux/drivers/video/cfbfillrect.c,v
retrieving revision 1.8
diff -u -r1.8 cfbfillrect.c
--- cfbfillrect.c	12 Oct 2004 01:45:47 -0000	1.8
+++ cfbfillrect.c	15 Feb 2005 13:28:02 -0000
@@ -1,7 +1,7 @@
 /*
- *  Generic fillrect for frame buffers with packed pixels of any depth. 
+ *  Generic fillrect for frame buffers with packed pixels of any depth.
  *
- *      Copyright (C)  2000 James Simmons (jsimmons@linux-fbdev.org) 
+ *      Copyright (C)  2000 James Simmons (jsimmons@linux-fbdev.org)
  *
  *  This file is subject to the terms and conditions of the GNU General Public
  *  License.  See the file COPYING in the main directory of this archive for
@@ -9,8 +9,8 @@
  *
  * NOTES:
  *
- *  The code for depths like 24 that don't have integer number of pixels per 
- *  long is broken and needs to be fixed. For now I turned these types of 
+ *  The code for depths like 24 that don't have integer number of pixels per
+ *  long is broken and needs to be fixed. For now I turned these types of
  *  mode off.
  *
  *  Also need to add code to deal with cards endians that are different than
@@ -24,149 +24,134 @@
 #include <asm/types.h>
 
 #if BITS_PER_LONG == 32
-#define FB_WRITEL fb_writel
-#define FB_READL  fb_readl
-#define BYTES_PER_LONG 4
-#define SHIFT_PER_LONG 5
+#  define FB_WRITEL fb_writel
+#  define FB_READL  fb_readl
+#  define SHIFT_PER_LONG 5
+#  define BYTES_PER_LONG 4
 #else
-#define FB_WRITEL fb_writeq
-#define FB_READL  fb_readq
-#define BYTES_PER_LONG 8
-#define SHIFT_PER_LONG 6
+#  define FB_WRITEL fb_writeq
+#  define FB_READL  fb_readq
+#  define SHIFT_PER_LONG 6
+#  define BYTES_PER_LONG 8
 #endif
 
-#define EXP1(x)		0xffffffffU*x
-#define EXP2(x)		0x55555555U*x
-#define EXP4(x)		0x11111111U*0x ## x
-
-typedef u32 pixel_t;
-
-static const u32 bpp1tab[2] = {
-    EXP1(0), EXP1(1)
-};
-
-static const u32 bpp2tab[4] = {
-    EXP2(0), EXP2(1), EXP2(2), EXP2(3)
-};
-
-static const u32 bpp4tab[16] = {
-    EXP4(0), EXP4(1), EXP4(2), EXP4(3), EXP4(4), EXP4(5), EXP4(6), EXP4(7),
-    EXP4(8), EXP4(9), EXP4(a), EXP4(b), EXP4(c), EXP4(d), EXP4(e), EXP4(f)
-};
-
     /*
      *  Compose two values, using a bitmask as decision value
      *  This is equivalent to (a & mask) | (b & ~mask)
      */
 
-static inline unsigned long comp(unsigned long a, unsigned long b,
-				 unsigned long mask)
+static inline unsigned long
+comp(unsigned long a, unsigned long b, unsigned long mask)
 {
     return ((a ^ b) & mask) ^ b;
 }
 
-static inline u32 pixel_to_pat32(const struct fb_info *p, pixel_t pixel)
-{
-    u32 pat = pixel;
+    /*
+     *  Create a pattern with the given pixel's color
+     */
 
-    switch (p->var.bits_per_pixel) {
+#if BITS_PER_LONG == 64
+static inline unsigned long
+pixel_to_pat( u32 bpp, u32 pixel)
+{
+	switch (bpp) {
 	case 1:
-	    pat = bpp1tab[pat];
-	    break;
-
+		return 0xfffffffffffffffful*pixel;
 	case 2:
-	    pat = bpp2tab[pat];
-	    break;
-
+		return 0x5555555555555555ul*pixel;
 	case 4:
-	    pat = bpp4tab[pat];
-	    break;
-
+		return 0x1111111111111111ul*pixel;
 	case 8:
-	    pat |= pat << 8;
-	    // Fall through
+		return 0x0101010101010101ul*pixel;
+	case 12:
+		return 0x0001001001001001ul*pixel;
 	case 16:
-	    pat |= pat << 16;
-	    // Fall through
+		return 0x0001000100010001ul*pixel;
+	case 24:
+		return 0x0000000001000001ul*pixel;
 	case 32:
-	    break;
+		return 0x0000000100000001ul*pixel;
+	default:
+		panic("pixel_to_pat(): unsupported pixelformat\n");
     }
-    return pat;
 }
-
-    /*
-     *  Expand a pixel value to a generic 32/64-bit pattern and rotate it to
-     *  the correct start position
-     */
-
-static inline unsigned long pixel_to_pat(const struct fb_info *p, 
-					 pixel_t pixel, int left)
+#else
+static inline unsigned long
+pixel_to_pat( u32 bpp, u32 pixel)
 {
-    unsigned long pat = pixel;
-    u32 bpp = p->var.bits_per_pixel;
-    int i;
-
-    /* expand pixel value */
-    for (i = bpp; i < BITS_PER_LONG; i *= 2)
-	pat |= pat << i;
-
-    /* rotate pattern to correct start position */
-    pat = pat << left | pat >> (bpp-left);
-    return pat;
+	switch (bpp) {
+	case 1:
+		return 0xfffffffful*pixel;
+	case 2:
+		return 0x55555555ul*pixel;
+	case 4:
+		return 0x11111111ul*pixel;
+	case 8:
+		return 0x01010101ul*pixel;
+	case 12:
+		return 0x00001001ul*pixel;
+	case 16:
+		return 0x00010001ul*pixel;
+	case 24:
+		return 0x00000001ul*pixel;
+	case 32:
+		return 0x00000001ul*pixel;
+	default:
+		panic("pixel_to_pat(): unsupported pixelformat\n");
+    }
 }
+#endif
 
     /*
-     *  Unaligned 32-bit pattern fill using 32/64-bit memory accesses
+     *  Aligned pattern fill using 32/64-bit memory accesses
      */
 
-void bitfill32(unsigned long __iomem *dst, int dst_idx, u32 pat, u32 n)
+static void
+bitfill32(unsigned long __iomem *dst, int dst_idx, unsigned long pat, unsigned n)
 {
-	unsigned long val = pat;
 	unsigned long first, last;
-	
+
 	if (!n)
 		return;
-	
-#if BITS_PER_LONG == 64
-	val |= val << 32;
-#endif
-	
+
 	first = ~0UL >> dst_idx;
 	last = ~(~0UL >> ((dst_idx+n) % BITS_PER_LONG));
-	
+
+
 	if (dst_idx+n <= BITS_PER_LONG) {
 		// Single word
 		if (last)
 			first &= last;
-		FB_WRITEL(comp(val, FB_READL(dst), first), dst);
+		FB_WRITEL(comp(pat, FB_READL(dst), first), dst);
 	} else {
 		// Multiple destination words
+
 		// Leading bits
-		if (first) {
-			FB_WRITEL(comp(val, FB_READL(dst), first), dst);
+		if (first!= ~0UL) {
+			FB_WRITEL(comp(pat, FB_READL(dst), first), dst);
 			dst++;
 			n -= BITS_PER_LONG-dst_idx;
 		}
-		
+
 		// Main chunk
 		n /= BITS_PER_LONG;
 		while (n >= 8) {
-			FB_WRITEL(val, dst++);
-			FB_WRITEL(val, dst++);
-			FB_WRITEL(val, dst++);
-			FB_WRITEL(val, dst++);
-			FB_WRITEL(val, dst++);
-			FB_WRITEL(val, dst++);
-			FB_WRITEL(val, dst++);
-			FB_WRITEL(val, dst++);
+			FB_WRITEL(pat, dst++);
+			FB_WRITEL(pat, dst++);
+			FB_WRITEL(pat, dst++);
+			FB_WRITEL(pat, dst++);
+			FB_WRITEL(pat, dst++);
+			FB_WRITEL(pat, dst++);
+			FB_WRITEL(pat, dst++);
+			FB_WRITEL(pat, dst++);
 			n -= 8;
 		}
 		while (n--)
-			FB_WRITEL(val, dst++);
-		
+			FB_WRITEL(pat, dst++);
+
 		// Trailing bits
 		if (last)
-			FB_WRITEL(comp(val, FB_READL(dst), first), dst);
+			FB_WRITEL(comp(pat, FB_READL(dst), last), dst);
 	}
 }
 
@@ -178,17 +163,18 @@
      *  used for the next 32/64-bit word
      */
 
-void bitfill(unsigned long __iomem *dst, int dst_idx, unsigned long pat, int left,
-	     int right, u32 n)
+static void
+bitfill(unsigned long __iomem *dst, int dst_idx, unsigned long pat, int left,
+        int right, unsigned n)
 {
 	unsigned long first, last;
 
 	if (!n)
 		return;
-	
+
 	first = ~0UL >> dst_idx;
 	last = ~(~0UL >> ((dst_idx+n) % BITS_PER_LONG));
-	
+
 	if (dst_idx+n <= BITS_PER_LONG) {
 		// Single word
 		if (last)
@@ -203,7 +189,7 @@
 			pat = pat << left | pat >> right;
 			n -= BITS_PER_LONG-dst_idx;
 		}
-		
+
 		// Main chunk
 		n /= BITS_PER_LONG;
 		while (n >= 4) {
@@ -221,28 +207,28 @@
 			FB_WRITEL(pat, dst++);
 			pat = pat << left | pat >> right;
 		}
-		
+
 		// Trailing bits
 		if (last)
 			FB_WRITEL(comp(pat, FB_READL(dst), first), dst);
 	}
 }
 
-void bitfill32_rev(unsigned long __iomem *dst, int dst_idx, u32 pat, u32 n)
+    /*
+     *  Aligned pattern invert using 32/64-bit memory accesses
+     */
+static void
+bitfill32_rev(unsigned long __iomem *dst, int dst_idx, unsigned long pat, unsigned n)
 {
 	unsigned long val = pat, dat;
 	unsigned long first, last;
-	
+
 	if (!n)
 		return;
-	
-#if BITS_PER_LONG == 64
-	val |= val << 32;
-#endif
-	
+
 	first = ~0UL >> dst_idx;
 	last = ~(~0UL >> ((dst_idx+n) % BITS_PER_LONG));
-	
+
 	if (dst_idx+n <= BITS_PER_LONG) {
 		// Single word
 		if (last)
@@ -252,13 +238,13 @@
 	} else {
 		// Multiple destination words
 		// Leading bits
-		if (first) {
+		if (first!=0UL) {
 			dat = FB_READL(dst);
 			FB_WRITEL(comp(dat ^ val, dat, first), dst);
 			dst++;
 			n -= BITS_PER_LONG-dst_idx;
 		}
-		
+
 		// Main chunk
 		n /= BITS_PER_LONG;
 		while (n >= 8) {
@@ -283,34 +269,35 @@
 		while (n--) {
 			FB_WRITEL(FB_READL(dst) ^ val, dst);
 			dst++;
-		}		
+		}
 		// Trailing bits
 		if (last) {
 			dat = FB_READL(dst);
-			FB_WRITEL(comp(dat ^ val, dat, first), dst);
+			FB_WRITEL(comp(dat ^ val, dat, last), dst);
 		}
 	}
 }
 
 
     /*
-     *  Unaligned generic pattern fill using 32/64-bit memory accesses
+     *  Unaligned generic pattern invert using 32/64-bit memory accesses
      *  The pattern must have been expanded to a full 32/64-bit value
      *  Left/right are the appropriate shifts to convert to the pattern to be
      *  used for the next 32/64-bit word
      */
 
-void bitfill_rev(unsigned long __iomem *dst, int dst_idx, unsigned long pat, int left,
-	     int right, u32 n)
+static void
+bitfill_rev(unsigned long __iomem *dst, int dst_idx, unsigned long pat, int left,
+            int right, unsigned n)
 {
 	unsigned long first, last, dat;
 
 	if (!n)
 		return;
-	
+
 	first = ~0UL >> dst_idx;
 	last = ~(~0UL >> ((dst_idx+n) % BITS_PER_LONG));
-	
+
 	if (dst_idx+n <= BITS_PER_LONG) {
 		// Single word
 		if (last)
@@ -319,15 +306,16 @@
 		FB_WRITEL(comp(dat ^ pat, dat, first), dst);
 	} else {
 		// Multiple destination words
+
 		// Leading bits
-		if (first) {
+		if (first != 0UL) {
 			dat = FB_READL(dst);
 			FB_WRITEL(comp(dat ^ pat, dat, first), dst);
 			dst++;
 			pat = pat << left | pat >> right;
 			n -= BITS_PER_LONG-dst_idx;
 		}
-		
+
 		// Main chunk
 		n /= BITS_PER_LONG;
 		while (n >= 4) {
@@ -350,11 +338,11 @@
 			dst++;
 			pat = pat << left | pat >> right;
 		}
-		
+
 		// Trailing bits
 		if (last) {
 			dat = FB_READL(dst);
-			FB_WRITEL(comp(dat ^ pat, dat, first), dst);
+			FB_WRITEL(comp(dat ^ pat, dat, last), dst);
 		}
 	}
 }
@@ -365,6 +353,7 @@
 	unsigned long x2, y2, vxres, vyres;
 	unsigned long height, width, fg;
 	unsigned long __iomem *dst;
+	unsigned long pat;
 	int dst_idx, left;
 
 	if (p->state != FBINFO_STATE_RUNNING)
@@ -372,32 +361,34 @@
 
 	/* We want rotation but lack hardware to do it for us. */
 	if (!p->fbops->fb_rotate && p->var.rotate) {
-	}	
-	
+	}
+
 	vxres = p->var.xres_virtual;
 	vyres = p->var.yres_virtual;
 
-	if (!rect->width || !rect->height || 
+	if (!rect->width || !rect->height ||
 	    rect->dx > vxres || rect->dy > vyres)
 		return;
 
 	/* We could use hardware clipping but on many cards you get around
 	 * hardware clipping by writing to framebuffer directly. */
-	
+
 	x2 = rect->dx + rect->width;
 	y2 = rect->dy + rect->height;
 	x2 = x2 < vxres ? x2 : vxres;
 	y2 = y2 < vyres ? y2 : vyres;
 	width = x2 - rect->dx;
 	height = y2 - rect->dy;
-	
+
 	if (p->fix.visual == FB_VISUAL_TRUECOLOR ||
 	    p->fix.visual == FB_VISUAL_DIRECTCOLOR )
 		fg = ((u32 *) (p->pseudo_palette))[rect->color];
 	else
 		fg = rect->color;
-	
-	dst = (unsigned long __iomem *)((unsigned long)p->screen_base & 
+
+	pat = pixel_to_pat( bpp, fg);
+
+	dst = (unsigned long __iomem *)((unsigned long)p->screen_base &
 				~(BYTES_PER_LONG-1));
 	dst_idx = ((unsigned long)p->screen_base & (BYTES_PER_LONG-1))*8;
 	dst_idx += rect->dy*p->fix.line_length*8+rect->dx*bpp;
@@ -406,16 +397,18 @@
 	if (p->fbops->fb_sync)
 		p->fbops->fb_sync(p);
 	if (!left) {
-		u32 pat = pixel_to_pat32(p, fg);
-		void (*fill_op32)(unsigned long __iomem *dst, int dst_idx, u32 pat, 
-				  u32 n) = NULL;
-		
+		void (*fill_op32)(unsigned long __iomem *dst, int dst_idx,
+		                  unsigned long pat, unsigned n) = NULL;
+
 		switch (rect->rop) {
 		case ROP_XOR:
 			fill_op32 = bitfill32_rev;
 			break;
 		case ROP_COPY:
+			fill_op32 = bitfill32;
+			break;
 		default:
+			printk( KERN_ERR "cfb_fillrect(): unknown rop, defaulting to ROP_COPY\n");
 			fill_op32 = bitfill32;
 			break;
 		}
@@ -426,26 +419,33 @@
 			dst_idx += p->fix.line_length*8;
 		}
 	} else {
-		unsigned long pat = pixel_to_pat(p, fg, (left-dst_idx) % bpp);
+		int rot = (left-dst_idx) % bpp;
+
+		/* rotate pattern to correct start position */
+		pat = pat << rot | pat >> (bpp-rot);
+
 		int right = bpp-left;
 		int r;
-		void (*fill_op)(unsigned long __iomem *dst, int dst_idx, 
-				unsigned long pat, int left, int right, 
-				u32 n) = NULL;
-		
+		void (*fill_op)(unsigned long __iomem *dst, int dst_idx,
+		                unsigned long pat, int left, int right,
+		                unsigned n) = NULL;
+
 		switch (rect->rop) {
 		case ROP_XOR:
 			fill_op = bitfill_rev;
 			break;
 		case ROP_COPY:
+			fill_op = bitfill;
+			break;
 		default:
+			printk( KERN_ERR "cfb_fillrect(): unknown rop, defaulting to ROP_COPY\n");
 			fill_op = bitfill;
 			break;
 		}
 		while (height--) {
 			dst += dst_idx >> SHIFT_PER_LONG;
 			dst_idx &= (BITS_PER_LONG-1);
-			fill_op(dst, dst_idx, pat, left, right, 
+			fill_op(dst, dst_idx, pat, left, right,
 				width*bpp);
 			r = (p->fix.line_length*8) % bpp;
 			pat = pat << (bpp-r) | pat >> r;

next prev parent reply	other threads:[~2005-02-16 14:04 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2005-02-16  9:23 [patch] generic framebuffer support, cleanups and buglets with BPP < 32 (fwd) Geert Uytterhoeven
2005-02-16 13:55 ` Antonino A. Daplas
2005-02-16 14:06   ` Ulrich Eckhardt [this message]
2005-02-16 20:07     ` James Simmons
2005-02-16 22:37     ` James Simmons
2005-02-16 22:38     ` James Simmons

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=200502161506.45868.eckhardt@satorlaser.com \
    --to=eckhardt@satorlaser.com \
    --cc=linux-fbdev-devel@lists.sourceforge.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.