linux-fbdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Antonino Daplas <adaplas@pol.net>
To: James Simmons <captain_delete@yahoo.com>
Cc: fbdev <linux-fbdev-devel@lists.sourceforge.net>
Subject: Re: RFC: Optimizing putcs()
Date: 07 Aug 2002 02:11:55 +0800	[thread overview]
Message-ID: <1028657448.554.6.camel@daplas> (raw)
In-Reply-To: <20020806054957.44715.qmail@web13004.mail.yahoo.com>

[-- Attachment #1: Type: text/plain, Size: 556 bytes --]

On Tue, 2002-08-06 at 13:49, James Simmons wrote:

> Ah. Thank you for solving this problem for me. I
> haven't had time to figure it out. Also left to be
> done is 24 bpp support as well as drawing the penguin.
> 
I took a crack at adding bpp24 support to the cfb_* drawing
functions.  I tried to keep as much of the original code as possible, so
the result may not be optimal.  My test shows, though, that bpp24 should
be as fast as (maybe a tad slower than) bpp32.

As for drawing the logo, will the source contain indices into the
palette?

Tony



[-- Attachment #2: bpp24.diff --]
[-- Type: text/x-patch, Size: 5232 bytes --]

diff -Naur linux-2.5.27/drivers/video/cfbcopyarea.c linux/drivers/video/cfbcopyarea.c
--- linux-2.5.27/drivers/video/cfbcopyarea.c	Tue Aug  6 17:55:48 2002
+++ linux/drivers/video/cfbcopyarea.c	Tue Aug  6 17:56:19 2002
@@ -83,7 +83,7 @@
 		lineincr = -linesize;
 	}
 
-	if ((BITS_PER_LONG % p->var.bits_per_pixel) == 0) {
+	if ((BITS_PER_LONG % p->var.bits_per_pixel) == 0) { 
 		int ppw = BITS_PER_LONG / p->var.bits_per_pixel;
 		int n = ((area->width * p->var.bits_per_pixel) >> 3);
 
@@ -103,7 +103,6 @@
 			n -= end_index;
 		}
 		n /= bpl;
-
 		if (n <= 0) {
 			if (start_mask) {
 				if (end_mask)
@@ -219,4 +218,32 @@
 			}
 		}
 	}
+	else {
+		int n = ((area->width * p->var.bits_per_pixel) >> 3);
+		int n16 = (n16 >> 4) << 4;
+		int n_fract = n - n16;
+		int rows;
+
+		if (area->dy < area->sy
+		    || (area->dy == area->sy && area->dx < area->sx)) {
+			for (rows = height; rows--; ) {
+				if (n16)
+					fast_memmove(dst1, src1, n16);
+				if (n_fract)
+					fb_memmove(dst1+n16, src1+n16, n_fract);
+				dst1 += linesize;
+				src1 += linesize;
+			}
+		}
+		else {
+			for (rows = height; rows--; ) {
+				if (n16)
+					fast_memmove(dst1, src1, n16);
+				if (n_fract)
+					fb_memmove(dst1+n16, src1+n16, n_fract);
+				dst1 -= linesize;
+				src1 -= linesize;
+			}
+		}
+	}			
 }
diff -Naur linux-2.5.27/drivers/video/cfbfillrect.c linux/drivers/video/cfbfillrect.c
--- linux-2.5.27/drivers/video/cfbfillrect.c	Tue Aug  6 17:55:54 2002
+++ linux/drivers/video/cfbfillrect.c	Tue Aug  6 17:56:23 2002
@@ -57,7 +57,7 @@
 	else
 		fg = fgcolor = rect->color;
 
-	for (i = 0; i < ppw - 1; i++) {
+	for (i = 0; i < ppw-1; i++) {
 		fg <<= p->var.bits_per_pixel;
 		fg |= fgcolor;
 	}
@@ -161,45 +161,31 @@
 			break;
 		}
 	} else {
-		/* Odd modes like 24 or 80 bits per pixel */
-		start_mask = fg >> (start_index * p->var.bits_per_pixel);
-		end_mask = fg << (end_index * p->var.bits_per_pixel);
-		/* start_mask =& PFILL24(x1,fg);
-		   end_mask_or = end_mask & PFILL24(x1+width-1,fg); */
-
-		n = (rect->width - start_index - end_index) / ppw;
+		char *dst2;
+		int bytes = (p->var.bits_per_pixel + 7) >> 3;
 
+		n = rect->width;
+		fg = fgcolor;
 		switch (rect->rop) {
 		case ROP_COPY:
 			do {
-				dst = (unsigned long *) dst1;
-				if (start_mask)
-					*dst |= start_mask;
-				if ((start_index + rect->width) > ppw)
-					dst++;
-
+				dst2 = dst1;
 				/* XXX: slow */
+				/* YYY: extremely slow */
 				for (i = 0; i < n; i++) {
-					*dst++ = fg;
+					*(unsigned long *) dst2 = fg;
+					dst2 += bytes;
 				}
-				if (end_mask)
-					*dst |= end_mask;
 				dst1 += linesize;
 			} while (--height);
 			break;
 		case ROP_XOR:
 			do {
-				dst = (unsigned long *) dst1;
-				if (start_mask)
-					*dst ^= start_mask;
-				if ((start_mask + rect->width) > ppw)
-					dst++;
-
+				dst2 = dst1;
 				for (i = 0; i < n; i++) {
-					*dst++ ^= fg;	/* PFILL24(fg,x1+i); */
+					*(unsigned long *) dst2 ^= fg;	/* PFILL24(fg,x1+i); */
+					dst2 += bytes;
 				}
-				if (end_mask)
-					*dst ^= end_mask;
 				dst1 += linesize;
 			} while (--height);
 			break;
diff -Naur linux-2.5.27/drivers/video/cfbimgblt.c linux/drivers/video/cfbimgblt.c
--- linux-2.5.27/drivers/video/cfbimgblt.c	Tue Aug  6 17:55:41 2002
+++ linux/drivers/video/cfbimgblt.c	Tue Aug  6 17:56:14 2002
@@ -47,9 +47,8 @@
 	int x2, y2, n, i, j, k, l = 7;
 	unsigned long tmp = ~0 << (BITS_PER_LONG - p->var.bits_per_pixel);
 	unsigned long fgx, bgx, fgcolor, bgcolor, eorx;	
-	unsigned long end_mask;
-	unsigned long *dst = NULL;
-	u8 *dst1;
+	unsigned long end_mask, bytes = 4;
+	u8 *dst = NULL, *dst1;
 	u8 *src;
 
 	/* 
@@ -64,7 +63,7 @@
 	y2 = y2 < p->var.yres_virtual ? y2 : p->var.yres_virtual;
 	image->width  = x2 - image->dx;
 	image->height = y2 - image->dy;
-  
+
 	dst1 = p->screen_base + image->dy * p->fix.line_length + 
 		((image->dx * p->var.bits_per_pixel) >> 3);
   
@@ -88,13 +87,26 @@
 			fgx |= fgcolor;
 			bgx |= bgcolor;
 		}
-		eorx = fgx ^ bgx;
+
+		/*
+		 * BPP kludge, should be generalized/optimized later
+		 */
+		if (BITS_PER_LONG % p->var.bits_per_pixel) {
+			bytes = (p->var.bits_per_pixel + 7) >> 3;
+			tmp = ~0UL >> (BITS_PER_LONG - p->var.bits_per_pixel);
+			fgx = fgcolor;
+			bgx = bgcolor;
+			ppw = 1;
+		}
+
 		n = ((image->width + 7) >> 3);
 		pad = (n << 3) - image->width;
 		n = image->width % ppw;
 
+		eorx = fgx ^ bgx;
+		
 		for (i = 0; i < image->height; i++) {
-			dst = (unsigned long *) dst1;
+			dst = dst1;
 		
 			for (j = image->width/ppw; j > 0; j--) {
 				end_mask = 0;
@@ -105,8 +117,8 @@
 					l--;
 					if (l < 0) { l = 7; src++; }
 				}
-				fb_writel((end_mask & eorx)^bgx, dst);
-				dst++;
+				fb_writel((end_mask & eorx)^bgx, (unsigned long *) dst);
+				dst += bytes;
 			}
 		
 			if (n) {
@@ -117,8 +129,8 @@
 					l--;
 					if (l < 0) { l = 7; src++; }
 				}
-				fb_writel((end_mask & eorx)^bgx, dst);
-				dst++;
+				fb_writel((end_mask & eorx)^bgx, (unsigned long *) dst);
+				dst += bytes;
 			}
 			l -= pad;		
 			dst1 += p->fix.line_length;	

       reply	other threads:[~2002-08-06 18:07 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <20020806054957.44715.qmail@web13004.mail.yahoo.com>
2002-08-06 18:11 ` Antonino Daplas [this message]
2002-08-08 18:31   ` RFC: Optimizing putcs() James Simmons
2002-08-05 22:04 Antonino Daplas
2002-08-06 20:08 ` Geert Uytterhoeven
2002-08-07  0:17   ` Antonino Daplas
2002-08-07  5:25 ` Antonino Daplas

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1028657448.554.6.camel@daplas \
    --to=adaplas@pol.net \
    --cc=captain_delete@yahoo.com \
    --cc=linux-fbdev-devel@lists.sourceforge.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).