* [PATCH] packed pixel image blit optimizations.
@ 2007-05-02 20:50 James Simmons
2007-05-02 21:33 ` Antonino A. Daplas
0 siblings, 1 reply; 10+ messages in thread
From: James Simmons @ 2007-05-02 20:50 UTC (permalink / raw)
To: Linux Fbdev development list
This patch is the start of my original work. The goal was:
1) One function to draw at any color depth.
2) 16 color image data does not need special padding.
3) Use fb_pixmap to control drawing to the hardware.
The first two were done. The last one needs more work. Using
access_align I can control how data is transferred to the framebuffer.
Say you have video hardware that only allows data to be written to
the framebuffer in 16-bit units. Then an image that is 32 bpp and 10 pixels
wide will take 20 writes. Need to rework the slow path to handle all color
depths. I have managed to get the fast monochrome images working for
this except for one strange bug. When access_align is set to, say, 4 then it
behaves as if it were 4 bpp. More work is needed for this patch.
diff -urN -X linus-2.6/Documentation/dontdiff linus-2.6/drivers/video/cfbimgblt.c fbdev-2.6/drivers/video/cfbimgblt.c
--- linus-2.6/drivers/video/cfbimgblt.c 2006-11-07 05:38:36.000000000 -0500
+++ fbdev-2.6/drivers/video/cfbimgblt.c 2006-11-12 10:29:49.000000000 -0500
@@ -1,7 +1,7 @@
/*
* Generic BitBLT function for frame buffer with packed pixels of any depth.
*
- * Copyright (C) June 1999 James Simmons
+ * Copyright (C) June 1999 - 2006 James Simmons
*
* This file is subject to the terms and conditions of the GNU General Public
* License. See the file COPYING in the main directory of this archive for
@@ -17,7 +17,7 @@
* their are cards with hardware that coverts images of various depths to the
* framebuffer depth. But not every card has this. All images must be rounded
* up to the nearest byte. For example a bitmap 12 bits wide must be two
- * bytes width.
+ * bytes width.
*
* Tony:
* Incorporate mask tables similar to fbcon-cfb*.c in 2.4 API. This speeds
@@ -32,6 +32,7 @@
#include <linux/module.h>
#include <linux/string.h>
#include <linux/fb.h>
+#include <asm/byteorder.h>
#include <asm/types.h>
#define DEBUG
@@ -43,161 +44,99 @@
#endif
static u32 cfb_tab8[] = {
-#if defined(__BIG_ENDIAN)
0x00000000,0x000000ff,0x0000ff00,0x0000ffff,
0x00ff0000,0x00ff00ff,0x00ffff00,0x00ffffff,
0xff000000,0xff0000ff,0xff00ff00,0xff00ffff,
0xffff0000,0xffff00ff,0xffffff00,0xffffffff
-#elif defined(__LITTLE_ENDIAN)
- 0x00000000,0xff000000,0x00ff0000,0xffff0000,
- 0x0000ff00,0xff00ff00,0x00ffff00,0xffffff00,
- 0x000000ff,0xff0000ff,0x00ff00ff,0xffff00ff,
- 0x0000ffff,0xff00ffff,0x00ffffff,0xffffffff
-#else
-#error FIXME: No endianness??
-#endif
};
static u32 cfb_tab16[] = {
-#if defined(__BIG_ENDIAN)
0x00000000, 0x0000ffff, 0xffff0000, 0xffffffff
-#elif defined(__LITTLE_ENDIAN)
- 0x00000000, 0xffff0000, 0x0000ffff, 0xffffffff
-#else
-#error FIXME: No endianness??
-#endif
};
static u32 cfb_tab32[] = {
0x00000000, 0xffffffff
};
-#define FB_WRITEL fb_writel
-#define FB_READL fb_readl
+#define FB_WRITEL fb_writel
+#define FB_READL fb_readl
-static inline void color_imageblit(const struct fb_image *image,
- struct fb_info *p, u8 __iomem *dst1,
- u32 start_index,
- u32 pitch_index)
+static inline void slow_imageblit(const struct fb_image *image,
+ struct fb_info *p, u8 __iomem *dst,
+ u32 start_index, u32 pitch_index)
{
/* Draw the penguin */
- u32 __iomem *dst, *dst2;
- u32 color = 0, val, shift;
- int i, n, bpp = p->var.bits_per_pixel;
- u32 null_bits = 32 - bpp;
+ int spitch = (image->width * image->depth + 7) >> 3;
+ const u32 *src = (const u32 *) image->data;
+ int scan_align = p->pixmap.scan_align - 1;
u32 *palette = (u32 *) p->pseudo_palette;
- const u8 *src = image->data;
+ int bpp = p->var.bits_per_pixel, i, n;
+ int mask = (1 << image->depth) - 1;
+ int bits = p->pixmap.access_align;
+ int bpw = bits >> 3, s = 32;
+ int null_bits = bits - bpp;
+ u32 color = 0, val, shift;
+ u32 __iomem *dst2;
+ u8 __iomem *dst1;
- dst2 = (u32 __iomem *) dst1;
+ spitch = (spitch + scan_align) & ~scan_align;
+ dst2 = (u32 __iomem *) dst;
for (i = image->height; i--; ) {
n = image->width;
- dst = (u32 __iomem *) dst1;
- shift = 0;
- val = 0;
-
+ shift = val = 0;
+ dst1 = dst;
+
+ /* write leading bits */
if (start_index) {
- u32 start_mask = ~(FB_SHIFT_HIGH(~(u32)0, start_index));
- val = FB_READL(dst) & start_mask;
+ u32 start_mask = (~(u32)0 << start_index);
+ val = FB_READL(dst1);
+ val &= start_mask;
shift = start_index;
}
- while (n--) {
- if (p->fix.visual == FB_VISUAL_TRUECOLOR ||
- p->fix.visual == FB_VISUAL_DIRECTCOLOR )
- color = palette[*src];
- else
- color = *src;
- color <<= FB_LEFT_POS(bpp);
- val |= FB_SHIFT_HIGH(color, shift);
- if (shift >= null_bits) {
- FB_WRITEL(val, dst++);
-
- val = (shift == null_bits) ? 0 :
- FB_SHIFT_LOW(color, 32 - shift);
- }
- shift += bpp;
- shift &= (32 - 1);
- src++;
- }
- if (shift) {
- u32 end_mask = FB_SHIFT_HIGH(~(u32)0, shift);
-
- FB_WRITEL((FB_READL(dst) & end_mask) | val, dst);
- }
- dst1 += p->fix.line_length;
- if (pitch_index) {
- dst2 += p->fix.line_length;
- dst1 = (u8 __iomem *)((long __force)dst2 & ~(sizeof(u32) - 1));
- start_index += pitch_index;
- start_index &= 32 - 1;
- }
- }
-}
+ while (n--) {
+ if (!s) { src++; s = 32; }
+ s -= image->depth;
-static inline void slow_imageblit(const struct fb_image *image, struct fb_info *p,
- u8 __iomem *dst1, u32 fgcolor,
- u32 bgcolor,
- u32 start_index,
- u32 pitch_index)
-{
- u32 shift, color = 0, bpp = p->var.bits_per_pixel;
- u32 __iomem *dst, *dst2;
- u32 val, pitch = p->fix.line_length;
- u32 null_bits = 32 - bpp;
- u32 spitch = (image->width+7)/8;
- const u8 *src = image->data, *s;
- u32 i, j, l;
-
- dst2 = (u32 __iomem *) dst1;
- fgcolor <<= FB_LEFT_POS(bpp);
- bgcolor <<= FB_LEFT_POS(bpp);
+ color = (swab32p(src) & (mask << s));
+ if (image->depth == 1)
+ color = color ? image->fg_color : image->bg_color;
+ else
+ color >>= s;
- for (i = image->height; i--; ) {
- shift = val = 0;
- l = 8;
- j = image->width;
- dst = (u32 __iomem *) dst1;
- s = src;
+ if (p->fix.visual == FB_VISUAL_TRUECOLOR ||
+ p->fix.visual == FB_VISUAL_DIRECTCOLOR)
+ color = palette[color];
- /* write leading bits */
- if (start_index) {
- u32 start_mask = ~(FB_SHIFT_HIGH(~(u32)0,start_index));
- val = FB_READL(dst) & start_mask;
- shift = start_index;
- }
+ val |= (color << shift);
- while (j--) {
- l--;
- color = (*s & (1 << l)) ? fgcolor : bgcolor;
- val |= FB_SHIFT_HIGH(color, shift);
-
- /* Did the bitshift spill bits to the next long? */
+ /* Did the bitshift spill bits into the next long? */
if (shift >= null_bits) {
- FB_WRITEL(val, dst++);
- val = (shift == null_bits) ? 0 :
- FB_SHIFT_LOW(color,32 - shift);
+ FB_WRITEL(val, dst1);
+ dst1 += bpw;
+ val = (shift == null_bits) ? 0 : (color >> (bits - shift));
}
shift += bpp;
- shift &= (32 - 1);
- if (!l) { l = 8; s++; };
+ shift &= (bits - 1);
}
+ s -= (spitch << 3) - image->width * image->depth;
/* write trailing bits */
- if (shift) {
- u32 end_mask = FB_SHIFT_HIGH(~(u32)0, shift);
+ if (shift) {
+ u32 end_mask = (~(u32)0 << shift);
- FB_WRITEL((FB_READL(dst) & end_mask) | val, dst);
+ val = FB_READL(dst1);
+ val &= end_mask;
+ FB_WRITEL(val, dst1);
}
-
- dst1 += pitch;
- src += spitch;
+
+ dst += p->fix.line_length;
if (pitch_index) {
- dst2 += pitch;
- dst1 = (u8 __iomem *)((long __force)dst2 & ~(sizeof(u32) - 1));
+ dst2 += p->fix.line_length;
+ dst = (u8 __iomem *)((long __force)dst2 & ~(bpw - 1));
start_index += pitch_index;
- start_index &= 32 - 1;
+ start_index &= bits - 1;
}
-
}
}
@@ -210,101 +149,105 @@
* beginning and end of a scanline is dword aligned
*/
static inline void fast_imageblit(const struct fb_image *image, struct fb_info *p,
- u8 __iomem *dst1, u32 fgcolor,
- u32 bgcolor)
+ u8 __iomem *dst)
{
- u32 fgx = fgcolor, bgx = bgcolor, bpp = p->var.bits_per_pixel;
- u32 ppw = 32/bpp, spitch = (image->width + 7)/8;
- u32 bit_mask, end_mask, eorx, shift;
+ int scan_align = p->pixmap.scan_align - 1, spitch = (image->width + 7) >> 3;
+ u32 bit_mask, end_mask = 0, eorx, fgx, fgcolor, bgx, bgcolor, val;
+ int access = p->pixmap.access_align, bpw = access >> 3, bits;
+ int bpp = p->var.bits_per_pixel, ppw, shift, i, j;
const char *s = image->data, *src;
- u32 __iomem *dst;
+ u8 __iomem *dst1;
u32 *tab = NULL;
- int i, j, k;
-
+
+ spitch = (spitch + scan_align) & ~scan_align;
+
+ if (p->fix.visual == FB_VISUAL_TRUECOLOR ||
+ p->fix.visual == FB_VISUAL_DIRECTCOLOR) {
+ fgx = fgcolor = ((u32*)(p->pseudo_palette))[image->fg_color];
+ bgx = bgcolor = ((u32*)(p->pseudo_palette))[image->bg_color];
+ } else {
+ fgx = fgcolor = image->fg_color;
+ bgx = bgcolor = image->bg_color;
+ }
+
switch (bpp) {
case 8:
tab = cfb_tab8;
+ ppw = 4;
break;
case 16:
tab = cfb_tab16;
+ ppw = 2;
break;
case 32:
default:
tab = cfb_tab32;
+ ppw = 1;
break;
}
- for (i = ppw-1; i--; ) {
+ for (i = 0; i < 32; i += bpp) {
fgx <<= bpp;
bgx <<= bpp;
fgx |= fgcolor;
bgx |= bgcolor;
}
-
+
bit_mask = (1 << ppw) - 1;
eorx = fgx ^ bgx;
- k = image->width/ppw;
for (i = image->height; i--; ) {
- dst = (u32 __iomem *) dst1, shift = 8; src = s;
-
- for (j = k; j--; ) {
- shift -= ppw;
- end_mask = tab[(*src >> shift) & bit_mask];
- FB_WRITEL((end_mask & eorx)^bgx, dst++);
- if (!shift) { shift = 8; src++; }
+ dst1 = dst, shift = 8, bits = 32; src = s;
+
+ for (j = image->width*bpp; j > 0; j -= access) {
+ bits += access;
+ if (bits >= 32) {
+ shift -= ppw;
+ end_mask = swab32(tab[(*src >> shift) & bit_mask]);
+ if (!shift) { shift = 8; src++; }
+ bits = 0;
+ }
+ val = (end_mask & eorx)^bgx;
+ FB_WRITEL(val, dst1);
+ dst1 += bpw;
}
- dst1 += p->fix.line_length;
+ dst += p->fix.line_length;
s += spitch;
}
-}
-
+}
+
void cfb_imageblit(struct fb_info *p, const struct fb_image *image)
{
- u32 fgcolor, bgcolor, start_index, bitstart, pitch_index = 0;
- u32 bpl = sizeof(u32), bpp = p->var.bits_per_pixel;
- u32 width = image->width;
- u32 dx = image->dx, dy = image->dy;
+ u32 bits = p->pixmap.access_align, bpp = p->var.bits_per_pixel;
+ u32 width = image->width, dx = image->dx, dy = image->dy;
+ u32 start_index, bitstart, pitch_index = 0;
+ int bpl = bits >> 3;
u8 __iomem *dst1;
if (p->state != FBINFO_STATE_RUNNING)
return;
- bitstart = (dy * p->fix.line_length * 8) + (dx * bpp);
- start_index = bitstart & (32 - 1);
- pitch_index = (p->fix.line_length & (bpl - 1)) * 8;
+ bitstart = ((dy * p->fix.line_length) << 3) + (dx * bpp);
+ start_index = bitstart & (bits - 1);
+ pitch_index = (p->fix.line_length & (bpl - 1)) << 3;
- bitstart /= 8;
+ bitstart >>= 3;
bitstart &= ~(bpl - 1);
dst1 = p->screen_base + bitstart;
if (p->fbops->fb_sync)
p->fbops->fb_sync(p);
- if (image->depth == 1) {
- if (p->fix.visual == FB_VISUAL_TRUECOLOR ||
- p->fix.visual == FB_VISUAL_DIRECTCOLOR) {
- fgcolor = ((u32*)(p->pseudo_palette))[image->fg_color];
- bgcolor = ((u32*)(p->pseudo_palette))[image->bg_color];
- } else {
- fgcolor = image->fg_color;
- bgcolor = image->bg_color;
- }
-
- if (32 % bpp == 0 && !start_index && !pitch_index &&
- ((width & (32/bpp-1)) == 0) &&
- bpp >= 8 && bpp <= 32)
- fast_imageblit(image, p, dst1, fgcolor, bgcolor);
- else
- slow_imageblit(image, p, dst1, fgcolor, bgcolor,
- start_index, pitch_index);
- } else
- color_imageblit(image, p, dst1, start_index, pitch_index);
+ if (bits % bpp == 0 && image->depth == 1 && !start_index && !pitch_index && bpp >= 8 && bpp <= 32
+ && ((width & (bits/bpp-1)) == 0))
+ fast_imageblit(image, p, dst1);
+ else
+ slow_imageblit(image, p, dst1, start_index, pitch_index);
}
EXPORT_SYMBOL(cfb_imageblit);
-MODULE_AUTHOR("James Simmons <jsimmons@users.sf.net>");
+MODULE_AUTHOR("James Simmons <jsimmons@infradead.org>");
MODULE_DESCRIPTION("Generic software accelerated imaging drawing");
MODULE_LICENSE("GPL");
diff -urN -X linus-2.6/Documentation/dontdiff linus-2.6/drivers/video/fbmem.c fbdev-2.6/drivers/video/fbmem.c
--- linus-2.6/drivers/video/fbmem.c 2006-11-07 05:38:36.000000000 -0500
+++ fbdev-2.6/drivers/video/fbmem.c 2006-11-11 10:00:32.000000000 -0500
@@ -243,48 +243,6 @@
palette[i] = i << redshift | i << greenshift | i << blueshift;
}
-static void fb_set_logo(struct fb_info *info,
- const struct linux_logo *logo, u8 *dst,
- int depth)
-{
- int i, j, k;
- const u8 *src = logo->data;
- u8 xor = (info->fix.visual == FB_VISUAL_MONO01) ? 0xff : 0;
- u8 fg = 1, d;
-
- if (fb_get_color_depth(&info->var, &info->fix) == 3)
- fg = 7;
-
- if (info->fix.visual == FB_VISUAL_MONO01 ||
- info->fix.visual == FB_VISUAL_MONO10)
- fg = ~((u8) (0xfff << info->var.green.length));
-
- switch (depth) {
- case 4:
- for (i = 0; i < logo->height; i++)
- for (j = 0; j < logo->width; src++) {
- *dst++ = *src >> 4;
- j++;
- if (j < logo->width) {
- *dst++ = *src & 0x0f;
- j++;
- }
- }
- break;
- case 1:
- for (i = 0; i < logo->height; i++) {
- for (j = 0; j < logo->width; src++) {
- d = *src ^ xor;
- for (k = 7; k >= 0; k--) {
- *dst++ = ((d >> k) & 1) ? fg : 0;
- j++;
- }
- }
- }
- break;
- }
-}
-
/*
* Three (3) kinds of logo maps exist. linux_logo_clut224 (>16 colors),
* linux_logo_vga16 (16 colors) and linux_logo_mono (2 colors). Depending on
@@ -452,11 +410,9 @@
/* Return if no suitable logo was found */
fb_logo.logo = fb_find_logo(depth);
-
- if (!fb_logo.logo) {
+ if (!fb_logo.logo)
return 0;
- }
-
+
if (rotate == FB_ROTATE_UR || rotate == FB_ROTATE_UD)
yres = info->var.yres;
else
@@ -480,14 +436,13 @@
int fb_show_logo(struct fb_info *info, int rotate)
{
u32 *palette = NULL, *saved_pseudo_palette = NULL;
- unsigned char *logo_new = NULL, *logo_rotate = NULL;
struct fb_image image;
/* Return if the frame buffer is not mapped or suspended */
if (fb_logo.logo == NULL || info->state != FBINFO_STATE_RUNNING)
return 0;
- image.depth = 8;
+ image.depth = fb_logo.depth;
image.data = fb_logo.logo->data;
if (fb_logo.needs_cmapreset)
@@ -508,17 +463,13 @@
info->pseudo_palette = palette;
}
- if (fb_logo.depth <= 4) {
- logo_new = kmalloc(fb_logo.logo->width * fb_logo.logo->height,
- GFP_KERNEL);
- if (logo_new == NULL) {
- kfree(palette);
- if (saved_pseudo_palette)
- info->pseudo_palette = saved_pseudo_palette;
- return 0;
+ if (fb_logo.depth == 1) {
+ if (info->fix.visual == FB_VISUAL_MONO01) {
+ u32 fg = image.fg_color;
+
+ image.fg_color = image.bg_color;
+ image.bg_color = fg;
}
- image.data = logo_new;
- fb_set_logo(info, fb_logo.logo, logo_new, fb_logo.depth);
}
image.dx = 0;
@@ -527,19 +478,17 @@
image.height = fb_logo.logo->height;
if (rotate) {
- logo_rotate = kmalloc(fb_logo.logo->width *
- fb_logo.logo->height, GFP_KERNEL);
+ unsigned char *logo_rotate = kmalloc(fb_logo.logo->width *
+ fb_logo.logo->height, GFP_KERNEL);
if (logo_rotate)
fb_rotate_logo(info, logo_rotate, &image, rotate);
+ kfree(logo_rotate);
}
-
fb_do_show_logo(info, &image, rotate);
kfree(palette);
if (saved_pseudo_palette != NULL)
info->pseudo_palette = saved_pseudo_palette;
- kfree(logo_new);
- kfree(logo_rotate);
return fb_logo.logo->height;
}
#else
-------------------------------------------------------------------------
This SF.net email is sponsored by DB2 Express
Download DB2 Express C - the FREE version of DB2 express and take
control of your XML. No limits. Just data. Click to get it now.
http://sourceforge.net/powerbar/db2/
^ permalink raw reply [flat|nested] 10+ messages in thread* Re: [PATCH] packed pixel image blit optimizations.
2007-05-02 20:50 [PATCH] packed pixel image blit optimizations James Simmons
@ 2007-05-02 21:33 ` Antonino A. Daplas
2007-05-03 14:04 ` James Simmons
0 siblings, 1 reply; 10+ messages in thread
From: Antonino A. Daplas @ 2007-05-02 21:33 UTC (permalink / raw)
To: linux-fbdev-devel; +Cc: James Simmons
On Wed, 2007-05-02 at 21:50 +0100, James Simmons wrote:
> This patch is the start of my original work. The goal was:
>
> 1) One function for to draw at any color depth.
>
> 2) 16 color image data does need special padding.
>
> 3) Use fb_pixmap to control drawing to the hardware.
I'm getting rejects with this patch. Did you benchmark this and
compare the performance with the old one?
Tony
-------------------------------------------------------------------------
This SF.net email is sponsored by DB2 Express
Download DB2 Express C - the FREE version of DB2 express and take
control of your XML. No limits. Just data. Click to get it now.
http://sourceforge.net/powerbar/db2/
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH] packed pixel image blit optimizations.
2007-05-02 21:33 ` Antonino A. Daplas
@ 2007-05-03 14:04 ` James Simmons
2007-05-03 19:24 ` Antonino A. Daplas
0 siblings, 1 reply; 10+ messages in thread
From: James Simmons @ 2007-05-03 14:04 UTC (permalink / raw)
To: Antonino A. Daplas; +Cc: linux-fbdev-devel
> On Wed, 2007-05-02 at 21:50 +0100, James Simmons wrote:
> > This patch is the start of my original work. The goal was:
> >
> > 1) One function for to draw at any color depth.
> >
> > 2) 16 color image data does need special padding.
> >
> > 3) Use fb_pixmap to control drawing to the hardware.
>
> I'm getting rejects with this patch. Did you benchmark this and
> compared the performance with the old one?
Need to update patch to newest kernel. Not yet, there are still bugs to
hammer away on.
-------------------------------------------------------------------------
This SF.net email is sponsored by DB2 Express
Download DB2 Express C - the FREE version of DB2 express and take
control of your XML. No limits. Just data. Click to get it now.
http://sourceforge.net/powerbar/db2/
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH] packed pixel image blit optimizations.
2007-05-03 14:04 ` James Simmons
@ 2007-05-03 19:24 ` Antonino A. Daplas
2007-05-03 19:34 ` James Simmons
0 siblings, 1 reply; 10+ messages in thread
From: Antonino A. Daplas @ 2007-05-03 19:24 UTC (permalink / raw)
To: James Simmons; +Cc: linux-fbdev-devel
On Thu, 2007-05-03 at 15:04 +0100, James Simmons wrote:
> > On Wed, 2007-05-02 at 21:50 +0100, James Simmons wrote:
> > > This patch is the start of my original work. The goal was:
> > >
> > > 1) One function for to draw at any color depth.
> > >
> > > 2) 16 color image data does need special padding.
> > >
> > > 3) Use fb_pixmap to control drawing to the hardware.
> >
> > I'm getting rejects with this patch. Did you benchmark this and
> > compared the performance with the old one?
>
> Need to update patch to newest kernel. Not yet, there are still bugs to
> hammer away on.
I don't have to test the patch to see that it is not endian-correct and
that it will be many times slower than the old code.
Tony
-------------------------------------------------------------------------
This SF.net email is sponsored by DB2 Express
Download DB2 Express C - the FREE version of DB2 express and take
control of your XML. No limits. Just data. Click to get it now.
http://sourceforge.net/powerbar/db2/
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH] packed pixel image blit optimizations.
2007-05-03 19:24 ` Antonino A. Daplas
@ 2007-05-03 19:34 ` James Simmons
2007-05-03 19:44 ` Antonino A. Daplas
0 siblings, 1 reply; 10+ messages in thread
From: James Simmons @ 2007-05-03 19:34 UTC (permalink / raw)
To: Antonino A. Daplas; +Cc: linux-fbdev-devel
> > > > This patch is the start of my original work. The goal was:
> > > >
> > > > 1) One function for to draw at any color depth.
> > > >
> > > > 2) 16 color image data does need special padding.
> > > >
> > > > 3) Use fb_pixmap to control drawing to the hardware.
> > >
> > > I'm getting rejects with this patch. Did you benchmark this and
> > > compared the performance with the old one?
> >
> > Need to update patch to newest kernel. Not yet, there are still bugs to
> > hammer away on.
>
> I don't have to test the patch to see that it is not endian-correct and
> that it will be many times slower than the old code.
I have tested on big endian framebuffers and it does work. I was attempting
to shrink the code for embedded devices. We can have the fastest code possible
but it's no good if a cell phone can't run it :-(
-------------------------------------------------------------------------
This SF.net email is sponsored by DB2 Express
Download DB2 Express C - the FREE version of DB2 express and take
control of your XML. No limits. Just data. Click to get it now.
http://sourceforge.net/powerbar/db2/
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH] packed pixel image blit optimizations.
2007-05-03 19:34 ` James Simmons
@ 2007-05-03 19:44 ` Antonino A. Daplas
2007-05-03 19:55 ` James Simmons
0 siblings, 1 reply; 10+ messages in thread
From: Antonino A. Daplas @ 2007-05-03 19:44 UTC (permalink / raw)
To: James Simmons; +Cc: linux-fbdev-devel
On Thu, 2007-05-03 at 20:34 +0100, James Simmons wrote:
> > > > > This patch is the start of my original work. The goal was:
> > > > >
> > > > > 1) One function for to draw at any color depth.
> > > > >
> > > > > 2) 16 color image data does need special padding.
> > > > >
> > > > > 3) Use fb_pixmap to control drawing to the hardware.
> > > >
> > > > I'm getting rejects with this patch. Did you benchmark this and
> > > > compared the performance with the old one?
> > >
> > > Need to update patch to newest kernel. Not yet, there are still bugs to
> > > hammer away on.
> >
> > I don't have to test the patch to see that it is not endian-correct and
> > that it will be many times slower than the old code.
>
> I have tested on big endian framebuffers and it does work.
How about little-endian?
> I was attempting
> to shrink the code for embedded devices. We can have the fastest code possible
> but its no good if a cell phone can't run it :-(
Nonsense, the difference is a few kb at most. And I'm not talking about
a few percent speed difference, I'm talking 2, 3 maybe 5x slower.
Please do basic tests first.
Tony
>
-------------------------------------------------------------------------
This SF.net email is sponsored by DB2 Express
Download DB2 Express C - the FREE version of DB2 express and take
control of your XML. No limits. Just data. Click to get it now.
http://sourceforge.net/powerbar/db2/
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH] packed pixel image blit optimizations.
2007-05-03 19:44 ` Antonino A. Daplas
@ 2007-05-03 19:55 ` James Simmons
2007-05-03 20:06 ` Antonino A. Daplas
0 siblings, 1 reply; 10+ messages in thread
From: James Simmons @ 2007-05-03 19:55 UTC (permalink / raw)
To: Antonino A. Daplas; +Cc: linux-fbdev-devel
> > > I don't have to test the patch to see that it is not endian-correct and
> > > that it will be many times slower than the old code.
> >
> > I have tested on big endian framebuffers and it does work.
>
> How about little-endian?
Worked as well.
> > I was attempting
> > to shrink the code for embedded devices. We can have the fastest code possible
> > but its no good if a cell phone can't run it :-(
>
> Nonsense, the difference is a few kb at most. And I'm not talking about
> a few percent speed difference, I'm talking 2, 3 maybe 5x slower.
>
> Please do basic tests first.
Show me the numbers!!!
-------------------------------------------------------------------------
This SF.net email is sponsored by DB2 Express
Download DB2 Express C - the FREE version of DB2 express and take
control of your XML. No limits. Just data. Click to get it now.
http://sourceforge.net/powerbar/db2/
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH] packed pixel image blit optimizations.
2007-05-03 19:55 ` James Simmons
@ 2007-05-03 20:06 ` Antonino A. Daplas
2007-05-08 19:58 ` James Simmons
0 siblings, 1 reply; 10+ messages in thread
From: Antonino A. Daplas @ 2007-05-03 20:06 UTC (permalink / raw)
To: James Simmons; +Cc: linux-fbdev-devel
On Thu, 2007-05-03 at 20:55 +0100, James Simmons wrote:
> > > > I don't have to test the patch to see that it is not endian-correct and
> > > > that it will be many times slower than the old code.
> > >
> > > I have tested on big endian framebuffers and it does work.
> >
> > How about little-endian?
>
> Worked as well.
>
Not for me, vesafb 8x16 font at 1024x768-8.
> > > I was attempting
> > > to shrink the code for embedded devices. We can have the fastest code possible
> > > but its no good if a cell phone can't run it :-(
> >
> > Nonsense, the difference is a few kb at most. And I'm not talking about
> > a few percent speed difference, I'm talking 2, 3 maybe 5x slower.
> >
> > Please do basic tests first.
>
> Show me the numbers!!!
I think that's your job, but here goes:
time cat linux/MAINTAINERS
old
real 0m5.406s
user 0m0.001s
sys 0m5.401s
your patch
real 0m36.859s
user 0m0.002s
sys 0m36.843s
Tony
-------------------------------------------------------------------------
This SF.net email is sponsored by DB2 Express
Download DB2 Express C - the FREE version of DB2 express and take
control of your XML. No limits. Just data. Click to get it now.
http://sourceforge.net/powerbar/db2/
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH] packed pixel image blit optimizations.
2007-05-03 20:06 ` Antonino A. Daplas
@ 2007-05-08 19:58 ` James Simmons
2007-05-08 23:10 ` Antonino A. Daplas
0 siblings, 1 reply; 10+ messages in thread
From: James Simmons @ 2007-05-08 19:58 UTC (permalink / raw)
To: Antonino A. Daplas; +Cc: linux-fbdev-devel
> On Thu, 2007-05-03 at 20:55 +0100, James Simmons wrote:
> > > > > I don't have to test the patch to see that it is not endian-correct and
> > > > > that it will be many times slower than the old code.
> > > >
> > > > I have tested on big endian framebuffers and it does work.
> > >
> > > How about little-endian?
> >
> > Worked as well.
> >
>
> Not for me, vesafb 8x16 font at 1024x768-8.
How did you do the test below if it doesn't work on little endian? If you
are the only one allowed to work on the core system you need to make
people aware of it. I won't invade your space anymore.
>
> time cat linux/MAINTAINERS
>
> old
> real 0m5.406s
> user 0m0.001s
> sys 0m5.401s
>
> your patch
> real 0m36.859s
> user 0m0.002s
> sys 0m36.843s
>
> Tony
>
>
-------------------------------------------------------------------------
This SF.net email is sponsored by DB2 Express
Download DB2 Express C - the FREE version of DB2 express and take
control of your XML. No limits. Just data. Click to get it now.
http://sourceforge.net/powerbar/db2/
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH] packed pixel image blit optimizations.
2007-05-08 19:58 ` James Simmons
@ 2007-05-08 23:10 ` Antonino A. Daplas
0 siblings, 0 replies; 10+ messages in thread
From: Antonino A. Daplas @ 2007-05-08 23:10 UTC (permalink / raw)
To: James Simmons; +Cc: linux-fbdev-devel
On Tue, 2007-05-08 at 20:58 +0100, James Simmons wrote:
> > On Thu, 2007-05-03 at 20:55 +0100, James Simmons wrote:
> > > > > > I don't have to test the patch to see that it is not endian-correct and
> > > > > > that it will be many times slower than the old code.
> > > > >
> > > > > I have tested on big endian framebuffers and it does work.
> > > >
> > > > How about little-endian?
> > >
> > > Worked as well.
> > >
> >
> > Not for me, vesafb 8x16 font at 1024x768-8.
>
> How did you do the test below if it doesn't work on little endian? If you
> are the only one allowed to work on the core system you need to make
> people aware of it. I won't invade you space anymore.
I get corrupted characters and this is with vesafb, the most widely used
framebuffer driver. I just copied the last screen output to a text file
and opened it in X. Look, everyone is free to try your patch for
testing, and if they report that it works for them with no speed
degradation, then I'll examine it further where it failed.
So people, can you try James Simmons' patch posted here?
http://marc.info/?l=linux-fbdev-devel&m=117821517710515&w=2
Tony
PS: The reason your patch is slow is because you are using swab32().
The old code does this:
table lookup
Your patch does this:
table lookup + swab32()
And swab32 consists of 4 ORs, 4 bitshifts, and 2 ANDs. This is very
expensive, especially if it is done for every 4 bytes of data (that's 1
pixel at 32 bpp).
-------------------------------------------------------------------------
This SF.net email is sponsored by DB2 Express
Download DB2 Express C - the FREE version of DB2 express and take
control of your XML. No limits. Just data. Click to get it now.
http://sourceforge.net/powerbar/db2/
^ permalink raw reply [flat|nested] 10+ messages in thread
end of thread, other threads:[~2007-05-08 23:10 UTC | newest]
Thread overview: 10+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2007-05-02 20:50 [PATCH] packed pixel image blit optimizations James Simmons
2007-05-02 21:33 ` Antonino A. Daplas
2007-05-03 14:04 ` James Simmons
2007-05-03 19:24 ` Antonino A. Daplas
2007-05-03 19:34 ` James Simmons
2007-05-03 19:44 ` Antonino A. Daplas
2007-05-03 19:55 ` James Simmons
2007-05-03 20:06 ` Antonino A. Daplas
2007-05-08 19:58 ` James Simmons
2007-05-08 23:10 ` Antonino A. Daplas
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).