fbdev: generic cfb* paths for odd depths, table-driven mono image expansion,
and pixmap staging in fbcon-accel.  Against linux-2.5.27.

What the patch does
  * cfbcopyarea.c: adds an else branch for depths where
    BITS_PER_LONG % bits_per_pixel != 0.  Each row is copied with
    fast_memmove() for the 16-byte-multiple part and fb_memmove() for the
    remaining bytes, top-down or bottom-up depending on dy/sy and dx/sx.
  * cfbfillrect.c: replaces the mask based odd-depth fill with a pixel pack
    (pixarray) that is written as dwords, with the remaining bytes written
    one at a time.  Also initialises dst to NULL.
  * cfbimgblt.c: splits the depth-1 expansion out of cfb_imageblit() into
    fast_imageblit() (lookup tables cfb_tab8/16/32), slow_imageblit()
    (one pixel at a time into cfb_pixarray) and bitwise_blit() (one bit at
    a time, the previous algorithm).
  * fbcon-accel.c: when info->pixmap.addr is set, glyph bitmaps are copied
    into the pixmap buffer (honouring scan_align/buf_align, calling fb_sync
    before the offset wraps to 0) and blitted with a single fb_imageblit().

Review fixes folded into this revision (added-line counts are unchanged, so
the hunk headers are the same as in the original submission)
  1. cfbcopyarea.c, backward branch: the trailing fraction is moved before
     the 16-byte-multiple part.  With dy == sy and dx > sx the destination of
     the first part can overlap the source of the fraction, so moving the
     first part first destroyed data that was still to be copied.
  2. cfbimgblt.c, fast_imageblit() and bitwise_blit(): "l -= pad" left the
     bit index at 0 after every row whose width is not a multiple of 8, so
     the next row used a negative shift / bit number.  The row end now skips
     to the next source byte and resets l to 8.
  3. cfbimgblt.c, slow_imageblit(): the residual byte loop incremented an
     unsigned long pointer, spreading the bytes sizeof(long) apart.  It now
     indexes the destination by byte.
  4. cfbimgblt.c, cfb_imageblit(): depths below 8 bpp were sent to
     slow_imageblit(), which rounds a pixel up to a whole byte, and
     bitwise_blit() was never called.  Sub-byte depths now use bitwise_blit().
  5. fbcon-accel.c, fbcon_accel_putcs(): the four-glyph fast path stored
     through an unsigned long pointer (8 bytes on 64-bit, glyph order
     reversed on big-endian, int shifted by 24).  It now stores four bytes.

Known issues that are NOT addressed here
  * slow_imageblit() does not expand the residual pixels of a row (the bytes
    written come from the previous pack) and does not skip source row padding.
  * cfb_pixarray[4] and the "read" computation assume a 32-bit long.
  * fast_imageblit() has no lookup table when ppw is 8 (tab stays NULL).

Note: the indentation of this copy was reconstructed, and the "-"/"+" pairs
that look identical were whitespace-only changes in the original.  Apply from
the top of the kernel tree with "patch -p1 -l".

diff -Naur linux-2.5.27/drivers/video/cfbcopyarea.c linux/drivers/video/cfbcopyarea.c
--- linux-2.5.27/drivers/video/cfbcopyarea.c	Thu Aug  8 21:42:21 2002
+++ linux/drivers/video/cfbcopyarea.c	Thu Aug  8 21:42:54 2002
@@ -83,7 +83,7 @@
 		lineincr = -linesize;
 	}
 
-	if ((BITS_PER_LONG % p->var.bits_per_pixel) == 0) {
+	if ((BITS_PER_LONG % p->var.bits_per_pixel) == 0) {
 		int ppw = BITS_PER_LONG / p->var.bits_per_pixel;
 		int n = ((area->width * p->var.bits_per_pixel) >> 3);
 
@@ -103,7 +103,6 @@
 			n -= end_index;
 		}
 		n /= bpl;
-
 		if (n <= 0) {
 			if (start_mask) {
 				if (end_mask)
@@ -219,4 +218,32 @@
 			}
 		}
 	}
+	else {
+		int n = ((area->width * p->var.bits_per_pixel) >> 3);
+		int n16 = (n >> 4) << 4;
+		int n_fract = n - n16;
+		int rows;
+
+		if (area->dy < area->sy
+		    || (area->dy == area->sy && area->dx < area->sx)) {
+			for (rows = height; rows--; ) {
+				if (n16)
+					fast_memmove(dst1, src1, n16);
+				if (n_fract)
+					fb_memmove(dst1+n16, src1+n16, n_fract);
+				dst1 += linesize;
+				src1 += linesize;
+			}
+		}
+		else {
+			for (rows = height; rows--; ) {
+				if (n_fract)
+					fb_memmove(dst1+n16, src1+n16, n_fract);
+				if (n16)
+					fast_memmove(dst1, src1, n16);
+				dst1 -= linesize;
+				src1 -= linesize;
+			}
+		}
+	}
 }
diff -Naur linux-2.5.27/drivers/video/cfbfillrect.c linux/drivers/video/cfbfillrect.c
--- linux-2.5.27/drivers/video/cfbfillrect.c	Thu Aug  8 21:42:26 2002
+++ linux/drivers/video/cfbfillrect.c	Thu Aug  8 21:42:50 2002
@@ -28,7 +28,7 @@
 	unsigned long height, ppw, fg, fgcolor;
 	int i, n, x2, y2, linesize = p->fix.line_length;
 	int bpl = sizeof(unsigned long);
-	unsigned long *dst;
+	unsigned long *dst = NULL;
 	char *dst1;
 
 	if (!rect->width || !rect->height)
@@ -57,7 +57,7 @@
 	else
 		fg = fgcolor = rect->color;
 
-	for (i = 0; i < ppw - 1; i++) {
+	for (i = 0; i < ppw-1; i++) {
 		fg <<= p->var.bits_per_pixel;
 		fg |= fgcolor;
 	}
@@ -85,7 +85,7 @@
 		n = 0;
 	}
 
-	if ((BITS_PER_LONG % p->var.bits_per_pixel) == 0) {
+	if ((BITS_PER_LONG % p->var.bits_per_pixel) == 0) {
 		switch (rect->rop) {
 		case ROP_COPY:
 			do {
@@ -161,49 +161,76 @@
 			break;
 		}
 	} else {
-		/* Odd modes like 24 or 80 bits per pixel */
-		start_mask = fg >> (start_index * p->var.bits_per_pixel);
-		end_mask = fg << (end_index * p->var.bits_per_pixel);
-		/* start_mask =& PFILL24(x1,fg);
-		end_mask_or = end_mask & PFILL24(x1+width-1,fg); */
-
-		n = (rect->width - start_index - end_index) / ppw;
+		/*
+		 * Slow Method: The aim is to find the number of pixels to
+		 * pack in order to write doubleword multiple data.
+		 * For 24 bpp, 4 pixels are packed which are written as
+		 * 3 dwords.
+		 */
+		char *dst2, *dst3;
+		int bytes = (p->var.bits_per_pixel + 7) >> 3;
+		int read, write, total, pack_size;
+		u32 pixarray[BITS_PER_LONG >> 3], m;
+
+		fg = fgcolor;
+		read = (bytes + (bpl - 1)) & ~(bpl - 1);
+		write = bytes;
+		total = (rect->width * bytes);
+
+		pack_size = bpl * write;
+
+		dst3 = (char *) pixarray;
+
+		for (n = read; n--; ) {
+			*(u32 *) dst3 = fg;
+			dst3 += bytes;
+		}
 
 		switch (rect->rop) {
 		case ROP_COPY:
 			do {
-				dst = (unsigned long *) dst1;
-				if (start_mask)
-					*dst |= start_mask;
-				if ((start_index + rect->width) > ppw)
-					dst++;
+				dst2 = dst1;
+				n = total;
 
-				/* XXX: slow */
-				for (i = 0; i < n; i++) {
-					*dst++ = fg;
+				while (n >= pack_size) {
+					for (m = 0; m < write; m++) {
+						fb_writel(pixarray[m], (u32 *) dst2);
+						dst2 += 4;
+					}
+					n -= pack_size;
+				}
+				if (n) {
+					m = 0;
+					while (n--)
+						fb_writeb(((u8 *)pixarray)[m++], dst2++);
 				}
-				if (end_mask)
-					*dst |= end_mask;
 				dst1 += linesize;
 			} while (--height);
 			break;
 		case ROP_XOR:
 			do {
-				dst = (unsigned long *) dst1;
-				if (start_mask)
-					*dst ^= start_mask;
-				if ((start_mask + rect->width) > ppw)
-					dst++;
+				dst2 = dst1;
+				n = total;
 
-				for (i = 0; i < n; i++) {
-					*dst++ ^= fg; /* PFILL24(fg,x1+i); */
+				while (n >= pack_size) {
+					for (m = 0; m < write; m++) {
+						fb_writel(fb_readl((u32 *) dst2) ^ pixarray[m], (u32 *) dst2);
+						dst2 += 4;
+					}
+					n -= pack_size;
+				}
+				if (n) {
+					m = 0;
+					while (n--) {
+						fb_writeb(fb_readb(dst2) ^ ((u8 *)pixarray)[m++], dst2);
+						dst2++;
+					}
 				}
-				if (end_mask)
-					*dst ^= end_mask;
 				dst1 += linesize;
 			} while (--height);
 			break;
 		}
+
 	}
 	return;
 }
diff -Naur linux-2.5.27/drivers/video/cfbimgblt.c linux/drivers/video/cfbimgblt.c
--- linux-2.5.27/drivers/video/cfbimgblt.c	Thu Aug  8 21:42:17 2002
+++ linux/drivers/video/cfbimgblt.c	Thu Aug  8 21:42:42 2002
@@ -22,6 +22,13 @@
  *  FIXME
  *  The code for 24 bit is horrible. It copies byte by byte size instead of
  *  longs like the other sizes. Needs to be optimized.
+ *
+ *  Tony:
+ *  Incorporate mask tables similar to fbcon-cfb*.c in 2.4 API. This speeds
+ *  up the code significantly.
+ *
+ *  Code for depths not multiples of BITS_PER_LONG is still kludgy, which is
+ *  still processed a bit at a time.
  *
  *  Also need to add code to deal with cards endians that are different than
  *  the native cpu endians. I also need to deal with MSB position in the word.
@@ -41,16 +48,222 @@
 #define DPRINTK(fmt, args...)
 #endif
 
-void cfb_imageblit(struct fb_info *p, struct fb_image *image)
+static u32 cfb_tab8[] = {
+#if defined(__BIG_ENDIAN)
+	0x00000000,0x000000ff,0x0000ff00,0x0000ffff,
+	0x00ff0000,0x00ff00ff,0x00ffff00,0x00ffffff,
+	0xff000000,0xff0000ff,0xff00ff00,0xff00ffff,
+	0xffff0000,0xffff00ff,0xffffff00,0xffffffff
+#elif defined(__LITTLE_ENDIAN)
+	0x00000000,0xff000000,0x00ff0000,0xffff0000,
+	0x0000ff00,0xff00ff00,0x00ffff00,0xffffff00,
+	0x000000ff,0xff0000ff,0x00ff00ff,0xffff00ff,
+	0x0000ffff,0xff00ffff,0x00ffffff,0xffffffff
+#else
+#error FIXME: No endianness??
+#endif
+};
+
+static u32 cfb_tab16[] = {
+#if defined(__BIG_ENDIAN)
+	0x00000000, 0x0000ffff, 0xffff0000, 0xffffffff
+#elif defined(__LITTLE_ENDIAN)
+	0x00000000, 0xffff0000, 0x0000ffff, 0xffffffff
+#else
+#error FIXME: No endianness??
+#endif
+};
+
+static u32 cfb_tab32[] = {
+	0x00000000, 0xffffffff
+};
+
+static u32 cfb_pixarray[4];
+static u32 cfb_tabdef[2];
+
+
+static inline void fast_imageblit(struct fb_image *image, struct fb_info *p, char *dst1,
+				  int fgcolor, int bgcolor)
 {
-	int pad, ppw;
-	int x2, y2, n, i, j, k, l = 7;
+	int i, j, k, l = 8, n;
+	int bit_mask, end_mask, eorx;
+	unsigned long fgx = fgcolor, bgx = bgcolor, pad;
 	unsigned long tmp = ~0 << (BITS_PER_LONG - p->var.bits_per_pixel);
-	unsigned long fgx, bgx, fgcolor, bgcolor, eorx;
+	unsigned long ppw = BITS_PER_LONG/p->var.bits_per_pixel;
+	unsigned long *dst;
+	u32 *tab = NULL;
+	char *src = image->data;
+
+	switch (ppw) {
+	case 4:
+		tab = cfb_tab8;
+		break;
+	case 2:
+		tab = cfb_tab16;
+		break;
+	case 1:
+		tab = cfb_tab32;
+		break;
+	}
+
+	for (i = ppw-1; i--; ) {
+		fgx <<= p->var.bits_per_pixel;
+		bgx <<= p->var.bits_per_pixel;
+		fgx |= fgcolor;
+		bgx |= bgcolor;
+	}
+
+	n = ((image->width + 7) >> 3);
+	pad = (n << 3) - image->width;
+	n = image->width % ppw;
+
+	bit_mask = (1 << ppw) - 1;
+	eorx = fgx ^ bgx;
+
+	k = image->width/ppw;
+
+	for (i = image->height; i--; ) {
+		dst = (unsigned long *) dst1;
+
+		for (j = k; j--; ) {
+			l -= ppw;
+			end_mask = tab[(*src >> l) & bit_mask];
+			fb_writel((end_mask & eorx)^bgx, dst++);
+			if (!l) { l = 8; src++; }
+		}
+		if (n) {
+			end_mask = 0;
+			for (j = n; j > 0; j--) {
+				l--;
+				if (test_bit(l, (unsigned long *) src))
+					end_mask |= (tmp >> (p->var.bits_per_pixel*(j-1)));
+				if (!l) { l = 8; src++; }
+			}
+			fb_writel((end_mask & eorx)^bgx, dst++);
+		}
+		if (pad) { l = 8; src++; }	/* skip row padding bits */
+		dst1 += p->fix.line_length;
+	}
+}
+
+
+/*
+ * Slow method:  The idea is to find the number of pixels necessary to form
+ * dword-sized multiples that will be written to the framebuffer.  For BPP24,
+ * 4 pixels has to be read which are then packed into 3 double words that
+ * are then written to the framebuffer.
+ *
+ * With this method, processing is done 1 pixel at a time.
+ */
+static inline void slow_imageblit(struct fb_image *image, struct fb_info *p, char * dst1,
+				  int fgcolor, int bgcolor)
+{
+	int bytes = (p->var.bits_per_pixel + 7) >> 3;
+	int tmp = ~0UL >> (BITS_PER_LONG - p->var.bits_per_pixel);
+	int i, j, k, l = 8, m, end_mask, eorx;
+	int read, write, total, pack_size, bpl = sizeof(unsigned long);
+	unsigned long *dst;
+	char *dst2 = (char *) cfb_pixarray, *src = image->data;
+
+	cfb_tabdef[0] = 0;
+	cfb_tabdef[1] = tmp;
+
+	eorx = fgcolor ^ bgcolor;
+	read = (bytes + (bpl - 1)) & ~(bpl - 1);
+	write = bytes;
+	total = image->width * bytes;
+	pack_size = bpl * write;
+
+	for (i = image->height; i--; ) {
+		dst = (unsigned long *) dst1;
+		j = total;
+		m = read;
+
+		while (j >= pack_size) {
+			l--; m--;
+			end_mask = cfb_tabdef[(*src >> l) & 1];
+			*(unsigned long *) dst2 = (end_mask & eorx)^bgcolor;
+			dst2 += bytes;
+			if (!m) {
+				for (k = 0; k < write; k++ )
+					fb_writel(cfb_pixarray[k], dst++);
+				dst2 = (char *) cfb_pixarray;
+				j -= pack_size;
+				m = read;
+			}
+			if (!l) { l = 8; src++; }
+		}
+		/* write residual bytes; dst is a long pointer, so index by byte */
+		if (j) {
+			/* j is reloaded from total at the top of the next row */
+			for (k = 0; k < j; k++)
+				fb_writeb(((u8 *) cfb_pixarray)[k], (u8 *) dst + k);
+		}
+		dst1 += p->fix.line_length;
+	}
+}
+
+static inline void bitwise_blit(struct fb_image *image, struct fb_info *p, char *dst1,
+				int fgcolor, int bgcolor)
+{
+	int i, j, k, l = 8, n, pad, ppw;
+	unsigned long tmp = ~0 << (BITS_PER_LONG - p->var.bits_per_pixel);
+	unsigned long fgx = fgcolor, bgx = bgcolor, eorx;
 	unsigned long end_mask;
 	unsigned long *dst = NULL;
+	char *src = image->data;
+
+	ppw = BITS_PER_LONG/p->var.bits_per_pixel;
+
+	for (i = 0; i < ppw-1; i++) {
+		fgx <<= p->var.bits_per_pixel;
+		bgx <<= p->var.bits_per_pixel;
+		fgx |= fgcolor;
+		bgx |= bgcolor;
+	}
+	eorx = fgx ^ bgx;
+	n = ((image->width + 7) >> 3);
+	pad = (n << 3) - image->width;
+	n = image->width % ppw;
+
+	for (i = 0; i < image->height; i++) {
+		dst = (unsigned long *) dst1;
+
+		for (j = image->width/ppw; j > 0; j--) {
+			end_mask = 0;
+
+			for (k = ppw; k > 0; k--) {
+				l--;
+				if (test_bit(l, (unsigned long *) src))
+					end_mask |= (tmp >> (p->var.bits_per_pixel*(k-1)));
+				if (!l) { l = 8; src++; }
+			}
+			fb_writel((end_mask & eorx)^bgx, dst);
+			dst++;
+		}
+
+		if (n) {
+			end_mask = 0;
+			for (j = n; j > 0; j--) {
+				l--;
+				if (test_bit(l, (unsigned long *) src))
+					end_mask |= (tmp >> (p->var.bits_per_pixel*(j-1)));
+				if (!l) { l = 8; src++; }
+			}
+			fb_writel((end_mask & eorx)^bgx, dst);
+			dst++;
+		}
+		if (pad) { l = 8; src++; }	/* skip row padding bits */
+		dst1 += p->fix.line_length;
+	}
+}
+
+void cfb_imageblit(struct fb_info *p, struct fb_image *image)
+{
+	int x2, y2, n;
+	unsigned long fgcolor, bgcolor;
+	unsigned long end_mask;
 	u8 *dst1;
-	u8 *src;
 
 	/*
 	 * We could use hardware clipping but on many cards you get around hardware
@@ -64,66 +277,32 @@
 	y2 = y2 < p->var.yres_virtual ? y2 : p->var.yres_virtual;
 	image->width  = x2 - image->dx;
 	image->height = y2 - image->dy;
-
+
 	dst1 = p->screen_base + image->dy * p->fix.line_length +
 		((image->dx * p->var.bits_per_pixel) >> 3);
 
-	ppw = BITS_PER_LONG/p->var.bits_per_pixel;
-
-	src = image->data;
-
 	if (image->depth == 1) {
-
 		if (p->fix.visual == FB_VISUAL_TRUECOLOR) {
-			fgx = fgcolor = ((u32 *)(p->pseudo_palette))[image->fg_color];
-			bgx = bgcolor = ((u32 *)(p->pseudo_palette))[image->bg_color];
+			fgcolor = ((u32 *)(p->pseudo_palette))[image->fg_color];
+			bgcolor = ((u32 *)(p->pseudo_palette))[image->bg_color];
 		} else {
-			fgx = fgcolor = image->fg_color;
-			bgx = bgcolor = image->bg_color;
+			fgcolor = image->fg_color;
+			bgcolor = image->bg_color;
 		}
 
-		for (i = 0; i < ppw-1; i++) {
-			fgx <<= p->var.bits_per_pixel;
-			bgx <<= p->var.bits_per_pixel;
-			fgx |= fgcolor;
-			bgx |= bgcolor;
-		}
-		eorx = fgx ^ bgx;
-		n = ((image->width + 7) >> 3);
-		pad = (n << 3) - image->width;
-		n = image->width % ppw;
-
-		for (i = 0; i < image->height; i++) {
-			dst = (unsigned long *) dst1;
-
-			for (j = image->width/ppw; j > 0; j--) {
-				end_mask = 0;
-
-				for (k = ppw; k > 0; k--) {
-					if (test_bit(l, (unsigned long *) src))
-						end_mask |= (tmp >> (p->var.bits_per_pixel*(k-1)));
-					l--;
-					if (l < 0) { l = 7; src++; }
-				}
-				fb_writel((end_mask & eorx)^bgx, dst);
-				dst++;
-			}
+		if (p->var.bits_per_pixel >= 8) {
+			if (BITS_PER_LONG % p->var.bits_per_pixel == 0)
+				fast_imageblit(image, p, dst1, fgcolor, bgcolor);
+			else
+				slow_imageblit(image, p, dst1, fgcolor, bgcolor);
+		}
+		else
+			/* Is there such a thing as 3 or 5 bits per pixel? */
+			bitwise_blit(image, p, dst1, fgcolor, bgcolor);
 
-			if (n) {
-				end_mask = 0;
-				for (j = n; j > 0; j--) {
-					if (test_bit(l, (unsigned long *) src))
-						end_mask |= (tmp >> (p->var.bits_per_pixel*(j-1)));
-					l--;
-					if (l < 0) { l = 7; src++; }
-				}
-				fb_writel((end_mask & eorx)^bgx, dst);
-				dst++;
-			}
-			l -= pad;
-			dst1 += p->fix.line_length;
-		}
-	} else {
+	}
+
+	else {
 		/* Draw the penguin */
 		n = ((image->width * p->var.bits_per_pixel) >> 3);
 		end_mask = 0;
diff -Naur linux-2.5.27/drivers/video/fbcon-accel.c linux/drivers/video/fbcon-accel.c
--- linux-2.5.27/drivers/video/fbcon-accel.c	Thu Aug  8 21:42:11 2002
+++ linux/drivers/video/fbcon-accel.c	Thu Aug  8 21:43:00 2002
@@ -70,9 +70,44 @@
 	image.width = fontwidth(p);
 	image.height = fontheight(p);
 	image.depth = 1;
-	image.data = p->fontdata + (c & charmask)*fontheight(p)*width;
+	if (!info->pixmap.addr) {
+		image.data = p->fontdata + (c & charmask)*fontheight(p) * width;
+		info->fbops->fb_imageblit(info, &image);
+	}
+	else {
+		unsigned int d_size, d_pitch, i, j;
+		unsigned int scan_align = (info->pixmap.scan_align) ? info->pixmap.scan_align - 1 : 0;
+		unsigned int buf_align = (info->pixmap.buf_align) ? info->pixmap.buf_align - 1 : 0;
+		char *d_addr, *s_addr;
+
+		d_pitch = (width + scan_align) & ~scan_align;
+		d_size = d_pitch * image.height;
+
+		if (d_size > info->pixmap.size) {
+			BUG();
+			return;
+		}
+
+		info->pixmap.offset = (info->pixmap.offset + buf_align) & ~buf_align;
+
+		if (info->pixmap.offset + d_size > info->pixmap.size) {
+			if (info->fbops->fb_sync)
+				info->fbops->fb_sync(info);
+			info->pixmap.offset = 0;
+		}
+		s_addr = p->fontdata + (c & charmask)*fontheight(p)*width;
+		image.data = (char *) (info->pixmap.addr + info->pixmap.offset);
+		d_addr = image.data;
 
-	info->fbops->fb_imageblit(info, &image);
+		for (i = image.height; i--; ) {
+			for (j = 0; j < width; j++)
+				d_addr[j] = *s_addr++;
+			d_addr += d_pitch;
+		}
+
+		info->fbops->fb_imageblit(info, &image);
+		info->pixmap.offset += d_size;
+	}
 }
 
 void fbcon_accel_putcs(struct vc_data *vc, struct display *p,
@@ -81,21 +116,87 @@
 	struct fb_info *info = p->fb_info;
 	unsigned short charmask = p->charmask;
 	unsigned int width = ((fontwidth(p)+7)>>3);
+	unsigned int cell_size;
 	struct fb_image image;
 
 	image.fg_color = attr_fgcol(p, *s);
 	image.bg_color = attr_bgcol(p, *s);
 	image.dx = xx * fontwidth(p);
 	image.dy = yy * fontheight(p);
-	image.width = fontwidth(p);
 	image.height = fontheight(p);
 	image.depth = 1;
+	cell_size = fontheight(p)*width;
+	if (!info->pixmap.addr) {
+		image.width = fontwidth(p);
+		while (count--) {
+			image.data = p->fontdata + (scr_readw(s++) & charmask) * cell_size;
+			info->fbops->fb_imageblit(info, &image);
+			image.dx += fontwidth(p);
+		}
+	}
+	else {
+		unsigned int d_pitch, d_size, i, j;
+		unsigned int scan_align = (info->pixmap.scan_align) ? info->pixmap.scan_align - 1 : 0;
+		unsigned int buf_align = (info->pixmap.buf_align) ? info->pixmap.buf_align - 1 : 0;
+		char *s_addr, *d_addr, *d_addr0;
+
+		d_pitch = (width * count) + scan_align;
+		d_pitch &= ~scan_align;
+		d_size = d_pitch * image.height;
+
+		if (d_size > info->pixmap.size) {
+			BUG();
+			return;
+		}
+
+		info->pixmap.offset = (info->pixmap.offset + buf_align) & ~buf_align;
+
+		if (info->pixmap.offset + d_size > info->pixmap.size) {
+			if (info->fbops->fb_sync)
+				info->fbops->fb_sync(info);
+			info->pixmap.offset = 0;
+		}
+
+		image.width = fontwidth(p) * count;
+		image.data = (char *) (info->pixmap.addr + info->pixmap.offset);
+		d_addr = image.data;
+
+		if (width == 1 && count > 3) {
+			char *s1, *s2, *s3, *s4;
+
+			while (count > 3) {
+				s1 = p->fontdata + (scr_readw(s++) & charmask) * cell_size;
+				s2 = p->fontdata + (scr_readw(s++) & charmask) * cell_size;
+				s3 = p->fontdata + (scr_readw(s++) & charmask) * cell_size;
+				s4 = p->fontdata + (scr_readw(s++) & charmask) * cell_size;
+				d_addr0 = d_addr;
+
+				for (i = image.height; i--; ) {
+					/* byte stores: independent of endianness and long size */
+					d_addr0[0] = *s1++;
+					d_addr0[1] = *s2++;
+					d_addr0[2] = *s3++;
+					d_addr0[3] = *s4++;
+					d_addr0 += d_pitch;
+				}
+				count -= 4;
+				d_addr += 4;
+			}
+		}
+
+		while (count--) {
+			s_addr = p->fontdata + (scr_readw(s++) & charmask) * cell_size;
+			d_addr0 = d_addr;
 
-	while (count--) {
-		image.data = p->fontdata +
-			(scr_readw(s++) & charmask) * fontheight(p) * width;
+			for (i = image.height; i--; ) {
+				for (j = 0; j < width; j++)
+					d_addr0[j] = *s_addr++;
+				d_addr0 += d_pitch;
+			}
+			d_addr += width;
+		}
 		info->fbops->fb_imageblit(info, &image);
-		image.dx += fontwidth(p);
+		info->pixmap.offset += d_size;
 	}
 }
 