* [Patch 2] Console Rotation
@ 2002-09-09 14:22 Antonino Daplas
2002-09-09 14:35 ` Geert Uytterhoeven
0 siblings, 1 reply; 6+ messages in thread
From: Antonino Daplas @ 2002-09-09 14:22 UTC (permalink / raw)
To: fbdev
The patch (fb_drawing.diff) is an optional replacement to cfbimgblt.c,
cfbfillrect.c and cfbcopyarea.c. Rotating the display CW or CCW will
expose some of the limitations (alignment and access) of the current
drawing functions because fontheight is very variable. This also fixes
software clipping.
cfbcopyarea.c is just a 'copy 'n paste' of Geert's version I leached
from fbutils (I hope this is okay with you, Geert :) The only changes
are using FB_WRITEL and FB_READL where appropriate.
Tony
<<------------------------------------------------------------------->>
diff -Naur linux-2.5.33/drivers/video/cfbcopyarea.c linux/drivers/video/cfbcopyarea.c
--- linux-2.5.33/drivers/video/cfbcopyarea.c Sun Sep 8 19:34:36 2002
+++ linux/drivers/video/cfbcopyarea.c Sun Sep 8 19:48:05 2002
@@ -28,22 +28,319 @@
#include <asm/io.h>
#include <video/fbcon.h>
+#include "fbcon-rotate.h"
+
+#define LONG_MASK (BITS_PER_LONG - 1)
+
#if BITS_PER_LONG == 32
-#define FB_READ fb_readl
-#define FB_WRITE fb_writel
+#define FB_WRITEL fb_writel
+#define FB_READL fb_readl
+#define SHIFT_PER_LONG 5
+#define BYTES_PER_LONG 4
#else
-#define FB_READ fb_readq
-#define FB_WRITE fb_writeq
+#define FB_WRITEL fb_writeq
+#define FB_READL fb_readq(x)
+#define SHIFT_PER_LONG 6
+#define BYTES_PER_LONG 8
#endif
+static void bitcpy(unsigned long *dst, int dst_idx, const unsigned long *src,
+ int src_idx, unsigned long n)
+{
+ unsigned long first, last;
+ int shift = dst_idx-src_idx, left, right;
+ unsigned long d0, d1;
+ int m;
+
+ if (!n)
+ return;
+
+ shift = dst_idx-src_idx;
+ first = ~0UL >> dst_idx;
+ last = ~(~0UL >> ((dst_idx+n) % BITS_PER_LONG));
+
+ if (!shift) {
+ // Same alignment for source and dest
+
+ if (dst_idx+n <= BITS_PER_LONG) {
+ // Single word
+ if (last)
+ first &= last;
+ FB_WRITEL((*src & first) | (FB_READL(dst) & ~first), dst);
+ } else {
+ // Multiple destination words
+ // Leading bits
+ if (first) {
+
+ FB_WRITEL((*src & first) | (FB_READL(dst) & ~first), dst);
+ dst++;
+ src++;
+ n -= BITS_PER_LONG-dst_idx;
+ }
+
+ // Main chunk
+ n /= BITS_PER_LONG;
+ while (n >= 8) {
+ FB_WRITEL(*src++, dst++);
+ FB_WRITEL(*src++, dst++);
+ FB_WRITEL(*src++, dst++);
+ FB_WRITEL(*src++, dst++);
+ FB_WRITEL(*src++, dst++);
+ FB_WRITEL(*src++, dst++);
+ FB_WRITEL(*src++, dst++);
+ FB_WRITEL(*src++, dst++);
+ n -= 8;
+ }
+ while (n--)
+ FB_WRITEL(*src++, dst++);
+ // Trailing bits
+ if (last)
+ FB_WRITEL((*src & last) | (FB_READL(dst) & ~last), dst);
+ }
+ } else {
+ // Different alignment for source and dest
+
+ right = shift & (BITS_PER_LONG-1);
+ left = -shift & (BITS_PER_LONG-1);
+
+ if (dst_idx+n <= BITS_PER_LONG) {
+ // Single destination word
+ if (last)
+ first &= last;
+ if (shift > 0) {
+ // Single source word
+ FB_WRITEL(((*src >> right) & first) | (FB_READL(dst) & ~first), dst);
+ } else if (src_idx+n <= BITS_PER_LONG) {
+ // Single source word
+ FB_WRITEL(((*src << left) & first) | (FB_READL(dst) & ~first), dst);
+ } else {
+ // 2 source words
+ d0 = *src++;
+ d1 = *src;
+ FB_WRITEL(((d0 << left | d1 >> right) & first) | (FB_READL(dst) & ~first), dst);
+ }
+ } else {
+ // Multiple destination words
+ d0 = *src++;
+ // Leading bits
+ if (shift > 0) {
+ // Single source word
+ FB_WRITEL(((d0 >> right) & first) | (FB_READL(dst) & ~first), dst);
+ dst++;
+ n -= BITS_PER_LONG-dst_idx;
+ } else {
+ // 2 source words
+ d1 = *src++;
+ FB_WRITEL(((d0 << left | d1 >> right) & first) | (FB_READL(dst) & ~first), dst);
+ d0 = d1;
+ dst++;
+ n -= BITS_PER_LONG-dst_idx;
+ }
+
+ // Main chunk
+ m = n % BITS_PER_LONG;
+ n /= BITS_PER_LONG;
+ while (n >= 4) {
+ d1 = *src++;
+ FB_WRITEL(d0 << left | d1 >> right, dst++);
+ d0 = d1;
+ d1 = *src++;
+ FB_WRITEL(d0 << left | d1 >> right, dst++);
+ d0 = d1;
+ d1 = *src++;
+ FB_WRITEL(d0 << left | d1 >> right, dst++);
+ d0 = d1;
+ d1 = *src++;
+ FB_WRITEL(d0 << left | d1 >> right, dst++);
+ d0 = d1;
+ n -= 4;
+ }
+ while (n--) {
+ d1 = *src++;
+ FB_WRITEL(d0 << left | d1 >> right, dst++);
+ d0 = d1;
+ }
+
+ // Trailing bits
+ if (last) {
+ if (m <= right) {
+ // Single source word
+ FB_WRITEL(((d0 << left) & last) | (FB_READL(dst) & ~last), dst);
+ } else {
+ // 2 source words
+ d1 = *src;
+ FB_WRITEL(((d0 << left | d1 >> right) & last) | (FB_READL(dst) & ~last), dst);
+ }
+ }
+ }
+ }
+}
+
+static void bitcpy_rev(unsigned long *dst, int dst_idx,
+ const unsigned long *src, int src_idx, unsigned long n)
+{
+ unsigned long first, last;
+ int shift = dst_idx-src_idx, left, right;
+ unsigned long d0, d1;
+ int m;
+
+ if (!n)
+ return;
+
+ dst += (n-1)/BITS_PER_LONG;
+ src += (n-1)/BITS_PER_LONG;
+ if ((n-1) % BITS_PER_LONG) {
+ dst_idx += (n-1) % BITS_PER_LONG;
+ dst += dst_idx >> SHIFT_PER_LONG;
+ dst_idx &= BITS_PER_LONG-1;
+ src_idx += (n-1) % BITS_PER_LONG;
+ src += src_idx >> SHIFT_PER_LONG;
+ src_idx &= BITS_PER_LONG-1;
+ }
+
+ shift = dst_idx-src_idx;
+ first = ~0UL << (BITS_PER_LONG-1-dst_idx);
+ last = ~(~0UL << (BITS_PER_LONG-1-((dst_idx-n) % BITS_PER_LONG)));
+
+ if (!shift) {
+ // Same alignment for source and dest
+
+ if ((unsigned long)dst_idx+1 >= n) {
+ // Single word
+ if (last)
+ first &= last;
+ FB_WRITEL((*src & first) | (FB_READL(dst) & ~first), dst);
+ } else {
+ // Multiple destination words
+ // Leading bits
+ if (first) {
+ FB_WRITEL((*src & first) | (FB_READL(dst) & ~first), dst);
+ dst--;
+ src--;
+ n -= dst_idx+1;
+ }
+
+ // Main chunk
+ n /= BITS_PER_LONG;
+ while (n >= 8) {
+ FB_WRITEL(*src--, dst--);
+ FB_WRITEL(*src--, dst--);
+ FB_WRITEL(*src--, dst--);
+ FB_WRITEL(*src--, dst--);
+ FB_WRITEL(*src--, dst--);
+ FB_WRITEL(*src--, dst--);
+ FB_WRITEL(*src--, dst--);
+ FB_WRITEL(*src--, dst--);
+ n -= 8;
+ }
+ while (n--)
+ FB_WRITEL(*src--, dst--);
+
+ // Trailing bits
+ if (last)
+ FB_WRITEL((*src & last) | (FB_READL(dst) & ~last), dst);
+ }
+ } else {
+ // Different alignment for source and dest
+
+ right = shift & (BITS_PER_LONG-1);
+ left = -shift & (BITS_PER_LONG-1);
+
+ if ((unsigned long)dst_idx+1 >= n) {
+ // Single destination word
+ if (last)
+ first &= last;
+ if (shift < 0) {
+ // Single source word
+ FB_WRITEL((*src << left & first) | (FB_READL(dst) & ~first), dst);
+ } else if (1+(unsigned long)src_idx >= n) {
+ // Single source word
+ FB_WRITEL(((*src >> right) & first) | (FB_READL(dst) & ~first), dst);
+ } else {
+ // 2 source words
+ d0 = *src--;
+ d1 = *src;
+ FB_WRITEL(((d0 >> right | d1 << left) & first) | (FB_READL(dst) & ~first), dst);
+ }
+ } else {
+ // Multiple destination words
+ d0 = *src--;
+ // Leading bits
+ if (shift < 0) {
+ // Single source word
+ FB_WRITEL(((d0 << left) & first) | (FB_READL(dst) & ~first), dst);
+ dst--;
+ n -= dst_idx+1;
+ } else {
+ // 2 source words
+ d1 = *src--;
+ FB_WRITEL(((d0 >> right | d1 << left) & first) | (FB_READL(dst) & ~first), dst);
+ d0 = d1;
+ dst--;
+ n -= dst_idx+1;
+ }
+
+ // Main chunk
+ m = n % BITS_PER_LONG;
+ n /= BITS_PER_LONG;
+ while (n >= 4) {
+ d1 = *src--;
+ FB_WRITEL(d0 >> right | d1 << left, dst--);
+ d0 = d1;
+ d1 = *src--;
+ FB_WRITEL(d0 >> right | d1 << left, dst--);
+ d0 = d1;
+ d1 = *src--;
+ FB_WRITEL(d0 >> right | d1 << left, dst--);
+ d0 = d1;
+ d1 = *src--;
+ FB_WRITEL(d0 >> right | d1 << left, dst--);
+ d0 = d1;
+ n -= 4;
+ }
+ while (n--) {
+ d1 = *src--;
+ FB_WRITEL(d0 >> right | d1 << left, dst--);
+ d0 = d1;
+ }
+
+ // Trailing bits
+ if (last) {
+ if (m <= left) {
+ // Single source word
+ FB_WRITEL(((d0 >> right) & last) | (FB_READL(dst) & ~last), dst);
+ } else {
+ // 2 source words
+ d1 = *src;
+ FB_WRITEL(((d0 >> right | d1 << left) & last) |
+ (FB_READL(dst) & ~last), dst);
+ }
+ }
+ }
+ }
+}
+
void cfb_copyarea(struct fb_info *p, struct fb_copyarea *area)
{
- int x2, y2, lineincr, shift, shift_right, shift_left, old_dx, old_dy;
- int j, linesize = p->fix.line_length, bpl = sizeof(unsigned long);
- unsigned long start_index, end_index, start_mask, end_mask, last;
+ int x2, y2, old_dx, old_dy, vxres, vyres;
+ unsigned long next_line = p->fix.line_length;
+ int dst_idx = 0, src_idx = 0, rev_copy = 0;
unsigned long *dst = NULL, *src = NULL;
- char *src1, *dst1;
- int tmp, height;
+
+ vxres = p->var.xres_virtual;
+ vyres = p->var.yres_virtual;
+#ifdef FBCON_HAS_ROTATE
+ if (p->var.vmode & (FB_VMODE_ROTATE_CW | FB_VMODE_ROTATE_CCW)) {
+ vxres = p->var.yres_virtual;
+ vyres = p->var.xres_virtual;
+ }
+#endif
+
+ if (area->dx > vxres ||
+ area->sx > vxres ||
+ area->dy > vyres ||
+ area->sy > vyres)
+ return;
/* clip the destination */
old_dx = area->dx;
@@ -57,8 +354,8 @@
y2 = area->dy + area->height;
area->dx = area->dx > 0 ? area->dx : 0;
area->dy = area->dy > 0 ? area->dy : 0;
- x2 = x2 < p->var.xres_virtual ? x2 : p->var.xres_virtual;
- y2 = y2 < p->var.yres_virtual ? y2 : p->var.yres_virtual;
+ x2 = x2 < vxres ? x2 : vxres;
+ y2 = y2 < vyres ? y2 : vyres;
area->width = x2 - area->dx;
area->height = y2 - area->dy;
@@ -66,165 +363,45 @@
area->sx += (area->dx - old_dx);
area->sy += (area->dy - old_dy);
- height = area->height;
-
/* the source must be completely inside the virtual screen */
if (area->sx < 0 || area->sy < 0 ||
- (area->sx + area->width) > p->var.xres_virtual ||
- (area->sy + area->height) > p->var.yres_virtual)
+ (area->sx + area->width) > vxres ||
+ (area->sy + area->height) > vyres)
return;
- if (area->dy < area->sy
- || (area->dy == area->sy && area->dx < area->sx)) {
- /* start at the top */
- src1 = p->screen_base + area->sy * linesize +
- ((area->sx * p->var.bits_per_pixel) >> 3);
- dst1 = p->screen_base + area->dy * linesize +
- ((area->dx * p->var.bits_per_pixel) >> 3);
- lineincr = linesize;
- } else {
- /* start at the bottom */
- src1 = p->screen_base + (area->sy + area->height-1) * linesize
- + (((area->sx + area->width - 1) * p->var.bits_per_pixel) >> 3);
- dst1 = p->screen_base + (area->dy + area->height-1) * linesize
- + (((area->dx + area->width - 1) * p->var.bits_per_pixel) >> 3);
- lineincr = -linesize;
+ if (area->dy > area->sy || (area->dy == area->sy && area->dx > area->sx)) {
+ area->dy += area->height;
+ area->sy += area->height;
+ rev_copy = 1;
}
-
- if ((BITS_PER_LONG % p->var.bits_per_pixel) == 0) {
- int ppw = BITS_PER_LONG / p->var.bits_per_pixel;
- int n = ((area->width * p->var.bits_per_pixel) >> 3);
-
- start_index = ((unsigned long) src1 & (bpl - 1));
- end_index = ((unsigned long) (src1 + n) & (bpl - 1));
- shift = ((unsigned long) dst1 & (bpl - 1)) -
- ((unsigned long) src1 & (bpl - 1));
- start_mask = end_mask = 0;
-
- if (start_index) {
- start_mask = -1 >> (start_index << 3);
- n -= (bpl - start_index);
+
+ dst = src = (unsigned long *)((unsigned long)p->screen_base & ~(BYTES_PER_LONG-1));
+ dst_idx = src_idx = (unsigned long)p->screen_base & (BYTES_PER_LONG-1);
+ dst_idx += area->dy*next_line*8+area->dx*p->var.bits_per_pixel;
+ src_idx += area->sy*next_line*8+area->sx*p->var.bits_per_pixel;
+
+ if (rev_copy) {
+ while (area->height--) {
+ dst_idx -= next_line*8;
+ src_idx -= next_line*8;
+ dst += dst_idx >> SHIFT_PER_LONG;
+ dst_idx &= (BYTES_PER_LONG-1);
+ src += src_idx >> SHIFT_PER_LONG;
+ src_idx &= (BYTES_PER_LONG-1);
+ bitcpy_rev((unsigned long *)dst, dst_idx, (unsigned long *)src,
+ src_idx, area->width*p->var.bits_per_pixel);
}
-
- if (end_index) {
- end_mask = -1 << ((bpl - end_index) << 3);
- n -= end_index;
- }
- n /= bpl;
-
- if (n <= 0) {
- if (start_mask) {
- if (end_mask)
- end_mask &= start_mask;
- else
- end_mask = start_mask;
- start_mask = 0;
- }
- n = 0;
- }
-
- if (shift) {
- if (shift > 0) {
- /* dest is over to right more */
- shift_right =
- shift * p->var.bits_per_pixel;
- shift_left =
- (ppw - shift) * p->var.bits_per_pixel;
- } else {
- /* source is to the right more */
- shift_right =
- (ppw + shift) * p->var.bits_per_pixel;
- shift_left =
- -shift * p->var.bits_per_pixel;
- }
- /* general case, positive increment */
- if (lineincr > 0) {
- if (shift < 0)
- n++;
- do {
- dst = (unsigned long *) dst1;
- src = (unsigned long *) src1;
-
- last = (FB_READ(src) & start_mask);
-
- if (shift > 0)
- FB_WRITE(FB_READ(dst) | (last >> shift_right), dst);
- for (j = 0; j < n; j++) {
- dst++;
- tmp = FB_READ(src);
- src++;
- FB_WRITE((last << shift_left) | (tmp >> shift_right), dst);
- last = tmp;
- src++;
- }
- FB_WRITE(FB_READ(dst) | (last << shift_left), dst);
- src1 += lineincr;
- dst1 += lineincr;
- } while (--height);
- } else {
- /* general case, negative increment */
- if (shift > 0)
- n++;
- do {
- dst = (unsigned long *) dst1;
- src = (unsigned long *) src1;
-
- last = (FB_READ(src) & end_mask);
-
- if (shift < 0)
- FB_WRITE(FB_READ(dst) | (last >> shift_right), dst);
- for (j = 0; j < n; j++) {
- dst--;
- tmp = FB_READ(src);
- src--;
- FB_WRITE((tmp << shift_left) | (last >> shift_right), dst);
- last = tmp;
- src--;
- }
- FB_WRITE(FB_READ(dst) | (last >> shift_right), dst);
- src1 += lineincr;
- dst1 += lineincr;
- } while (--height);
- }
- } else {
- /* no shift needed */
- if (lineincr > 0) {
- /* positive increment */
- do {
- dst = (unsigned long *) (dst1 - start_index);
- src = (unsigned long *) (src1 - start_index);
-
- if (start_mask)
- FB_WRITE(FB_READ(src) | start_mask, dst);
-
- for (j = 0; j < n; j++) {
- FB_WRITE(FB_READ(src), dst);
- dst++;
- src++;
- }
-
- if (end_mask)
- FB_WRITE(FB_READ(src) | end_mask, dst);
- src1 += lineincr;
- dst1 += lineincr;
- } while (--height);
- } else {
- /* negative increment */
- do {
- dst = (unsigned long *) dst1;
- src = (unsigned long *) src1;
-
- if (start_mask)
- FB_WRITE(FB_READ(src) | start_mask, dst);
- for (j = 0; j < n; j++) {
- FB_WRITE(FB_READ(src), dst);
- dst--;
- src--;
- }
- src1 += lineincr;
- dst1 += lineincr;
- } while (--height);
- }
+ }
+ else {
+ while (area->height--) {
+ dst += dst_idx >> SHIFT_PER_LONG;
+ dst_idx &= (BYTES_PER_LONG-1);
+ src += src_idx >> SHIFT_PER_LONG;
+ src_idx &= (BYTES_PER_LONG-1);
+ bitcpy((unsigned long *)dst, dst_idx, (unsigned long *)src,
+ src_idx, area->width*p->var.bits_per_pixel);
+ dst_idx += next_line*8;
+ src_idx += next_line*8;
}
}
-}
+}
diff -Naur linux-2.5.33/drivers/video/cfbfillrect.c linux/drivers/video/cfbfillrect.c
--- linux-2.5.33/drivers/video/cfbfillrect.c Sun Sep 8 19:34:36 2002
+++ linux/drivers/video/cfbfillrect.c Sun Sep 8 19:47:59 2002
@@ -22,168 +22,167 @@
#include <asm/types.h>
#include <video/fbcon.h>
+#include "fbcon-rotate.h"
+
#if BITS_PER_LONG == 32
-#define FB_READ fb_readl
-#define FB_WRITE fb_writel
+#define FB_WRITEL fb_writel
+#define FB_READL fb_readl
#else
-#define FB_READ fb_readq
-#define FB_WRITE fb_writeq
+#define FB_WRITEL fb_writeq
+#define FB_READL fb_readq
+#endif
+
+#if defined (__BIG_ENDIAN)
+#define SHIFT_HIGH(val, bits) ((val) >> (bits))
+#define SHIFT_LOW(val, bits) ((val) << (bits))
+#else
+#define SHIFT_HIGH(val, bits) ((val) << (bits))
+#define SHIFT_LOW(val, bits) ((val) >> (bits))
#endif
void cfb_fillrect(struct fb_info *p, struct fb_fillrect *rect)
{
- unsigned long start_index, end_index, start_mask = 0, end_mask = 0;
- unsigned long height, ppw, fg, fgcolor;
- int i, n, x2, y2, linesize = p->fix.line_length;
- int bpl = sizeof(unsigned long);
- unsigned long *dst;
- char *dst1;
+ unsigned long start_index, pitch_index;
+ unsigned long height, fg, bitstart, shift, color;
+ unsigned long bpp = p->var.bits_per_pixel;
+ unsigned long null_bits = BITS_PER_LONG - bpp;
+ unsigned long n, x2, y2, vxres, vyres, linesize = p->fix.line_length;
+ unsigned long bpl = sizeof(unsigned long);
+ unsigned long *dst = NULL;
+ char *dst1, *dst2;
+
+ vxres = p->var.xres_virtual;
+ vyres = p->var.yres_virtual;
+#ifdef FBCON_HAS_ROTATE
+ if (p->var.vmode & (FB_VMODE_ROTATE_CW | FB_VMODE_ROTATE_CCW)) {
+ vxres = p->var.yres_virtual;
+ vyres = p->var.xres_virtual;
+ }
+#endif
- if (!rect->width || !rect->height)
+ if (!rect->width || !rect->height ||
+ rect->dx > vxres ||
+ rect->dy > vyres)
return;
/* We could use hardware clipping but on many cards you get around
* hardware clipping by writing to framebuffer directly. */
+
x2 = rect->dx + rect->width;
y2 = rect->dy + rect->height;
- x2 = x2 < p->var.xres_virtual ? x2 : p->var.xres_virtual;
- y2 = y2 < p->var.yres_virtual ? y2 : p->var.yres_virtual;
+ x2 = x2 < vxres ? x2 : vxres;
+ y2 = y2 < vyres ? y2 : vyres;
rect->width = x2 - rect->dx;
height = y2 - rect->dy;
- /* Size of the scanline in bytes */
- n = (rect->width * (p->var.bits_per_pixel >> 3));
- ppw = BITS_PER_LONG / p->var.bits_per_pixel;
-
- dst1 = p->screen_base + (rect->dy * linesize) +
- (rect->dx * (p->var.bits_per_pixel >> 3));
- start_index = ((unsigned long) dst1 & (bpl - 1));
- end_index = ((unsigned long) (dst1 + n) & (bpl - 1));
- if (p->fix.visual == FB_VISUAL_TRUECOLOR)
- fg = fgcolor = ((u32 *) (p->pseudo_palette))[rect->color];
+ if (p->fix.visual == FB_VISUAL_TRUECOLOR ||
+ p->fix.visual == FB_VISUAL_DIRECTCOLOR )
+ fg = ((u32 *) (p->pseudo_palette))[rect->color];
else
- fg = fgcolor = rect->color;
-
- for (i = 0; i < ppw - 1; i++) {
- fg <<= p->var.bits_per_pixel;
- fg |= fgcolor;
- }
-
- if (start_index) {
- start_mask = fg << (start_index << 3);
- n -= (bpl - start_index);
- }
-
- if (end_index) {
- end_mask = fg >> ((bpl - end_index) << 3);
- n -= end_index;
- }
-
- n = n / bpl;
-
- if (n <= 0) {
- if (start_mask) {
- if (end_mask)
- end_mask &= start_mask;
- else
- end_mask = start_mask;
- start_mask = 0;
- }
- n = 0;
- }
-
- if ((BITS_PER_LONG % p->var.bits_per_pixel) == 0) {
- switch (rect->rop) {
- case ROP_COPY:
- do {
- /* Word align to increases performace :-) */
- dst = (unsigned long *) (dst1 - start_index);
-
- if (start_mask) {
- FB_WRITE(FB_READ(dst) |
- start_mask, dst);
- dst++;
- }
-
- for (i = 0; i < n; i++) {
- FB_WRITE(fg, dst);
- dst++;
- }
-
- if (end_mask)
- FB_WRITE(FB_READ(dst) | end_mask,
- dst);
- dst1 += linesize;
- } while (--height);
- break;
- case ROP_XOR:
- do {
- dst = (unsigned long *) (dst1 - start_index);
-
- if (start_mask) {
- FB_WRITE(FB_READ(dst) ^
- start_mask, dst);
- dst++;
- }
-
- for (i = 0; i < n; i++) {
- FB_WRITE(FB_READ(dst) ^ fg, dst);
- dst++;
- }
+ fg = rect->color;
- if (end_mask) {
- FB_WRITE(FB_READ(dst) ^ end_mask,
- dst);
- }
+ bitstart = (((rect->dy * linesize) * 8) +
+ rect->dx * bpp);
+
+ start_index = bitstart & (BITS_PER_LONG - 1);
+
+ /* line_length not a multiple of an unsigned long? */
+ pitch_index = (linesize & (bpl - 1)) * 8;
+
+ bitstart /= 8;
+ bitstart &= ~(bpl - 1);
+ dst1 = dst2 = p->screen_base + bitstart;
+
+ switch (rect->rop) {
+ case ROP_COPY:
+ do {
+ dst = (unsigned long *) dst1;
+ shift = 0;
+ color = 0;
+ n = rect->width;
+
+ /*
+ * read leading bits
+ */
+ if (start_index) {
+ unsigned long start_mask = ~(SHIFT_HIGH(~0UL, start_index));
+
+ color = FB_READL(dst) & start_mask;
+ shift = start_index;
+ }
+
+ while (n--) {
+ color |= SHIFT_HIGH(fg, shift);
+ if (shift >= null_bits) {
+ FB_WRITEL(color, dst++);
+ if (shift == null_bits)
+ color = 0;
+ else
+ color = SHIFT_LOW(color, BITS_PER_LONG - shift);
+ }
+ shift += bpp;
+ shift &= (BITS_PER_LONG - 1);
+ }
+
+ /*
+ * write trailing bits
+ */
+ if (shift) {
+ unsigned long end_mask = SHIFT_HIGH(~0UL, shift);
+
+ FB_WRITEL((FB_READL(dst) & end_mask) | color, dst);
+ }
+
+ if (!pitch_index) {
dst1 += linesize;
- } while (--height);
- break;
- }
- } else {
- /* Odd modes like 24 or 80 bits per pixel */
- start_mask = fg >> (start_index * p->var.bits_per_pixel);
- end_mask = fg << (end_index * p->var.bits_per_pixel);
- /* start_mask =& PFILL24(x1,fg);
- end_mask_or = end_mask & PFILL24(x1+width-1,fg); */
-
- n = (rect->width - start_index - end_index) / ppw;
-
- switch (rect->rop) {
- case ROP_COPY:
- do {
- dst = (unsigned long *) dst1;
- if (start_mask)
- *dst |= start_mask;
- if ((start_index + rect->width) > ppw)
- dst++;
+ }
+ else {
+ dst2 += linesize;
+ dst1 = dst2;
+ (unsigned long) dst1 &= ~(bpl - 1);
+ start_index += pitch_index;
+ start_index &= BITS_PER_LONG - 1;
+ }
+
+ } while (--height);
+ break;
+ case ROP_XOR:
+ do {
+ dst = (unsigned long *) dst1;
+ shift = start_index;
+ color = 0;
+ n = rect->width;
+
+ while (n--) {
+ color |= SHIFT_HIGH(fg, shift);
+ if (shift >= null_bits) {
+ FB_WRITEL(FB_READL(dst) ^ color, dst);
+ dst++;
+ if (shift == null_bits)
+ color = 0;
+ else
+ color = SHIFT_LOW(color, BITS_PER_LONG - shift);
+ }
+ shift += bpp;
+ shift &= (BITS_PER_LONG - 1);
+ }
+ if (shift)
+ FB_WRITEL(FB_READL(dst) ^ color, dst);
- /* XXX: slow */
- for (i = 0; i < n; i++) {
- *dst++ = fg;
- }
- if (end_mask)
- *dst |= end_mask;
- dst1 += linesize;
- } while (--height);
- break;
- case ROP_XOR:
- do {
- dst = (unsigned long *) dst1;
- if (start_mask)
- *dst ^= start_mask;
- if ((start_mask + rect->width) > ppw)
- dst++;
-
- for (i = 0; i < n; i++) {
- *dst++ ^= fg; /* PFILL24(fg,x1+i); */
- }
- if (end_mask)
- *dst ^= end_mask;
+ if (!pitch_index) {
dst1 += linesize;
- } while (--height);
- break;
- }
+ }
+ else {
+ dst2 += linesize;
+ dst1 = dst2;
+ (unsigned long) dst1 &= ~(bpl - 1);
+ start_index += pitch_index;
+ start_index &= BITS_PER_LONG - 1;
+ }
+ } while (--height);
+ break;
}
+
return;
}
diff -Naur linux-2.5.33/drivers/video/cfbimgblt.c linux/drivers/video/cfbimgblt.c
--- linux-2.5.33/drivers/video/cfbimgblt.c Sun Sep 8 19:31:34 2002
+++ linux/drivers/video/cfbimgblt.c Sun Sep 8 19:47:55 2002
@@ -22,6 +22,13 @@
* FIXME
* The code for 24 bit is horrible. It copies byte by byte size instead of
* longs like the other sizes. Needs to be optimized.
+ *
+ * Tony:
+ * Incorporate mask tables similar to fbcon-cfb*.c in 2.4 API. This speeds
+ * up the code significantly.
+ *
+ * Code for depths not multiples of BITS_PER_LONG is still kludgy, which is
+ * still processed a bit at a time.
*
* Also need to add code to deal with cards endians that are different than
* the native cpu endians. I also need to deal with MSB position in the word.
@@ -32,6 +39,7 @@
#include <asm/types.h>
#include <video/fbcon.h>
+#include "fbcon-rotate.h"
#define DEBUG
@@ -41,91 +49,303 @@
#define DPRINTK(fmt, args...)
#endif
+static u32 cfb_tab8[] = {
+#if defined(__BIG_ENDIAN)
+ 0x00000000,0x000000ff,0x0000ff00,0x0000ffff,
+ 0x00ff0000,0x00ff00ff,0x00ffff00,0x00ffffff,
+ 0xff000000,0xff0000ff,0xff00ff00,0xff00ffff,
+ 0xffff0000,0xffff00ff,0xffffff00,0xffffffff
+#elif defined(__LITTLE_ENDIAN)
+ 0x00000000,0xff000000,0x00ff0000,0xffff0000,
+ 0x0000ff00,0xff00ff00,0x00ffff00,0xffffff00,
+ 0x000000ff,0xff0000ff,0x00ff00ff,0xffff00ff,
+ 0x0000ffff,0xff00ffff,0x00ffffff,0xffffffff
+#else
+#error FIXME: No endianness??
+#endif
+};
+
+static u32 cfb_tab16[] = {
+#if defined(__BIG_ENDIAN)
+ 0x00000000, 0x0000ffff, 0xffff0000, 0xffffffff
+#elif defined(__LITTLE_ENDIAN)
+ 0x00000000, 0xffff0000, 0x0000ffff, 0xffffffff
+#else
+#error FIXME: No endianness??
+#endif
+};
+
+static u32 cfb_tab32[] = {
+ 0x00000000, 0xffffffff
+};
+
+#if BITS_PER_LONG == 32
+#define FB_WRITEL fb_writel
+#define FB_READL fb_readl
+#else
+#define FB_WRITEL fb_writeq
+#define FB_READL fb_readq
+#endif
+
+#if defined (__BIG_ENDIAN)
+#define LEFT_POS(bpp) (BITS_PER_LONG - bpp)
+#define NEXT_POS(pos, bpp) ((pos) -= (bpp))
+#define SHIFT_HIGH(val, bits) ((val) >> (bits))
+#define SHIFT_LOW(val, bits) ((val) << (bits))
+#else
+#define LEFT_POS(bpp) (0)
+#define NEXT_POS(pos, bpp) ((pos) += (bpp))
+#define SHIFT_HIGH(val, bits) ((val) << (bits))
+#define SHIFT_LOW(val, bits) ((val) >> (bits))
+#endif
+
+static inline void color_imageblit(struct fb_image *image, struct fb_info *p, u8 *dst1,
+ unsigned long start_index, unsigned long pitch_index)
+{
+ /* Draw the penguin */
+ int i, n;
+ unsigned long bitmask = SHIFT_LOW(~0UL, BITS_PER_LONG - p->var.bits_per_pixel);
+ unsigned long *palette = (unsigned long *) p->pseudo_palette;
+ unsigned long *dst, *dst2, color = 0, val, shift;
+ unsigned long null_bits = BITS_PER_LONG - p->var.bits_per_pixel;
+ u8 *src = image->data;
+
+ dst2 = (unsigned long *) dst1;
+ for (i = image->height; i--; ) {
+ n = image->width;
+ dst = (unsigned long *) dst1;
+ shift = 0;
+ val = 0;
+
+ if (start_index) {
+ unsigned long start_mask = ~(SHIFT_HIGH(~0UL, start_index));
+
+ val = FB_READL(dst) & start_mask;
+ shift = start_index;
+ }
+ while (n--) {
+ if (p->fix.visual == FB_VISUAL_PSEUDOCOLOR)
+ color = *src & bitmask;
+ if (p->fix.visual == FB_VISUAL_TRUECOLOR ||
+ p->fix.visual == FB_VISUAL_DIRECTCOLOR )
+ color = palette[*src] & bitmask;
+ val |= SHIFT_HIGH(color, shift);
+ if (shift >= null_bits) {
+ FB_WRITEL(val, dst++);
+ if (shift == null_bits)
+ val = 0;
+ else
+ val = SHIFT_LOW(color, BITS_PER_LONG - shift);
+ }
+ shift += p->var.bits_per_pixel;
+ shift &= (BITS_PER_LONG - 1);
+ src++;
+ }
+ if (shift) {
+ unsigned long end_mask = SHIFT_HIGH(~0UL, shift);
+
+ FB_WRITEL((FB_READL(dst) & end_mask) | val, dst);
+ }
+ dst1 += p->fix.line_length;
+ if (pitch_index) {
+ dst2 += p->fix.line_length;
+ dst1 = (char *) dst2;
+ (unsigned long) dst1 &= ~(sizeof(unsigned long) - 1);
+
+ start_index += pitch_index;
+ start_index &= BITS_PER_LONG - 1;
+ }
+ }
+}
+
+static inline void slow_imageblit(struct fb_image *image, struct fb_info *p, u8 *dst1,
+ unsigned long fgcolor, unsigned long bgcolor,
+ unsigned long start_index, unsigned long pitch_index)
+{
+ unsigned long i, j, l = 8;
+ unsigned long shift, color, bpp = p->var.bits_per_pixel;
+ unsigned long *dst, *dst2, val, pitch = p->fix.line_length;
+ unsigned long null_bits = BITS_PER_LONG - bpp;
+ u8 *src = image->data;
+
+ dst2 = (unsigned long *) dst1;
+
+ for (i = image->height; i--; ) {
+ shift = 0;
+ val = 0;
+ j = image->width;
+ dst = (unsigned long *) dst1;
+
+ /* write start bits, if any */
+ if (start_index) {
+ unsigned long start_mask = ~(SHIFT_HIGH(~0UL, start_index));
+
+ val = FB_READL(dst) & start_mask;
+ shift = start_index;
+ }
+ while (j--) {
+ l--;
+ if (*src & (1 << l))
+ color = fgcolor;
+ else
+ color = bgcolor;
+ val |= SHIFT_HIGH(color, shift);
+
+ /* Did the bitshift spill bits to the next long? */
+ if (shift >= null_bits) {
+ FB_WRITEL(val, dst++);
+ if (shift == null_bits)
+ val = 0;
+ else
+ val = SHIFT_LOW(color, BITS_PER_LONG - shift);
+ }
+ shift += bpp;
+ shift &= (BITS_PER_LONG - 1);
+ if (!l) { l = 8; src++; };
+ }
+ /* write end bits, if any*/
+ if (shift) {
+ unsigned long end_mask = SHIFT_HIGH(~0UL, shift);
+
+ FB_WRITEL((FB_READL(dst) & end_mask) | val, dst);
+ }
+ dst1 += pitch;
+
+ if (pitch_index) {
+ dst2 += pitch;
+ dst1 = (char *) dst2;
+ (unsigned long) dst1 &= ~(sizeof(unsigned long) - 1);
+
+ start_index += pitch_index;
+ start_index &= BITS_PER_LONG - 1;
+ }
+
+ }
+}
+
+static inline void fast_imageblit(struct fb_image *image, struct fb_info *p, u8 *dst1,
+ unsigned long fgcolor, unsigned long bgcolor)
+{
+ int i, j, k, l = 8, n;
+ unsigned long bit_mask, end_mask, eorx;
+ unsigned long fgx = fgcolor, bgx = bgcolor, pad, bpp = p->var.bits_per_pixel;
+ unsigned long tmp = (1 << bpp) - 1;
+ unsigned long ppw = BITS_PER_LONG/bpp, ppos;
+ unsigned long *dst;
+ u32 *tab = NULL;
+ char *src = image->data;
+
+ switch (ppw) {
+ case 4:
+ tab = cfb_tab8;
+ break;
+ case 2:
+ tab = cfb_tab16;
+ break;
+ case 1:
+ tab = cfb_tab32;
+ break;
+ }
+
+ for (i = ppw-1; i--; ) {
+ fgx <<= bpp;
+ bgx <<= bpp;
+ fgx |= fgcolor;
+ bgx |= bgcolor;
+ }
+
+ n = ((image->width + 7) / 8);
+ pad = (n * 8) - image->width;
+ n = image->width % ppw;
+
+ bit_mask = (1 << ppw) - 1;
+ eorx = fgx ^ bgx;
+
+ k = image->width/ppw;
+
+ for (i = image->height; i--; ) {
+ dst = (unsigned long *) dst1;
+
+ for (j = k; j--; ) {
+ l -= ppw;
+ end_mask = tab[(*src >> l) & bit_mask];
+ FB_WRITEL((end_mask & eorx)^bgx, dst++);
+ if (!l) { l = 8; src++; }
+ }
+ if (n) {
+ end_mask = 0;
+ ppos = LEFT_POS(bpp);
+ for (j = n; j > 0; j--) {
+ l--;
+ if (*src & (1 << l))
+ end_mask |= tmp << ppos;
+ NEXT_POS(ppos, bpp);
+ if (!l) { l = 8; src++; }
+ }
+ FB_WRITEL((end_mask & eorx)^bgx, dst++);
+ }
+ l -= pad;
+ dst1 += p->fix.line_length;
+ }
+}
+
void cfb_imageblit(struct fb_info *p, struct fb_image *image)
{
- int pad, ppw;
- int x2, y2, n, i, j, k, l = 7;
- unsigned long tmp = ~0 << (BITS_PER_LONG - p->var.bits_per_pixel);
- unsigned long fgx, bgx, fgcolor, bgcolor, eorx;
- unsigned long end_mask;
- unsigned long *dst = NULL;
+ int x2, y2, vxres, vyres;
+ unsigned long fgcolor, bgcolor, start_index, bitstart, pitch_index = 0;
+ unsigned long bpl = sizeof(unsigned long), bpp = p->var.bits_per_pixel;
u8 *dst1;
- u8 *src;
+
+ vxres = p->var.xres_virtual;
+ vyres = p->var.yres_virtual;
+#ifdef FBCON_HAS_ROTATE
+ if (p->var.vmode & (FB_VMODE_ROTATE_CW | FB_VMODE_ROTATE_CCW)) {
+ vxres = p->var.yres_virtual;
+ vyres = p->var.xres_virtual;
+ }
+#endif
/*
* We could use hardware clipping but on many cards you get around hardware
* clipping by writing to framebuffer directly like we are doing here.
*/
+ if (image->dx > vxres ||
+ image->dy > vyres)
+ return;
+
x2 = image->dx + image->width;
y2 = image->dy + image->height;
image->dx = image->dx > 0 ? image->dx : 0;
image->dy = image->dy > 0 ? image->dy : 0;
- x2 = x2 < p->var.xres_virtual ? x2 : p->var.xres_virtual;
- y2 = y2 < p->var.yres_virtual ? y2 : p->var.yres_virtual;
+ x2 = x2 < vxres ? x2 : vxres;
+ y2 = y2 < vyres ? y2 : vyres;
image->width = x2 - image->dx;
image->height = y2 - image->dy;
-
- dst1 = p->screen_base + image->dy * p->fix.line_length +
- ((image->dx * p->var.bits_per_pixel) >> 3);
-
- ppw = BITS_PER_LONG/p->var.bits_per_pixel;
- src = image->data;
+ bitstart = (image->dy * p->fix.line_length * 8) + (image->dx * bpp);
+ start_index = bitstart & (BITS_PER_LONG - 1);
+ pitch_index = (p->fix.line_length & (bpl - 1)) * 8;
- if (image->depth == 1) {
+ bitstart /= 8;
+ bitstart &= ~(bpl - 1);
+ dst1 = p->screen_base + bitstart;
- if (p->fix.visual == FB_VISUAL_TRUECOLOR) {
- fgx = fgcolor = ((u32 *)(p->pseudo_palette))[image->fg_color];
- bgx = bgcolor = ((u32 *)(p->pseudo_palette))[image->bg_color];
+ if (image->depth == 1) {
+ if (p->fix.visual == FB_VISUAL_TRUECOLOR ||
+ p->fix.visual == FB_VISUAL_DIRECTCOLOR) {
+ fgcolor = ((u32 *)(p->pseudo_palette))[image->fg_color];
+ bgcolor = ((u32 *)(p->pseudo_palette))[image->bg_color];
} else {
- fgx = fgcolor = image->fg_color;
- bgx = bgcolor = image->bg_color;
+ fgcolor = image->fg_color;
+ bgcolor = image->bg_color;
}
-
- for (i = 0; i < ppw-1; i++) {
- fgx <<= p->var.bits_per_pixel;
- bgx <<= p->var.bits_per_pixel;
- fgx |= fgcolor;
- bgx |= bgcolor;
- }
- eorx = fgx ^ bgx;
- n = ((image->width + 7) >> 3);
- pad = (n << 3) - image->width;
- n = image->width % ppw;
- for (i = 0; i < image->height; i++) {
- dst = (unsigned long *) dst1;
-
- for (j = image->width/ppw; j > 0; j--) {
- end_mask = 0;
-
- for (k = ppw; k > 0; k--) {
- if (test_bit(l, (unsigned long *) src))
- end_mask |= (tmp >> (p->var.bits_per_pixel*(k-1)));
- l--;
- if (l < 0) { l = 7; src++; }
- }
- fb_writel((end_mask & eorx)^bgx, dst);
- dst++;
- }
-
- if (n) {
- end_mask = 0;
- for (j = n; j > 0; j--) {
- if (test_bit(l, (unsigned long *) src))
- end_mask |= (tmp >> (p->var.bits_per_pixel*(j-1)));
- l--;
- if (l < 0) { l = 7; src++; }
- }
- fb_writel((end_mask & eorx)^bgx, dst);
- dst++;
- }
- l -= pad;
- dst1 += p->fix.line_length;
- }
- } else {
- /* Draw the penguin */
- n = ((image->width * p->var.bits_per_pixel) >> 3);
- end_mask = 0;
+ if (BITS_PER_LONG % bpp == 0 && !start_index && !pitch_index &&
+ bpp >= 8 && bpp <= 32 && (image->width & 7) == 0)
+ fast_imageblit(image, p, dst1, fgcolor, bgcolor);
+ else
+ slow_imageblit(image, p, dst1, fgcolor, bgcolor, start_index, pitch_index);
}
+ else if (image->depth == bpp)
+ color_imageblit(image, p, dst1, start_index, pitch_index);
}
-------------------------------------------------------
This sf.net email is sponsored by: OSDN - Tired of that same old
cell phone? Get a new here for FREE!
https://www.inphonic.com/r.asp?r=sourceforge1&refcode1=vs3390
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [Patch 2] Console Rotation
2002-09-09 14:22 [Patch 2] Console Rotation Antonino Daplas
@ 2002-09-09 14:35 ` Geert Uytterhoeven
2002-09-09 15:06 ` Antonino Daplas
0 siblings, 1 reply; 6+ messages in thread
From: Geert Uytterhoeven @ 2002-09-09 14:35 UTC (permalink / raw)
To: Antonino Daplas; +Cc: fbdev
On 9 Sep 2002, Antonino Daplas wrote:
> cfbcopyarea.c is just a 'copy 'n paste' of Geert's version I leached
> from fbutils (I hope this is okay with you, Geert :) The only changes
> are using FB_WRITEL and FB_READL where appropriate.
Of course (actually that was my intention ;-)
Did you measure any noticeable differences (performance-wise) with the old
routines?
BTW, expect a fillrect() for arbitrary bitdepths soon...
Gr{oetje,eeting}s,
Geert
--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org
In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
-- Linus Torvalds
-------------------------------------------------------
This sf.net email is sponsored by: OSDN - Tired of that same old
cell phone? Get a new here for FREE!
https://www.inphonic.com/r.asp?r=sourceforge1&refcode1=vs3390
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [Patch 2] Console Rotation
2002-09-09 14:35 ` Geert Uytterhoeven
@ 2002-09-09 15:06 ` Antonino Daplas
2002-10-06 20:41 ` Geert Uytterhoeven
0 siblings, 1 reply; 6+ messages in thread
From: Antonino Daplas @ 2002-09-09 15:06 UTC (permalink / raw)
To: Geert Uytterhoeven; +Cc: fbdev
On Mon, 2002-09-09 at 22:35, Geert Uytterhoeven wrote:
> On 9 Sep 2002, Antonino Daplas wrote:
> > cfbcopyarea.c is just a 'copy 'n paste' of Geert's version I leached
> > from fbutils (I hope this is okay with you, Geert :) The only changes
> > are using FB_WRITEL and FB_READL where appropriate.
>
> Of course (actually that was my intention ;-)
Thanks.
>
> Did you measure any noticeable differences (performance-wise) with the old
> routines?
>
Yes, but not by much, something like less than a second. But I haven't
really done a test where the primary operation is copyarea... Will this
be affected by the the setting of display->scrollmode? I use
SCROLL_YREDRAW.
> BTW, expect a fillrect() for arbitrary bitdepths soon...
This is good :) The one I included should do that too, but it is
terribly inefficient.
Tony
-------------------------------------------------------
This sf.net email is sponsored by: OSDN - Tired of that same old
cell phone? Get a new here for FREE!
https://www.inphonic.com/r.asp?r=sourceforge1&refcode1=vs3390
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [Patch 2] Console Rotation
2002-09-09 15:06 ` Antonino Daplas
@ 2002-10-06 20:41 ` Geert Uytterhoeven
2002-10-07 19:44 ` Antonino Daplas
0 siblings, 1 reply; 6+ messages in thread
From: Geert Uytterhoeven @ 2002-10-06 20:41 UTC (permalink / raw)
To: Antonino Daplas; +Cc: fbdev
On 9 Sep 2002, Antonino Daplas wrote:
> On Mon, 2002-09-09 at 22:35, Geert Uytterhoeven wrote:
> > On 9 Sep 2002, Antonino Daplas wrote:
> > > cfbcopyarea.c is just a 'copy 'n paste' of Geert's version I leached
> > > from fbutils (I hope this is okay with you, Geert :) The only changes
> > > are using FB_WRITEL and FB_READL where appropriate.
> >
> > Of course (actually that was my intention ;-)
> Thanks.
>
> >
> > Did you measure any noticeable differences (performance-wise) with the old
> > routines?
> >
> Yes, but not by much, something like less than a second. But I haven't
> really done a test where the primary operation is copyarea... Will this
> be affected by the the setting of display->scrollmode? I use
> SCROLL_YREDRAW.
>
> > BTW, expect a fillrect() for arbitrary bitdepths soon...
> This is good :) The one I included should do that too, but it is
> terribly inefficient.
The fillrect() for arbitrary bitdepths is in fbtest now. I also added fast
support for planar screens. Well, now I can start porting amifb to the accel
framework.
So far I tested fbtest on
- PPC (atyfb on 3D RAGE II+, cfb)
- m68k (amifb on Amiga AGA, planar)
- Alpha (tgafb on DEC UDB with 8-bit TGA, cfb)
Gr{oetje,eeting}s,
Geert
--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org
In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
-- Linus Torvalds
-------------------------------------------------------
This sf.net email is sponsored by:ThinkGeek
Welcome to geek heaven.
http://thinkgeek.com/sf
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [Patch 2] Console Rotation
2002-10-06 20:41 ` Geert Uytterhoeven
@ 2002-10-07 19:44 ` Antonino Daplas
2002-10-07 20:06 ` Geert Uytterhoeven
0 siblings, 1 reply; 6+ messages in thread
From: Antonino Daplas @ 2002-10-07 19:44 UTC (permalink / raw)
To: Geert Uytterhoeven; +Cc: fbdev
[-- Attachment #1: Type: text/plain, Size: 594 bytes --]
On Mon, 2002-10-07 at 04:41, Geert Uytterhoeven wrote:
>
> The fillrect() for arbitrary bitdepths is in fbtest now. I also added fast
> support for planar screens. Well, now I can start porting amifb to the accel
> framework.
>
I modified your fillrect for cfbfillrect.c. Just added support for
ROP_XOR.
Here's also some rudimentary benchmarks:
fill/copy a 256x256 rectangle 1000 times (8bpp):
old new
copyarea 4.930s 5.151s
fillrect(ROP_COPY) 0.136s 0.256s
fillrect(ROP_XOR) 4.059s 3.903s
Tony
[-- Attachment #2: cfbfillrect.c --]
[-- Type: text/x-c, Size: 10307 bytes --]
/*
* Generic fillrect for frame buffers with packed pixels of any depth.
*
* Copyright (C) 2000 James Simmons (jsimmons@linux-fbdev.org)
*
* This file is subject to the terms and conditions of the GNU General Public
* License. See the file COPYING in the main directory of this archive for
* more details.
*
* NOTES:
* ++Tony: Based on fbtest (cfb.c bitstream.c) by Geert Uytterhoeven.
*
*/
#include <linux/string.h>
#include <linux/fb.h>
#include <asm/types.h>
#include <video/fbcon.h>
#if BITS_PER_LONG == 32
#define FB_WRITEL fb_writel
#define FB_READL fb_readl
#define BYTES_PER_LONG 4
#define SHIFT_PER_LONG 5
#else
#define FB_WRITEL fb_writeq
#define FB_READL fb_readq
#define BYTES_PER_LONG 8
#define SHIFT_PER_LONG 6
#endif
#define EXP1(x) 0xffffffffU*x
#define EXP2(x) 0x55555555U*x
#define EXP4(x) 0x11111111U*0x ## x
typedef u32 pixel_t;
static const u32 bpp1tab[2] = {
EXP1(0), EXP1(1)
};
static const u32 bpp2tab[4] = {
EXP2(0), EXP2(1), EXP2(2), EXP2(3)
};
static const u32 bpp4tab[16] = {
EXP4(0), EXP4(1), EXP4(2), EXP4(3), EXP4(4), EXP4(5), EXP4(6), EXP4(7),
EXP4(8), EXP4(9), EXP4(a), EXP4(b), EXP4(c), EXP4(d), EXP4(e), EXP4(f)
};
/*
* Compose two values, using a bitmask as decision value
* This is equivalent to (a & mask) | (b & ~mask)
*/
static inline unsigned long comp(unsigned long a, unsigned long b,
unsigned long mask)
{
return ((a ^ b) & mask) ^ b;
}
static inline u32 pixel_to_pat32(const struct fb_info *p, pixel_t pixel)
{
u32 pat = pixel;
switch (p->var.bits_per_pixel) {
case 1:
pat = bpp1tab[pat];
break;
case 2:
pat = bpp2tab[pat];
break;
case 4:
pat = bpp4tab[pat];
break;
case 8:
pat |= pat << 8;
// Fall through
case 16:
pat |= pat << 16;
// Fall through
case 32:
break;
}
return pat;
}
/*
* Expand a pixel value to a generic 32/64-bit pattern and rotate it to
* the correct start position
*/
static inline unsigned long pixel_to_pat(const struct fb_info *p, pixel_t pixel, int left)
{
unsigned long pat = pixel;
u32 bpp = p->var.bits_per_pixel;
int i;
/* expand pixel value */
for (i = bpp; i < BITS_PER_LONG; i *= 2)
pat |= pat << i;
/* rotate pattern to correct start position */
pat = pat << left | pat >> (bpp-left);
return pat;
}
/*
* Unaligned 32-bit pattern fill using 32/64-bit memory accesses
*/
void bitfill32(unsigned long *dst, int dst_idx, u32 pat, u32 n)
{
unsigned long val = pat;
unsigned long first, last;
if (!n)
return;
#if BITS_PER_LONG == 64
val |= val << 32;
#endif
first = ~0UL >> dst_idx;
last = ~(~0UL >> ((dst_idx+n) % BITS_PER_LONG));
if (dst_idx+n <= BITS_PER_LONG) {
// Single word
if (last)
first &= last;
FB_WRITEL(comp(val, FB_READL(dst), first), dst);
} else {
// Multiple destination words
// Leading bits
if (first) {
FB_WRITEL(comp(val, FB_READL(dst), first), dst);
dst++;
n -= BITS_PER_LONG-dst_idx;
}
// Main chunk
n /= BITS_PER_LONG;
while (n >= 8) {
FB_WRITEL(val, dst++);
FB_WRITEL(val, dst++);
FB_WRITEL(val, dst++);
FB_WRITEL(val, dst++);
FB_WRITEL(val, dst++);
FB_WRITEL(val, dst++);
FB_WRITEL(val, dst++);
FB_WRITEL(val, dst++);
n -= 8;
}
while (n--)
FB_WRITEL(val, dst++);
// Trailing bits
if (last)
FB_WRITEL(comp(val, FB_READL(dst), first), dst);
}
}
/*
* Unaligned generic pattern fill using 32/64-bit memory accesses
* The pattern must have been expanded to a full 32/64-bit value
* Left/right are the appropriate shifts to convert to the pattern to be
* used for the next 32/64-bit word
*/
void bitfill(unsigned long *dst, int dst_idx, unsigned long pat, int left,
int right, u32 n)
{
unsigned long first, last;
if (!n)
return;
first = ~0UL >> dst_idx;
last = ~(~0UL >> ((dst_idx+n) % BITS_PER_LONG));
if (dst_idx+n <= BITS_PER_LONG) {
// Single word
if (last)
first &= last;
FB_WRITEL(comp(pat, FB_READL(dst), first), dst);
} else {
// Multiple destination words
// Leading bits
if (first) {
FB_WRITEL(comp(pat, FB_READL(dst), first), dst);
dst++;
pat = pat << left | pat >> right;
n -= BITS_PER_LONG-dst_idx;
}
// Main chunk
n /= BITS_PER_LONG;
while (n >= 4) {
FB_WRITEL(pat, dst++);
pat = pat << left | pat >> right;
FB_WRITEL(pat, dst++);
pat = pat << left | pat >> right;
FB_WRITEL(pat, dst++);
pat = pat << left | pat >> right;
FB_WRITEL(pat, dst++);
pat = pat << left | pat >> right;
n -= 4;
}
while (n--) {
FB_WRITEL(pat, dst++);
pat = pat << left | pat >> right;
}
// Trailing bits
if (last)
FB_WRITEL(comp(pat, FB_READL(dst), first), dst);
}
}
void bitfill32_xor(unsigned long *dst, int dst_idx, u32 pat, u32 n)
{
unsigned long val = pat, dat;
unsigned long first, last;
if (!n)
return;
#if BITS_PER_LONG == 64
val |= val << 32;
#endif
first = ~0UL >> dst_idx;
last = ~(~0UL >> ((dst_idx+n) % BITS_PER_LONG));
if (dst_idx+n <= BITS_PER_LONG) {
// Single word
if (last)
first &= last;
dat = FB_READL(dst);
FB_WRITEL(comp(dat ^ val, dat, first), dst);
} else {
// Multiple destination words
// Leading bits
if (first) {
dat = FB_READL(dst);
FB_WRITEL(comp(dat ^ val, dat, first), dst);
dst++;
n -= BITS_PER_LONG-dst_idx;
}
// Main chunk
n /= BITS_PER_LONG;
while (n >= 8) {
FB_WRITEL(FB_READL(dst) ^ val, dst);
dst++;
FB_WRITEL(FB_READL(dst) ^ val, dst);
dst++;
FB_WRITEL(FB_READL(dst) ^ val, dst);
dst++;
FB_WRITEL(FB_READL(dst) ^ val, dst);
dst++;
FB_WRITEL(FB_READL(dst) ^ val, dst);
dst++;
FB_WRITEL(FB_READL(dst) ^ val, dst);
dst++;
FB_WRITEL(FB_READL(dst) ^ val, dst);
dst++;
FB_WRITEL(FB_READL(dst) ^ val, dst);
dst++;
n -= 8;
}
while (n--) {
FB_WRITEL(FB_READL(dst) ^ val, dst);
dst++;
}
// Trailing bits
if (last) {
dat = FB_READL(dst);
FB_WRITEL(comp(dat ^ val, dat, first), dst);
}
}
}
/*
* Unaligned generic pattern fill using 32/64-bit memory accesses
* The pattern must have been expanded to a full 32/64-bit value
* Left/right are the appropriate shifts to convert to the pattern to be
* used for the next 32/64-bit word
*/
void bitfill_xor(unsigned long *dst, int dst_idx, unsigned long pat, int left,
int right, u32 n)
{
unsigned long first, last, dat;
if (!n)
return;
first = ~0UL >> dst_idx;
last = ~(~0UL >> ((dst_idx+n) % BITS_PER_LONG));
if (dst_idx+n <= BITS_PER_LONG) {
// Single word
if (last)
first &= last;
dat = FB_READL(dst);
FB_WRITEL(comp(dat ^ pat, dat, first), dst);
} else {
// Multiple destination words
// Leading bits
if (first) {
dat = FB_READL(dst);
FB_WRITEL(comp(dat ^ pat, dat, first), dst);
dst++;
pat = pat << left | pat >> right;
n -= BITS_PER_LONG-dst_idx;
}
// Main chunk
n /= BITS_PER_LONG;
while (n >= 4) {
FB_WRITEL(FB_READL(dst) ^ pat, dst);
dst++;
pat = pat << left | pat >> right;
FB_WRITEL(FB_READL(dst) ^ pat, dst);
dst++;
pat = pat << left | pat >> right;
FB_WRITEL(FB_READL(dst) ^ pat, dst);
dst++;
pat = pat << left | pat >> right;
FB_WRITEL(FB_READL(dst) ^ pat, dst);
dst++;
pat = pat << left | pat >> right;
n -= 4;
}
while (n--) {
FB_WRITEL(FB_READL(dst) ^ pat, dst);
dst++;
pat = pat << left | pat >> right;
}
// Trailing bits
if (last) {
dat = FB_READL(dst);
FB_WRITEL(comp(dat ^ pat, dat, first), dst);
}
}
}
void cfb_fillrect(struct fb_info *p, struct fb_fillrect *rect)
{
unsigned long height, fg;
unsigned long x2, y2, vxres, vyres;
unsigned long *dst;
int dst_idx, left;
u32 bpp = p->var.bits_per_pixel;
vxres = p->var.xres_virtual;
vyres = p->var.yres_virtual;
if (!rect->width || !rect->height ||
rect->dx > vxres ||
rect->dy > vyres)
return;
/* We could use hardware clipping but on many cards you get around
* hardware clipping by writing to framebuffer directly. */
x2 = rect->dx + rect->width;
y2 = rect->dy + rect->height;
x2 = x2 < vxres ? x2 : vxres;
y2 = y2 < vyres ? y2 : vyres;
rect->width = x2 - rect->dx;
height = y2 - rect->dy;
if (p->fix.visual == FB_VISUAL_TRUECOLOR ||
p->fix.visual == FB_VISUAL_DIRECTCOLOR )
fg = ((u32 *) (p->pseudo_palette))[rect->color];
else
fg = rect->color;
dst = (unsigned long *)((unsigned long)p->screen_base & ~(BYTES_PER_LONG-1));
dst_idx = ((unsigned long)p->screen_base & (BYTES_PER_LONG-1))*8;
dst_idx += rect->dy*p->fix.line_length*8+rect->dx*bpp;
/* FIXME For now we support 1-32 bpp only */
left = BITS_PER_LONG % bpp;
if (!left) {
u32 pat = pixel_to_pat32(p, fg);
void (*fill_op32)(unsigned long *dst, int dst_idx, u32 pat, u32 n) = NULL;
switch (rect->rop) {
case ROP_XOR:
fill_op32 = bitfill32_xor;
break;
case ROP_COPY:
default:
fill_op32 = bitfill32;
break;
}
while (height--) {
dst += dst_idx >> SHIFT_PER_LONG;
dst_idx &= (BITS_PER_LONG-1);
fill_op32(dst, dst_idx, pat, rect->width*bpp);
dst_idx += p->fix.line_length*8;
}
} else {
unsigned long pat = pixel_to_pat(p, fg, (left-dst_idx) % bpp);
int right = bpp-left;
int r;
void (*fill_op)(unsigned long *dst, int dst_idx, unsigned long pat,
int left, int right, u32 n) = NULL;
switch (rect->rop) {
case ROP_XOR:
fill_op = bitfill_xor;
break;
case ROP_COPY:
default:
fill_op = bitfill;
break;
}
while (height--) {
dst += dst_idx >> SHIFT_PER_LONG;
dst_idx &= (BITS_PER_LONG-1);
fill_op(dst, dst_idx, pat, left, right, rect->width*bpp);
r = (p->fix.line_length*8) % bpp;
pat = pat << (bpp-r) | pat >> r;
dst_idx += p->fix.line_length*8;
}
}
}
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [Patch 2] Console Rotation
2002-10-07 19:44 ` Antonino Daplas
@ 2002-10-07 20:06 ` Geert Uytterhoeven
0 siblings, 0 replies; 6+ messages in thread
From: Geert Uytterhoeven @ 2002-10-07 20:06 UTC (permalink / raw)
To: Antonino Daplas; +Cc: fbdev
On 8 Oct 2002, Antonino Daplas wrote:
> On Mon, 2002-10-07 at 04:41, Geert Uytterhoeven wrote:
> > The fillrect() for arbitrary bitdepths is in fbtest now. I also added fast
> > support for planar screens. Well, now I can start porting amifb to the accel
> > framework.
> >
> I modified your fillrect for cfbfillrect.c. Just added support for
> ROP_XOR.
>
> Here's also some rudimentary benchmarks:
>
> fill/copy a 256x256 rectangle 1000 times (8bpp):
>
> old new
> copyarea 4.930s 5.151s
> fillrect(ROP_COPY) 0.136s 0.256s
> fillrect(ROP_XOR) 4.059s 3.903s
Yes, there is still room for optimization...
If next_line is a multiple of BYTES_PER_LONG, there's no need to recalculate
the first and last masks.
Even if next_line is not a multiple of BYTES_PER_LONG, the first and last masks
are periodic, and can be precalculated and put in a table (4 entries on 32-bit
and 8 entries on 64-bit).
Gr{oetje,eeting}s,
Geert
--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org
In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
-- Linus Torvalds
-------------------------------------------------------
This sf.net email is sponsored by:ThinkGeek
Welcome to geek heaven.
http://thinkgeek.com/sf
^ permalink raw reply [flat|nested] 6+ messages in thread
end of thread, other threads:[~2002-10-07 20:06 UTC | newest]
Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2002-09-09 14:22 [Patch 2] Console Rotation Antonino Daplas
2002-09-09 14:35 ` Geert Uytterhoeven
2002-09-09 15:06 ` Antonino Daplas
2002-10-06 20:41 ` Geert Uytterhoeven
2002-10-07 19:44 ` Antonino Daplas
2002-10-07 20:06 ` Geert Uytterhoeven
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).