qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
* [Qemu-devel] [PATCH 1/1] Sun4m : TCX framebuffer hardware acceleration
@ 2014-02-16 23:15 Olivier Danet
  2014-05-25  9:50 ` Mark Cave-Ayland
  0 siblings, 1 reply; 6+ messages in thread
From: Olivier Danet @ 2014-02-16 23:15 UTC (permalink / raw)
  To: qemu-devel, Blue Swirl, Mark Cave-Ayland, Artyom Tarasenko

The S24/TCX framebuffer is a mildly accelerated video card, with
blitter, stippler and hardware cursor.
* Solaris and NetBSD 6.x use all the hardware acceleration features.
* The Xorg driver (used by Linux) can use the hardware cursor only.

This patch implements hardware acceleration in both 8-bit and 24-bit
modes. It is based on the NetBSD driver sources and on tests with Solaris.

Signed-off-by: Olivier Danet <odanet@caramail.com>
---
  hw/display/tcx.c | 679 +++++++++++++++++++++++++++++++++++++++++++++----------
  hw/sparc/sun4m.c |  46 ++--
  2 files changed, 589 insertions(+), 136 deletions(-)

diff --git a/hw/display/tcx.c b/hw/display/tcx.c
index 873b82c..bcd64e5 100644
--- a/hw/display/tcx.c
+++ b/hw/display/tcx.c
@@ -33,17 +33,26 @@

  #define MAXX 1024
  #define MAXY 768
-#define TCX_DAC_NREGS 16
-#define TCX_THC_NREGS_8  0x081c
-#define TCX_THC_NREGS_24 0x1000
+#define TCX_DAC_NREGS    16
+#define TCX_THC_NREGS    0x1000
+#define TCX_DHC_NREGS    0x4000
  #define TCX_TEC_NREGS    0x1000
+#define TCX_ALT_NREGS    0x8000
+#define TCX_STIP_NREGS   0x800000
+#define TCX_BLIT_NREGS   0x800000
+#define TCX_RSTIP_NREGS  0x800000
+#define TCX_RBLIT_NREGS  0x800000
+
+#define TCX_THC_MISC     0x818
+#define TCX_THC_CURSXY   0x8fc
+#define TCX_THC_CURSMASK 0x900
+#define TCX_THC_CURSBITS 0x980

  #define TYPE_TCX "SUNW,tcx"
  #define TCX(obj) OBJECT_CHECK(TCXState, (obj), TYPE_TCX)

  typedef struct TCXState {
      SysBusDevice parent_obj;
-
      QemuConsole *con;
      uint8_t *vram;
      uint32_t *vram24, *cplane;
@@ -52,17 +61,30 @@ typedef struct TCXState {
      MemoryRegion vram_mem;
      MemoryRegion vram_8bit;
      MemoryRegion vram_24bit;
+    MemoryRegion stip;
+    MemoryRegion blit;
      MemoryRegion vram_cplane;
-    MemoryRegion dac;
+    MemoryRegion rstip;
+    MemoryRegion rblit;
      MemoryRegion tec;
+    MemoryRegion dac;
+    MemoryRegion thc;
+    MemoryRegion dhc;
+    MemoryRegion alt;
      MemoryRegion thc24;
-    MemoryRegion thc8;
+
      ram_addr_t vram24_offset, cplane_offset;
+    uint32_t tmpblit;
      uint32_t vram_size;
-    uint32_t palette[256];
-    uint8_t r[256], g[256], b[256];
+    uint32_t palette[260];
+    uint8_t r[260], g[260], b[260];
      uint16_t width, height, depth;
      uint8_t dac_index, dac_state;
+    uint32_t thcmisc;
+    uint32_t cursmask[32];
+    uint32_t cursbits[32];
+    uint16_t cursx;
+    uint16_t cursy;
  } TCXState;

  static void tcx_set_dirty(TCXState *s)
@@ -70,10 +92,36 @@ static void tcx_set_dirty(TCXState *s)
      memory_region_set_dirty(&s->vram_mem, 0, MAXX * MAXY);
  }

-static void tcx24_set_dirty(TCXState *s)
+static inline int tcx24_check_dirty(TCXState *s, ram_addr_t page,
+                                    ram_addr_t page24, ram_addr_t cpage)
  {
-    memory_region_set_dirty(&s->vram_mem, s->vram24_offset, MAXX * MAXY 
* 4);
-    memory_region_set_dirty(&s->vram_mem, s->cplane_offset, MAXX * MAXY 
* 4);
+    int ret;
+
+    ret = memory_region_get_dirty(&s->vram_mem, page, TARGET_PAGE_SIZE,
+                                  DIRTY_MEMORY_VGA);
+    ret |= memory_region_get_dirty(&s->vram_mem, page24, 
TARGET_PAGE_SIZE * 4,
+                                   DIRTY_MEMORY_VGA);
+    ret |= memory_region_get_dirty(&s->vram_mem, cpage, 
TARGET_PAGE_SIZE * 4,
+                                   DIRTY_MEMORY_VGA);
+    return ret;
+}
+
+static inline void tcx24_reset_dirty(TCXState *ts, ram_addr_t page_min,
+                               ram_addr_t page_max, ram_addr_t page24,
+                              ram_addr_t cpage)
+{
+    memory_region_reset_dirty(&ts->vram_mem,
+                              page_min,
+                              (page_max - page_min) + TARGET_PAGE_SIZE,
+                              DIRTY_MEMORY_VGA);
+    memory_region_reset_dirty(&ts->vram_mem,
+                              page24 + page_min * 4,
+                              (page_max - page_min) * 4 + TARGET_PAGE_SIZE,
+                              DIRTY_MEMORY_VGA);
+    memory_region_reset_dirty(&ts->vram_mem,
+                              cpage + page_min * 4,
+                              (page_max - page_min) * 4 + TARGET_PAGE_SIZE,
+                              DIRTY_MEMORY_VGA);
  }

  static void update_palette_entries(TCXState *s, int start, int end)
@@ -102,11 +150,7 @@ static void update_palette_entries(TCXState *s, int 
start, int end)
              break;
          }
      }
-    if (s->depth == 24) {
-        tcx24_set_dirty(s);
-    } else {
-        tcx_set_dirty(s);
-    }
+    tcx_set_dirty(s);
  }

  static void tcx_draw_line32(TCXState *s1, uint8_t *d,
@@ -116,7 +160,7 @@ static void tcx_draw_line32(TCXState *s1, uint8_t *d,
      uint8_t val;
      uint32_t *p = (uint32_t *)d;

-    for(x = 0; x < width; x++) {
+    for (x = 0; x < width; x++) {
          val = *s++;
          *p++ = s1->palette[val];
      }
@@ -129,7 +173,7 @@ static void tcx_draw_line16(TCXState *s1, uint8_t *d,
      uint8_t val;
      uint16_t *p = (uint16_t *)d;

-    for(x = 0; x < width; x++) {
+    for (x = 0; x < width; x++) {
          val = *s++;
          *p++ = s1->palette[val];
      }
@@ -147,6 +191,83 @@ static void tcx_draw_line8(TCXState *s1, uint8_t *d,
      }
  }

+static void tcx_draw_cursor32(TCXState *s1, uint8_t *d,
+                              int y, int width)
+{
+    int x, len;
+    uint32_t mask, bits;
+    uint32_t *p = (uint32_t *)d;
+
+    y = y - s1->cursy;
+    mask = s1->cursmask[y];
+    bits = s1->cursbits[y];
+    len = MIN(width - s1->cursx, 32);
+    p = &p[s1->cursx];
+    for (x = 0; x < len; x++) {
+        if (mask & 0x80000000) {
+            if (bits & 0x80000000) {
+                *p = s1->palette[259];
+            } else {
+                *p = s1->palette[258];
+            }
+        }
+        p++;
+        mask <<= 1;
+        bits <<= 1;
+    }
+}
+
+static void tcx_draw_cursor16(TCXState *s1, uint8_t *d,
+                              int y, int width)
+{
+    int x, len;
+    uint32_t mask, bits;
+    uint16_t *p = (uint16_t *)d;
+
+    y = y - s1->cursy;
+    mask = s1->cursmask[y];
+    bits = s1->cursbits[y];
+    len = MIN(width - s1->cursx, 32);
+    p = &p[s1->cursx];
+    for (x = 0; x < len; x++) {
+        if (mask & 0x80000000) {
+            if (bits & 0x80000000) {
+                *p = s1->palette[259];
+            } else {
+                *p = s1->palette[258];
+            }
+        }
+        p++;
+        mask <<= 1;
+        bits <<= 1;
+    }
+}
+
+static void tcx_draw_cursor8(TCXState *s1, uint8_t *d,
+                              int y, int width)
+{
+    int x, len;
+    uint32_t mask, bits;
+
+    y = y - s1->cursy;
+    mask = s1->cursmask[y];
+    bits = s1->cursbits[y];
+    len = MIN(width - s1->cursx, 32);
+    d = &d[s1->cursx];
+    for (x = 0; x < len; x++) {
+        if (mask & 0x80000000) {
+            if (bits & 0x80000000) {
+                *d = s1->palette[259];
+            } else {
+                *d = s1->palette[258];
+            }
+        }
+        d++;
+        mask <<= 1;
+        bits <<= 1;
+    }
+}
+
  /*
    XXX Could be much more optimal:
    * detect if line/page/whole screen is in 24 bit mode
@@ -162,11 +283,10 @@ static inline void tcx24_draw_line32(TCXState *s1, 
uint8_t *d,
      uint8_t val, *p8;
      uint32_t *p = (uint32_t *)d;
      uint32_t dval;
-
      bgr = is_surface_bgr(surface);
      for(x = 0; x < width; x++, s++, s24++) {
-        if ((be32_to_cpu(*cplane++) & 0xff000000) == 0x03000000) {
-            // 24-bit direct, BGR order
+        if (be32_to_cpu(*cplane) & 0x03000000) {
+            /* 24-bit direct, BGR order */
              p8 = (uint8_t *)s24;
              p8++;
              b = *p8++;
@@ -177,47 +297,18 @@ static inline void tcx24_draw_line32(TCXState *s1, 
uint8_t *d,
              else
                  dval = rgb_to_pixel32(r, g, b);
          } else {
+            /* 8 bits pseudocolor */
              val = *s;
              dval = s1->palette[val];
          }
          *p++ = dval;
+        cplane++;
      }
  }

-static inline int check_dirty(TCXState *s, ram_addr_t page, ram_addr_t 
page24,
-                              ram_addr_t cpage)
-{
-    int ret;
-
-    ret = memory_region_get_dirty(&s->vram_mem, page, TARGET_PAGE_SIZE,
-                                  DIRTY_MEMORY_VGA);
-    ret |= memory_region_get_dirty(&s->vram_mem, page24, 
TARGET_PAGE_SIZE * 4,
-                                   DIRTY_MEMORY_VGA);
-    ret |= memory_region_get_dirty(&s->vram_mem, cpage, 
TARGET_PAGE_SIZE * 4,
-                                   DIRTY_MEMORY_VGA);
-    return ret;
-}
-
-static inline void reset_dirty(TCXState *ts, ram_addr_t page_min,
-                               ram_addr_t page_max, ram_addr_t page24,
-                              ram_addr_t cpage)
-{
-    memory_region_reset_dirty(&ts->vram_mem,
-                              page_min,
-                              (page_max - page_min) + TARGET_PAGE_SIZE,
-                              DIRTY_MEMORY_VGA);
-    memory_region_reset_dirty(&ts->vram_mem,
-                              page24 + page_min * 4,
-                              (page_max - page_min) * 4 + TARGET_PAGE_SIZE,
-                              DIRTY_MEMORY_VGA);
-    memory_region_reset_dirty(&ts->vram_mem,
-                              cpage + page_min * 4,
-                              (page_max - page_min) * 4 + TARGET_PAGE_SIZE,
-                              DIRTY_MEMORY_VGA);
-}
-
  /* Fixed line length 1024 allows us to do nice tricks not possible on
     VGA... */
+
  static void tcx_update_display(void *opaque)
  {
      TCXState *ts = opaque;
@@ -226,6 +317,7 @@ static void tcx_update_display(void *opaque)
      int y, y_start, dd, ds;
      uint8_t *d, *s;
      void (*f)(TCXState *s1, uint8_t *dst, const uint8_t *src, int width);
+    void (*fc)(TCXState *s1, uint8_t *dst, int y, int width);

      if (surface_bits_per_pixel(surface) == 0) {
          return;
@@ -243,20 +335,23 @@ static void tcx_update_display(void *opaque)
      switch (surface_bits_per_pixel(surface)) {
      case 32:
          f = tcx_draw_line32;
+        fc = tcx_draw_cursor32;
          break;
      case 15:
      case 16:
          f = tcx_draw_line16;
+        fc = tcx_draw_cursor16;
          break;
      default:
      case 8:
          f = tcx_draw_line8;
+        fc = tcx_draw_cursor8;
          break;
      case 0:
          return;
      }

-    for(y = 0; y < ts->height; y += 4, page += TARGET_PAGE_SIZE) {
+    for (y = 0; y < ts->height; page += TARGET_PAGE_SIZE) {
          if (memory_region_get_dirty(&ts->vram_mem, page, TARGET_PAGE_SIZE,
                                      DIRTY_MEMORY_VGA)) {
              if (y_start < 0)
@@ -265,18 +360,35 @@ static void tcx_update_display(void *opaque)
                  page_min = page;
              if (page > page_max)
                  page_max = page;
+
              f(ts, d, s, ts->width);
+            if (y >= ts->cursy && y < ts->cursy+32 && ts->cursx < 
ts->width) {
+                fc(ts, d, y, ts->width);
+            }
              d += dd;
              s += ds;
+            y++;
              f(ts, d, s, ts->width);
+            if (y >= ts->cursy && y < ts->cursy+32 && ts->cursx < 
ts->width) {
+                fc(ts, d, y, ts->width);
+            }
              d += dd;
              s += ds;
+            y++;
              f(ts, d, s, ts->width);
+            if (y >= ts->cursy && y < ts->cursy+32 && ts->cursx < 
ts->width) {
+                fc(ts, d, y, ts->width);
+            }
              d += dd;
              s += ds;
+            y++;
              f(ts, d, s, ts->width);
+            if (y >= ts->cursy && y < ts->cursy+32 && ts->cursx < 
ts->width) {
+                fc(ts, d, y, ts->width);
+            }
              d += dd;
              s += ds;
+            y++;
          } else {
              if (y_start >= 0) {
                  /* flush to display */
@@ -286,6 +398,7 @@ static void tcx_update_display(void *opaque)
              }
              d += dd * 4;
              s += ds * 4;
+            y += 4;
          }
      }
      if (y_start >= 0) {
@@ -328,9 +441,9 @@ static void tcx24_update_display(void *opaque)
      dd = surface_stride(surface);
      ds = 1024;

-    for(y = 0; y < ts->height; y += 4, page += TARGET_PAGE_SIZE,
+    for (y = 0; y < ts->height; page += TARGET_PAGE_SIZE,
              page24 += TARGET_PAGE_SIZE, cpage += TARGET_PAGE_SIZE) {
-        if (check_dirty(ts, page, page24, cpage)) {
+        if (tcx24_check_dirty(ts, page, page24, cpage)) {
              if (y_start < 0)
                  y_start = y;
              if (page < page_min)
@@ -338,25 +451,41 @@ static void tcx24_update_display(void *opaque)
              if (page > page_max)
                  page_max = page;
              tcx24_draw_line32(ts, d, s, ts->width, cptr, s24);
+            if (y >= ts->cursy && y < ts->cursy+32 && ts->cursx < 
ts->width) {
+                tcx_draw_cursor32(ts, d, y, ts->width);
+            }
              d += dd;
              s += ds;
              cptr += ds;
              s24 += ds;
+            y++;
              tcx24_draw_line32(ts, d, s, ts->width, cptr, s24);
+            if (y >= ts->cursy && y < ts->cursy+32 && ts->cursx < 
ts->width) {
+                tcx_draw_cursor32(ts, d, y, ts->width);
+            }
              d += dd;
              s += ds;
              cptr += ds;
              s24 += ds;
+            y++;
              tcx24_draw_line32(ts, d, s, ts->width, cptr, s24);
+            if (y >= ts->cursy && y < ts->cursy+32 && ts->cursx < 
ts->width) {
+                tcx_draw_cursor32(ts, d, y, ts->width);
+            }
              d += dd;
              s += ds;
              cptr += ds;
              s24 += ds;
+            y++;
              tcx24_draw_line32(ts, d, s, ts->width, cptr, s24);
+            if (y >= ts->cursy && y < ts->cursy+32 && ts->cursx < 
ts->width) {
+                tcx_draw_cursor32(ts, d, y, ts->width);
+            }
              d += dd;
              s += ds;
              cptr += ds;
              s24 += ds;
+            y++;
          } else {
              if (y_start >= 0) {
                  /* flush to display */
@@ -368,6 +497,7 @@ static void tcx24_update_display(void *opaque)
              s += ds * 4;
              cptr += ds * 4;
              s24 += ds * 4;
+            y += 4;
          }
      }
      if (y_start >= 0) {
@@ -377,7 +507,7 @@ static void tcx24_update_display(void *opaque)
      }
      /* reset modified pages */
      if (page_max >= page_min) {
-        reset_dirty(ts, page_min, page_max, page24, cpage);
+        tcx24_reset_dirty(ts, page_min, page_max, page24, cpage);
      }
  }

@@ -394,7 +524,6 @@ static void tcx24_invalidate_display(void *opaque)
      TCXState *s = opaque;

      tcx_set_dirty(s);
-    tcx24_set_dirty(s);
      qemu_console_resize(s->con, s->width, s->height);
  }

@@ -403,12 +532,7 @@ static int vmstate_tcx_post_load(void *opaque, int 
version_id)
      TCXState *s = opaque;

      update_palette_entries(s, 0, 256);
-    if (s->depth == 24) {
-        tcx24_set_dirty(s);
-    } else {
-        tcx_set_dirty(s);
-    }
-
+    tcx_set_dirty(s);
      return 0;
  }

@@ -436,56 +560,87 @@ static void tcx_reset(DeviceState *d)
      TCXState *s = TCX(d);

      /* Initialize palette */
-    memset(s->r, 0, 256);
-    memset(s->g, 0, 256);
-    memset(s->b, 0, 256);
+    memset(s->r, 0, 260);
+    memset(s->g, 0, 260);
+    memset(s->b, 0, 260);
      s->r[255] = s->g[255] = s->b[255] = 255;
-    update_palette_entries(s, 0, 256);
+    s->r[256] = s->g[256] = s->b[256] = 255;
+    s->r[258] = s->g[258] = s->b[258] = 255;
+    update_palette_entries(s, 0, 260);
      memset(s->vram, 0, MAXX*MAXY);
      memory_region_reset_dirty(&s->vram_mem, 0, MAXX * MAXY * (1 + 4 + 4),
                                DIRTY_MEMORY_VGA);
      s->dac_index = 0;
      s->dac_state = 0;
+    s->cursx = 0xF000; /*Put cursor off screen */
+    s->cursy = 0xF000;
  }

  static uint64_t tcx_dac_readl(void *opaque, hwaddr addr,
                                unsigned size)
  {
-    return 0;
+    TCXState *s = opaque;
+    uint32_t val = 0;
+
+    switch (s->dac_state) {
+    case 0:
+        val = s->r[s->dac_index] << 24;
+        s->dac_state++;
+        break;
+    case 1:
+        val = s->g[s->dac_index] << 24;
+        s->dac_state++;
+        break;
+    case 2:
+        val = s->b[s->dac_index] << 24;
+        s->dac_index = (s->dac_index + 1) & 255; /* Index autoincrement */
+    default:
+        s->dac_state = 0;
+        break;
+    }
+
+    return val;
  }

  static void tcx_dac_writel(void *opaque, hwaddr addr, uint64_t val,
                             unsigned size)
  {
      TCXState *s = opaque;
+    unsigned index;

      switch (addr) {
-    case 0:
+    case 0: /* Address */
          s->dac_index = val >> 24;
          s->dac_state = 0;
          break;
-    case 4:
+    case 4:  /* Pixel colours */
+    case 12: /* Overlay (cursor) colours */
+        if (addr & 8) {
+            index = (s->dac_index & 3) + 256;
+        } else {
+            index = s->dac_index;
+        }
          switch (s->dac_state) {
          case 0:
-            s->r[s->dac_index] = val >> 24;
-            update_palette_entries(s, s->dac_index, s->dac_index + 1);
+            s->r[index] = val >> 24;
+            update_palette_entries(s, index, index + 1);
              s->dac_state++;
              break;
          case 1:
-            s->g[s->dac_index] = val >> 24;
-            update_palette_entries(s, s->dac_index, s->dac_index + 1);
+            s->g[index] = val >> 24;
+            update_palette_entries(s, index, index + 1);
              s->dac_state++;
              break;
          case 2:
-            s->b[s->dac_index] = val >> 24;
-            update_palette_entries(s, s->dac_index, s->dac_index + 1);
-            s->dac_index = (s->dac_index + 1) & 255; // Index autoincrement
+            s->b[index] = val >> 24;
+            update_palette_entries(s, index, index + 1);
+            s->dac_index = (s->dac_index + 1) & 255; /* Index 
autoincrement */
          default:
              s->dac_state = 0;
              break;
          }
          break;
-    default:
+    default: /* Control registers */
          break;
      }
  }
@@ -500,20 +655,267 @@ static const MemoryRegionOps tcx_dac_ops = {
      },
  };

-static uint64_t dummy_readl(void *opaque, hwaddr addr,
+static uint64_t tcx_stip_readl(void *opaque, hwaddr addr,
+                               unsigned size)
+{
+    return 0;
+}
+
+static void tcx_stip_writel(void *opaque, hwaddr addr,
+                            uint64_t val, unsigned size)
+{
+    TCXState *s = opaque;
+    int i;
+    uint32_t col;
+
+    if (!(addr & 4)) {
+        s->tmpblit = val;
+    } else {
+        addr = (addr / 8) & 0xFFFFF;
+        col = cpu_to_be32(s->tmpblit);
+        if (s->depth == 24) {
+            for (i = 0; i < 32; i++)  {
+                if (val & 0x80000000) {
+                    s->vram[addr + i] = s->tmpblit;
+                    s->vram24[addr + i] = col;
+                }
+                val <<= 1;
+            }
+        } else {
+            for (i = 0; i < 32; i++)  {
+                if (val & 0x80000000) {
+                    s->vram[addr + i] = s->tmpblit;
+                }
+                val <<= 1;
+            }
+        }
+        memory_region_set_dirty(&s->vram_mem, addr, 32);
+    }
+}
+
+static void tcx_rstip_writel(void *opaque, hwaddr addr,
+                             uint64_t val, unsigned size)
+{
+    TCXState *s = opaque;
+    int i;
+    uint32_t col;
+
+    if (!(addr & 4)) {
+        s->tmpblit = val;
+    } else {
+        addr = (addr / 8) & 0xFFFFF;
+        col = cpu_to_be32(s->tmpblit);
+        if (s->depth == 24) {
+            for (i = 0; i < 32; i++) {
+                if (val&0x80000000) {
+                    s->vram[addr + i] = s->tmpblit;
+                    s->vram24[addr + i] = col;
+                    s->cplane[addr + i] = col;
+                }
+                val <<= 1;
+            }
+        } else {
+            for (i = 0; i < 32; i++)  {
+                if (val&0x80000000) {
+                    s->vram[addr + i] = s->tmpblit;
+                }
+                val <<= 1;
+            }
+        }
+        memory_region_set_dirty(&s->vram_mem, addr, 32);
+    }
+}
+
+static const MemoryRegionOps tcx_stip_ops = {
+    .read = tcx_stip_readl,
+    .write = tcx_stip_writel,
+    .endianness = DEVICE_NATIVE_ENDIAN,
+    .valid = {
+        .min_access_size = 4,
+        .max_access_size = 4,
+    },
+};
+
+static const MemoryRegionOps tcx_rstip_ops = {
+    .read = tcx_stip_readl,
+    .write = tcx_rstip_writel,
+    .endianness = DEVICE_NATIVE_ENDIAN,
+    .valid = {
+        .min_access_size = 4,
+        .max_access_size = 4,
+    },
+};
+
+static uint64_t tcx_blit_readl(void *opaque, hwaddr addr,
+                               unsigned size)
+{
+    return 0;
+}
+
+static void tcx_blit_writel(void *opaque, hwaddr addr,
+                            uint64_t val, unsigned size)
+{
+    TCXState *s = opaque;
+    uint32_t adsr, len;
+    int i;
+
+    if (!(addr & 4)) {
+        s->tmpblit = val;
+    } else {
+        addr = (addr / 8) & 0xFFFFF;
+        adsr = val & 0xFFFFFF;
+        len = ((val >> 24) & 0x1F) + 1;
+        if (adsr == 0xFFFFFF) {
+            memset(&s->vram[addr], s->tmpblit, len);
+            if (s->depth == 24) {
+                val = s->tmpblit & 0xFFFFFF;
+                val = cpu_to_be32(val);
+                for (i = 0; i < len; i++) {
+                    s->vram24[addr + i] = val;
+                }
+            }
+        } else {
+            memcpy(&s->vram[addr], &s->vram[adsr], len);
+            if (s->depth == 24) {
+                memcpy(&s->vram24[addr], &s->vram24[adsr], len * 4);
+            }
+        }
+        memory_region_set_dirty(&s->vram_mem, addr, len);
+    }
+}
+
+static void tcx_rblit_writel(void *opaque, hwaddr addr,
+                         uint64_t val, unsigned size)
+{
+    TCXState *s = opaque;
+    uint32_t adsr, len;
+    int i;
+
+    if (!(addr & 4)) {
+        s->tmpblit = val;
+    } else {
+        addr = (addr / 8) & 0xFFFFF;
+        adsr = val & 0xFFFFFF;
+        len = ((val >> 24) & 0x1F) + 1;
+        if (adsr == 0xFFFFFF) {
+            memset(&s->vram[addr], s->tmpblit, len);
+            if (s->depth == 24) {
+                val = s->tmpblit & 0xFFFFFF;
+                val = cpu_to_be32(val);
+                for (i = 0; i < len; i++) {
+                    s->vram24[addr + i] = val;
+                    s->cplane[addr + i] = val;
+                }
+            }
+        } else {
+            memcpy(&s->vram[addr], &s->vram[adsr], len);
+            if (s->depth == 24) {
+                memcpy(&s->vram24[addr], &s->vram24[adsr], len * 4);
+                memcpy(&s->cplane[addr], &s->cplane[adsr], len * 4);
+            }
+        }
+        memory_region_set_dirty(&s->vram_mem, addr, len);
+    }
+}
+
+static const MemoryRegionOps tcx_blit_ops = {
+    .read = tcx_blit_readl,
+    .write = tcx_blit_writel,
+    .endianness = DEVICE_NATIVE_ENDIAN,
+    .valid = {
+        .min_access_size = 4,
+        .max_access_size = 4,
+    },
+};
+
+static const MemoryRegionOps tcx_rblit_ops = {
+    .read = tcx_blit_readl,
+    .write = tcx_rblit_writel,
+    .endianness = DEVICE_NATIVE_ENDIAN,
+    .valid = {
+        .min_access_size = 4,
+        .max_access_size = 4,
+    },
+};
+
+static void tcx_invalidate_cursor_position(TCXState *s)
+{
+    int ymin, ymax, start, end;
+
+    /* invalidate only near the cursor */
+    ymin = s->cursy;
+    if (ymin >= s->height) {
+        return;
+    }
+    ymax = MIN(s->height, ymin + 32);
+    start = ymin * 1024;
+    end   = ymax * 1024;
+
+    memory_region_set_dirty(&s->vram_mem, start, end-start);
+}
+
+static uint64_t tcx_thc_readl(void *opaque, hwaddr addr,
+                            unsigned size)
+{
+    TCXState *s = opaque;
+    uint64_t val;
+
+    if (addr == TCX_THC_MISC) {
+        val = s->thcmisc | 0x02000000;
+    } else {
+        val = 0;
+    }
+    return val;
+}
+
+static void tcx_thc_writel(void *opaque, hwaddr addr,
+                         uint64_t val, unsigned size)
+{
+    TCXState *s = opaque;
+
+    if (addr == TCX_THC_CURSXY) {
+        tcx_invalidate_cursor_position(s);
+        s->cursx = val >> 16;
+        s->cursy = val;
+        tcx_invalidate_cursor_position(s);
+    } else if (addr >= TCX_THC_CURSMASK && addr < TCX_THC_CURSMASK + 128) {
+        s->cursmask[(addr - TCX_THC_CURSMASK) / 4] = val;
+        tcx_invalidate_cursor_position(s);
+    } else if (addr >= TCX_THC_CURSBITS && addr < TCX_THC_CURSBITS + 128) {
+        s->cursbits[(addr - TCX_THC_CURSBITS) / 4] = val;
+        tcx_invalidate_cursor_position(s);
+    } else if (addr == TCX_THC_MISC) {
+        s->thcmisc = val;
+    }
+
+}
+
+static const MemoryRegionOps tcx_thc_ops = {
+    .read = tcx_thc_readl,
+    .write = tcx_thc_writel,
+    .endianness = DEVICE_NATIVE_ENDIAN,
+    .valid = {
+        .min_access_size = 4,
+        .max_access_size = 4,
+    },
+};
+
+static uint64_t tcx_dummy_readl(void *opaque, hwaddr addr,
                              unsigned size)
  {
+    /* printf ("TCX Dummy (ALT,DHC,TEC) READ %X 
%i\n",(int)addr,(int)size);*/
      return 0;
  }

-static void dummy_writel(void *opaque, hwaddr addr,
+static void tcx_dummy_writel(void *opaque, hwaddr addr,
                           uint64_t val, unsigned size)
  {
+    /* printf ("TCX Dummy (ALT,DHC,TEC) WRITE %X 
%X\n",(int)addr,(int)val);*/
  }

-static const MemoryRegionOps dummy_ops = {
-    .read = dummy_readl,
-    .write = dummy_writel,
+static const MemoryRegionOps tcx_dummy_ops = {
+    .read = tcx_dummy_readl,
+    .write = tcx_dummy_writel,
      .endianness = DEVICE_NATIVE_ENDIAN,
      .valid = {
          .min_access_size = 4,
@@ -544,7 +946,7 @@ static int tcx_init1(SysBusDevice *dev)
      vmstate_register_ram_global(&s->vram_mem);
      vram_base = memory_region_get_ram_ptr(&s->vram_mem);

-    /* FCode ROM */
+    /* 10/ROM   : FCode ROM */
      memory_region_init_ram(&s->rom, NULL, "tcx.prom", FCODE_MAX_ROM_SIZE);
      vmstate_register_ram_global(&s->rom);
      memory_region_set_readonly(&s->rom, true);
@@ -560,7 +962,7 @@ static int tcx_init1(SysBusDevice *dev)
          }
      }

-    /* 8-bit plane */
+    /*  0/DFB8  : 8-bit plane */
      s->vram = vram_base;
      size = s->vram_size;
      memory_region_init_alias(&s->vram_8bit, OBJECT(s), "tcx.vram.8bit",
@@ -569,50 +971,85 @@ static int tcx_init1(SysBusDevice *dev)
      vram_offset += size;
      vram_base += size;

-    /* DAC */
-    memory_region_init_io(&s->dac, OBJECT(s), &tcx_dac_ops, s,
-                          "tcx.dac", TCX_DAC_NREGS);
-    sysbus_init_mmio(dev, &s->dac);
+    /*  1/DFB24 : 24bit plane */
+    size = s->vram_size * 4;
+    s->vram24 = (uint32_t *)vram_base;
+    s->vram24_offset = vram_offset;
+    memory_region_init_alias(&s->vram_24bit, OBJECT(s), "tcx.vram.24bit",
+                             &s->vram_mem, vram_offset, size);
+    sysbus_init_mmio(dev, &s->vram_24bit);
+    vram_offset += size;
+    vram_base += size;
+
+    /*  2/STIP : Stippler */
+    memory_region_init_io(&s->stip, OBJECT(s), &tcx_stip_ops, s, 
"tcx.stip",
+                          TCX_STIP_NREGS);
+    sysbus_init_mmio(dev, &s->stip);
+
+    /*  3/BLIT : Blitter */
+    memory_region_init_io(&s->blit, OBJECT(s), &tcx_blit_ops, s, 
"tcx.blit",
+                          TCX_BLIT_NREGS);
+    sysbus_init_mmio(dev, &s->blit);
+
+    /*  4/RDFB32 : Raw Framebuffer */
+    size = s->vram_size * 4;
+    s->cplane = (uint32_t *)vram_base;
+    s->cplane_offset = vram_offset;
+    memory_region_init_alias(&s->vram_cplane, OBJECT(s), "tcx.vram.cplane",
+                             &s->vram_mem, vram_offset, size);
+    sysbus_init_mmio(dev, &s->vram_cplane);

-    /* TEC (dummy) */
-    memory_region_init_io(&s->tec, OBJECT(s), &dummy_ops, s,
+    /*  5/RSTIP : Raw Stippler */
+    memory_region_init_io(&s->rstip, OBJECT(s), &tcx_rstip_ops, s, 
"tcx.rstip",
+                          TCX_RSTIP_NREGS);
+    sysbus_init_mmio(dev, &s->rstip);
+
+    /*  6/RBLIT : Raw Blitter */
+    memory_region_init_io(&s->rblit, OBJECT(s), &tcx_rblit_ops, s, 
"tcx.rblit",
+                          TCX_RBLIT_NREGS);
+    sysbus_init_mmio(dev, &s->rblit);
+
+    /*  7/TEC : ??? */
+    memory_region_init_io(&s->tec, OBJECT(s), &tcx_dummy_ops, s,
                            "tcx.tec", TCX_TEC_NREGS);
      sysbus_init_mmio(dev, &s->tec);
-    /* THC: NetBSD writes here even with 8-bit display: dummy */
-    memory_region_init_io(&s->thc24, OBJECT(s), &dummy_ops, s, "tcx.thc24",
-                          TCX_THC_NREGS_24);
-    sysbus_init_mmio(dev, &s->thc24);
-
-    if (s->depth == 24) {
-        /* 24-bit plane */
-        size = s->vram_size * 4;
-        s->vram24 = (uint32_t *)vram_base;
-        s->vram24_offset = vram_offset;
-        memory_region_init_alias(&s->vram_24bit, OBJECT(s), 
"tcx.vram.24bit",
-                                 &s->vram_mem, vram_offset, size);
-        sysbus_init_mmio(dev, &s->vram_24bit);
-        vram_offset += size;
-        vram_base += size;
-
-        /* Control plane */
-        size = s->vram_size * 4;
-        s->cplane = (uint32_t *)vram_base;
-        s->cplane_offset = vram_offset;
-        memory_region_init_alias(&s->vram_cplane, OBJECT(s), 
"tcx.vram.cplane",
-                                 &s->vram_mem, vram_offset, size);
-        sysbus_init_mmio(dev, &s->vram_cplane);

-        s->con = graphic_console_init(DEVICE(dev), &tcx24_ops, s);
-    } else {
-        /* THC 8 bit (dummy) */
-        memory_region_init_io(&s->thc8, OBJECT(s), &dummy_ops, s, 
"tcx.thc8",
-                              TCX_THC_NREGS_8);
-        sysbus_init_mmio(dev, &s->thc8);
+    /*  8/CMAP : DAC */
+    memory_region_init_io(&s->dac, OBJECT(s), &tcx_dac_ops, s,
+                          "tcx.dac", TCX_DAC_NREGS);
+    sysbus_init_mmio(dev, &s->dac);
+
+    /*  9/THC : Cursor */
+    memory_region_init_io(&s->thc, OBJECT(s), &tcx_thc_ops, s, "tcx.thc",
+                          TCX_THC_NREGS);
+    sysbus_init_mmio(dev, &s->thc);
+
+    /* 11/DHC : ??? */
+    memory_region_init_io(&s->dhc, OBJECT(s), &tcx_dummy_ops, s, "tcx.dhc",
+                          TCX_DHC_NREGS);
+    sysbus_init_mmio(dev, &s->dhc);
+
+    /* 12/ALT : ??? */
+    memory_region_init_io(&s->alt, OBJECT(s), &tcx_dummy_ops, s, "tcx.alt",
+                          TCX_ALT_NREGS);
+    sysbus_init_mmio(dev, &s->alt);
+
+    /* 9/THC24bits : NetBSD writes here even with 8-bit display: dummy */
+    if (s->depth == 8) {
+        memory_region_init_io(&s->thc24, OBJECT(s), &tcx_dummy_ops, s,
+                              "tcx.thc24", TCX_THC_NREGS);
+        sysbus_init_mmio(dev, &s->thc24);
+    }

+    if (s->depth == 8) {
          s->con = graphic_console_init(DEVICE(dev), &tcx_ops, s);
+    } else {
+        s->con = graphic_console_init(DEVICE(dev), &tcx24_ops, s);
      }
+    s->thcmisc = 0;

      qemu_console_resize(s->con, s->width, s->height);
+
      return 0;
  }

diff --git a/hw/sparc/sun4m.c b/hw/sparc/sun4m.c
index 2957d90..7eee180 100644
--- a/hw/sparc/sun4m.c
+++ b/hw/sparc/sun4m.c
@@ -540,24 +540,40 @@ static void tcx_init(hwaddr addr, int vram_size, 
int width,
      qdev_prop_set_uint64(dev, "prom_addr", addr);
      qdev_init_nofail(dev);
      s = SYS_BUS_DEVICE(dev);
-    /* FCode ROM */
+
+    /* 10/ROM   : FCode ROM */
      sysbus_mmio_map(s, 0, addr);
-    /* 8-bit plane */
+    /*  0/DFB8  : 8-bit plane */
      sysbus_mmio_map(s, 1, addr + 0x00800000ULL);
-    /* DAC */
-    sysbus_mmio_map(s, 2, addr + 0x00200000ULL);
-    /* TEC (dummy) */
-    sysbus_mmio_map(s, 3, addr + 0x00700000ULL);
-    /* THC 24 bit: NetBSD writes here even with 8-bit display: dummy */
-    sysbus_mmio_map(s, 4, addr + 0x00301000ULL);
-    if (depth == 24) {
-        /* 24-bit plane */
-        sysbus_mmio_map(s, 5, addr + 0x02000000ULL);
-        /* Control plane */
-        sysbus_mmio_map(s, 6, addr + 0x0a000000ULL);
+    /*  1/DFB24 : 24bit plane */
+    sysbus_mmio_map(s, 2, addr + 0x02000000ULL);
+    /*  2/STIP  : Stipple */
+    sysbus_mmio_map(s, 3, addr + 0x04000000ULL);
+    /*  3/BLIT  : Blitter */
+    sysbus_mmio_map(s, 4, addr + 0x06000000ULL);
+    /*  4/RDFB32: Raw framebuffer. Control plane */
+    sysbus_mmio_map(s, 5, addr + 0x0A000000ULL);
+    /*  5/RSTIP : Raw Stipple */
+    sysbus_mmio_map(s, 6, addr + 0x0C000000ULL);
+    /*  6/RBLIT : Raw Blitter */
+    sysbus_mmio_map(s, 7, addr + 0x0E000000ULL);
+    /*  7/TEC   : Transform Engine */
+    sysbus_mmio_map(s, 8, addr + 0x00700000ULL);
+    /*  8/CMAP  : DAC */
+    sysbus_mmio_map(s, 9, addr + 0x00200000ULL);
+    /*  9/THC   : */
+    if (depth == 8) {
+        sysbus_mmio_map(s, 10, addr + 0x00300000ULL);
      } else {
-        /* THC 8 bit (dummy) */
-        sysbus_mmio_map(s, 5, addr + 0x00300000ULL);
+        sysbus_mmio_map(s, 10, addr + 0x00301000ULL);
+    }
+    /* 11/DHC   : */
+    sysbus_mmio_map(s, 11, addr + 0x00240000ULL);
+    /* 12/ALT   : */
+    sysbus_mmio_map(s, 12, addr + 0x00280000ULL);
+    /* 9/THC24bits : NetBSD writes here even with 8-bit display: dummy */
+    if (depth == 8) {
+        sysbus_mmio_map(s, 13, addr + 0x00301000ULL);
      }
  }

-- 
1.8.1.5

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [Qemu-devel] [PATCH 1/1] Sun4m : TCX framebuffer hardware acceleration
  2014-02-16 23:15 [Qemu-devel] [PATCH 1/1] Sun4m : TCX framebuffer hardware acceleration Olivier Danet
@ 2014-05-25  9:50 ` Mark Cave-Ayland
  2014-05-25 13:20   ` Olivier Danet
  0 siblings, 1 reply; 6+ messages in thread
From: Mark Cave-Ayland @ 2014-05-25  9:50 UTC (permalink / raw)
  To: Olivier Danet, qemu-devel, Blue Swirl, Artyom Tarasenko

On 16/02/14 23:15, Olivier Danet wrote:

> The S24/TCX framebuffer is a mildly accelerated video card, with
> blitter, stippler and hardware cursor.
> * Solaris and NetBSD 6.x use all the hardware acceleration features.
> * The Xorg driver (used by Linux) can use the hardware cursor only.
>
> This patch implements hardware acceleration in both 8bits and 24bits
> modes. It is based on the NetBSD driver sources and from tests with
> Solaris.

I'm looking at reviewing this patch today, however it doesn't apply for 
me - it looks as if your mail program has truncated lines prematurely? 
Please can you try to resend using git send-email directly?


Many thanks,

Mark.

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [Qemu-devel] [PATCH 1/1] Sun4m : TCX framebuffer hardware acceleration
  2014-05-25  9:50 ` Mark Cave-Ayland
@ 2014-05-25 13:20   ` Olivier Danet
  2014-05-25 20:28     ` Mark Cave-Ayland
  0 siblings, 1 reply; 6+ messages in thread
From: Olivier Danet @ 2014-05-25 13:20 UTC (permalink / raw)
  To: Mark Cave-Ayland, qemu-devel, Blue Swirl, Artyom Tarasenko

On 25/05/2014 11:50, Mark Cave-Ayland wrote:
> On 16/02/14 23:15, Olivier Danet wrote:
> 
>> The S24/TCX framebuffer is a mildly accelerated video card, with
>> blitter, stippler and hardware cursor.
>> * Solaris and NetBSD 6.x use all the hardware acceleration features.
>> * The Xorg driver (used by Linux) can use the hardware cursor only.
>>
>> This patch implements hardware acceleration in both 8bits and 24bits
>> modes. It is based on the NetBSD driver sources and from tests with
>> Solaris.
> 
> I'm looking at reviewing this patch today, however it doesn't apply for me - it looks as if your mail program has truncated lines prematurely? Please can you try to resend using git send-email directly?
> 
> 
> Many thanks,
> 
> Mark.
> 
Hi Mark

Here is the original patch. I have changed my email settings since then, so it should work better.
Alas, I have not merged the latest QEMU changes (your CG3/TCX patches), so it will probably not compile as-is...

Olivier



[PATCH 1/1] Sun4m : TCX framebuffer hardware acceleration

The S24/TCX framebuffer is a mildly accelerated video card, with
blitter, stippler and hardware cursor.
* Solaris and NetBSD 6.x use all the hardware acceleration features.
* The Xorg driver (used by Linux) can use the hardware cursor only.

This patch implements hardware acceleration in both 8-bit and 24-bit
modes. It is based on the NetBSD driver sources and on tests with Solaris.

Signed-off-by: Olivier Danet <odanet@caramail.com>
---
 hw/display/tcx.c | 679 +++++++++++++++++++++++++++++++++++++++++++++----------
 hw/sparc/sun4m.c |  46 ++--
 2 files changed, 589 insertions(+), 136 deletions(-)

diff --git a/hw/display/tcx.c b/hw/display/tcx.c
index 873b82c..bcd64e5 100644
--- a/hw/display/tcx.c
+++ b/hw/display/tcx.c
@@ -33,17 +33,26 @@
 
 #define MAXX 1024
 #define MAXY 768
-#define TCX_DAC_NREGS 16
-#define TCX_THC_NREGS_8  0x081c
-#define TCX_THC_NREGS_24 0x1000
+#define TCX_DAC_NREGS    16
+#define TCX_THC_NREGS    0x1000
+#define TCX_DHC_NREGS    0x4000
 #define TCX_TEC_NREGS    0x1000
+#define TCX_ALT_NREGS    0x8000
+#define TCX_STIP_NREGS   0x800000
+#define TCX_BLIT_NREGS   0x800000
+#define TCX_RSTIP_NREGS  0x800000
+#define TCX_RBLIT_NREGS  0x800000
+
+#define TCX_THC_MISC     0x818
+#define TCX_THC_CURSXY   0x8fc
+#define TCX_THC_CURSMASK 0x900
+#define TCX_THC_CURSBITS 0x980
 
 #define TYPE_TCX "SUNW,tcx"
 #define TCX(obj) OBJECT_CHECK(TCXState, (obj), TYPE_TCX)
 
 typedef struct TCXState {
     SysBusDevice parent_obj;
-
     QemuConsole *con;
     uint8_t *vram;
     uint32_t *vram24, *cplane;
@@ -52,17 +61,30 @@ typedef struct TCXState {
     MemoryRegion vram_mem;
     MemoryRegion vram_8bit;
     MemoryRegion vram_24bit;
+    MemoryRegion stip;
+    MemoryRegion blit;
     MemoryRegion vram_cplane;
-    MemoryRegion dac;
+    MemoryRegion rstip;
+    MemoryRegion rblit;
     MemoryRegion tec;
+    MemoryRegion dac;
+    MemoryRegion thc;
+    MemoryRegion dhc;
+    MemoryRegion alt;
     MemoryRegion thc24;
-    MemoryRegion thc8;
+
     ram_addr_t vram24_offset, cplane_offset;
+    uint32_t tmpblit;
     uint32_t vram_size;
-    uint32_t palette[256];
-    uint8_t r[256], g[256], b[256];
+    uint32_t palette[260];
+    uint8_t r[260], g[260], b[260];
     uint16_t width, height, depth;
     uint8_t dac_index, dac_state;
+    uint32_t thcmisc;
+    uint32_t cursmask[32];
+    uint32_t cursbits[32];
+    uint16_t cursx;
+    uint16_t cursy;
 } TCXState;
 
 static void tcx_set_dirty(TCXState *s)
@@ -70,10 +92,36 @@ static void tcx_set_dirty(TCXState *s)
     memory_region_set_dirty(&s->vram_mem, 0, MAXX * MAXY);
 }
 
-static void tcx24_set_dirty(TCXState *s)
+static inline int tcx24_check_dirty(TCXState *s, ram_addr_t page,
+                                    ram_addr_t page24, ram_addr_t cpage)
 {
-    memory_region_set_dirty(&s->vram_mem, s->vram24_offset, MAXX * MAXY * 4);
-    memory_region_set_dirty(&s->vram_mem, s->cplane_offset, MAXX * MAXY * 4);
+    int ret;
+
+    ret = memory_region_get_dirty(&s->vram_mem, page, TARGET_PAGE_SIZE,
+                                  DIRTY_MEMORY_VGA);
+    ret |= memory_region_get_dirty(&s->vram_mem, page24, TARGET_PAGE_SIZE * 4,
+                                   DIRTY_MEMORY_VGA);
+    ret |= memory_region_get_dirty(&s->vram_mem, cpage, TARGET_PAGE_SIZE * 4,
+                                   DIRTY_MEMORY_VGA);
+    return ret;
+}
+
+static inline void tcx24_reset_dirty(TCXState *ts, ram_addr_t page_min,
+                               ram_addr_t page_max, ram_addr_t page24,
+                              ram_addr_t cpage)
+{
+    memory_region_reset_dirty(&ts->vram_mem,
+                              page_min,
+                              (page_max - page_min) + TARGET_PAGE_SIZE,
+                              DIRTY_MEMORY_VGA);
+    memory_region_reset_dirty(&ts->vram_mem,
+                              page24 + page_min * 4,
+                              (page_max - page_min) * 4 + TARGET_PAGE_SIZE,
+                              DIRTY_MEMORY_VGA);
+    memory_region_reset_dirty(&ts->vram_mem,
+                              cpage + page_min * 4,
+                              (page_max - page_min) * 4 + TARGET_PAGE_SIZE,
+                              DIRTY_MEMORY_VGA);
 }
 
 static void update_palette_entries(TCXState *s, int start, int end)
@@ -102,11 +150,7 @@ static void update_palette_entries(TCXState *s, int start, int end)
             break;
         }
     }
-    if (s->depth == 24) {
-        tcx24_set_dirty(s);
-    } else {
-        tcx_set_dirty(s);
-    }
+    tcx_set_dirty(s);
 }
 
 static void tcx_draw_line32(TCXState *s1, uint8_t *d,
@@ -116,7 +160,7 @@ static void tcx_draw_line32(TCXState *s1, uint8_t *d,
     uint8_t val;
     uint32_t *p = (uint32_t *)d;
 
-    for(x = 0; x < width; x++) {
+    for (x = 0; x < width; x++) {
         val = *s++;
         *p++ = s1->palette[val];
     }
@@ -129,7 +173,7 @@ static void tcx_draw_line16(TCXState *s1, uint8_t *d,
     uint8_t val;
     uint16_t *p = (uint16_t *)d;
 
-    for(x = 0; x < width; x++) {
+    for (x = 0; x < width; x++) {
         val = *s++;
         *p++ = s1->palette[val];
     }
@@ -147,6 +191,83 @@ static void tcx_draw_line8(TCXState *s1, uint8_t *d,
     }
 }
 
+static void tcx_draw_cursor32(TCXState *s1, uint8_t *d,
+                              int y, int width)
+{
+    int x, len;
+    uint32_t mask, bits;
+    uint32_t *p = (uint32_t *)d;
+
+    y = y - s1->cursy;
+    mask = s1->cursmask[y];
+    bits = s1->cursbits[y];
+    len = MIN(width - s1->cursx, 32);
+    p = &p[s1->cursx];
+    for (x = 0; x < len; x++) {
+        if (mask & 0x80000000) {
+            if (bits & 0x80000000) {
+                *p = s1->palette[259];
+            } else {
+                *p = s1->palette[258];
+            }
+        }
+        p++;
+        mask <<= 1;
+        bits <<= 1;
+    }
+}
+
+static void tcx_draw_cursor16(TCXState *s1, uint8_t *d,
+                              int y, int width)
+{
+    int x, len;
+    uint32_t mask, bits;
+    uint16_t *p = (uint16_t *)d;
+
+    y = y - s1->cursy;
+    mask = s1->cursmask[y];
+    bits = s1->cursbits[y];
+    len = MIN(width - s1->cursx, 32);
+    p = &p[s1->cursx];
+    for (x = 0; x < len; x++) {
+        if (mask & 0x80000000) {
+            if (bits & 0x80000000) {
+                *p = s1->palette[259];
+            } else {
+                *p = s1->palette[258];
+            }
+        }
+        p++;
+        mask <<= 1;
+        bits <<= 1;
+    }
+}
+
+static void tcx_draw_cursor8(TCXState *s1, uint8_t *d,
+                              int y, int width)
+{
+    int x, len;
+    uint32_t mask, bits;
+
+    y = y - s1->cursy;
+    mask = s1->cursmask[y];
+    bits = s1->cursbits[y];
+    len = MIN(width - s1->cursx, 32);
+    d = &d[s1->cursx];
+    for (x = 0; x < len; x++) {
+        if (mask & 0x80000000) {
+            if (bits & 0x80000000) {
+                *d = s1->palette[259];
+            } else {
+                *d = s1->palette[258];
+            }
+        }
+        d++;
+        mask <<= 1;
+        bits <<= 1;
+    }
+}
+
 /*
   XXX Could be much more optimal:
   * detect if line/page/whole screen is in 24 bit mode
@@ -162,11 +283,10 @@ static inline void tcx24_draw_line32(TCXState *s1, uint8_t *d,
     uint8_t val, *p8;
     uint32_t *p = (uint32_t *)d;
     uint32_t dval;
-
     bgr = is_surface_bgr(surface);
     for(x = 0; x < width; x++, s++, s24++) {
-        if ((be32_to_cpu(*cplane++) & 0xff000000) == 0x03000000) {
-            // 24-bit direct, BGR order
+        if (be32_to_cpu(*cplane) & 0x03000000) {
+            /* 24-bit direct, BGR order */
             p8 = (uint8_t *)s24;
             p8++;
             b = *p8++;
@@ -177,47 +297,18 @@ static inline void tcx24_draw_line32(TCXState *s1, uint8_t *d,
             else
                 dval = rgb_to_pixel32(r, g, b);
         } else {
+            /* 8 bits pseudocolor */
             val = *s;
             dval = s1->palette[val];
         }
         *p++ = dval;
+        cplane++;
     }
 }
 
-static inline int check_dirty(TCXState *s, ram_addr_t page, ram_addr_t page24,
-                              ram_addr_t cpage)
-{
-    int ret;
-
-    ret = memory_region_get_dirty(&s->vram_mem, page, TARGET_PAGE_SIZE,
-                                  DIRTY_MEMORY_VGA);
-    ret |= memory_region_get_dirty(&s->vram_mem, page24, TARGET_PAGE_SIZE * 4,
-                                   DIRTY_MEMORY_VGA);
-    ret |= memory_region_get_dirty(&s->vram_mem, cpage, TARGET_PAGE_SIZE * 4,
-                                   DIRTY_MEMORY_VGA);
-    return ret;
-}
-
-static inline void reset_dirty(TCXState *ts, ram_addr_t page_min,
-                               ram_addr_t page_max, ram_addr_t page24,
-                              ram_addr_t cpage)
-{
-    memory_region_reset_dirty(&ts->vram_mem,
-                              page_min,
-                              (page_max - page_min) + TARGET_PAGE_SIZE,
-                              DIRTY_MEMORY_VGA);
-    memory_region_reset_dirty(&ts->vram_mem,
-                              page24 + page_min * 4,
-                              (page_max - page_min) * 4 + TARGET_PAGE_SIZE,
-                              DIRTY_MEMORY_VGA);
-    memory_region_reset_dirty(&ts->vram_mem,
-                              cpage + page_min * 4,
-                              (page_max - page_min) * 4 + TARGET_PAGE_SIZE,
-                              DIRTY_MEMORY_VGA);
-}
-
 /* Fixed line length 1024 allows us to do nice tricks not possible on
    VGA... */
+
 static void tcx_update_display(void *opaque)
 {
     TCXState *ts = opaque;
@@ -226,6 +317,7 @@ static void tcx_update_display(void *opaque)
     int y, y_start, dd, ds;
     uint8_t *d, *s;
     void (*f)(TCXState *s1, uint8_t *dst, const uint8_t *src, int width);
+    void (*fc)(TCXState *s1, uint8_t *dst, int y, int width);
 
     if (surface_bits_per_pixel(surface) == 0) {
         return;
@@ -243,20 +335,23 @@ static void tcx_update_display(void *opaque)
     switch (surface_bits_per_pixel(surface)) {
     case 32:
         f = tcx_draw_line32;
+        fc = tcx_draw_cursor32;
         break;
     case 15:
     case 16:
         f = tcx_draw_line16;
+        fc = tcx_draw_cursor16;
         break;
     default:
     case 8:
         f = tcx_draw_line8;
+        fc = tcx_draw_cursor8;
         break;
     case 0:
         return;
     }
 
-    for(y = 0; y < ts->height; y += 4, page += TARGET_PAGE_SIZE) {
+    for (y = 0; y < ts->height; page += TARGET_PAGE_SIZE) {
         if (memory_region_get_dirty(&ts->vram_mem, page, TARGET_PAGE_SIZE,
                                     DIRTY_MEMORY_VGA)) {
             if (y_start < 0)
@@ -265,18 +360,35 @@ static void tcx_update_display(void *opaque)
                 page_min = page;
             if (page > page_max)
                 page_max = page;
+
             f(ts, d, s, ts->width);
+            if (y >= ts->cursy && y < ts->cursy+32 && ts->cursx < ts->width) {
+                fc(ts, d, y, ts->width);
+            }
             d += dd;
             s += ds;
+            y++;
             f(ts, d, s, ts->width);
+            if (y >= ts->cursy && y < ts->cursy+32 && ts->cursx < ts->width) {
+                fc(ts, d, y, ts->width);
+            }
             d += dd;
             s += ds;
+            y++;
             f(ts, d, s, ts->width);
+            if (y >= ts->cursy && y < ts->cursy+32 && ts->cursx < ts->width) {
+                fc(ts, d, y, ts->width);
+            }
             d += dd;
             s += ds;
+            y++;
             f(ts, d, s, ts->width);
+            if (y >= ts->cursy && y < ts->cursy+32 && ts->cursx < ts->width) {
+                fc(ts, d, y, ts->width);
+            }
             d += dd;
             s += ds;
+            y++;
         } else {
             if (y_start >= 0) {
                 /* flush to display */
@@ -286,6 +398,7 @@ static void tcx_update_display(void *opaque)
             }
             d += dd * 4;
             s += ds * 4;
+            y += 4;
         }
     }
     if (y_start >= 0) {
@@ -328,9 +441,9 @@ static void tcx24_update_display(void *opaque)
     dd = surface_stride(surface);
     ds = 1024;
 
-    for(y = 0; y < ts->height; y += 4, page += TARGET_PAGE_SIZE,
+    for (y = 0; y < ts->height; page += TARGET_PAGE_SIZE,
             page24 += TARGET_PAGE_SIZE, cpage += TARGET_PAGE_SIZE) {
-        if (check_dirty(ts, page, page24, cpage)) {
+        if (tcx24_check_dirty(ts, page, page24, cpage)) {
             if (y_start < 0)
                 y_start = y;
             if (page < page_min)
@@ -338,25 +451,41 @@ static void tcx24_update_display(void *opaque)
             if (page > page_max)
                 page_max = page;
             tcx24_draw_line32(ts, d, s, ts->width, cptr, s24);
+            if (y >= ts->cursy && y < ts->cursy+32 && ts->cursx < ts->width) {
+                tcx_draw_cursor32(ts, d, y, ts->width);
+            }
             d += dd;
             s += ds;
             cptr += ds;
             s24 += ds;
+            y++;
             tcx24_draw_line32(ts, d, s, ts->width, cptr, s24);
+            if (y >= ts->cursy && y < ts->cursy+32 && ts->cursx < ts->width) {
+                tcx_draw_cursor32(ts, d, y, ts->width);
+            }
             d += dd;
             s += ds;
             cptr += ds;
             s24 += ds;
+            y++;
             tcx24_draw_line32(ts, d, s, ts->width, cptr, s24);
+            if (y >= ts->cursy && y < ts->cursy+32 && ts->cursx < ts->width) {
+                tcx_draw_cursor32(ts, d, y, ts->width);
+            }
             d += dd;
             s += ds;
             cptr += ds;
             s24 += ds;
+            y++;
             tcx24_draw_line32(ts, d, s, ts->width, cptr, s24);
+            if (y >= ts->cursy && y < ts->cursy+32 && ts->cursx < ts->width) {
+                tcx_draw_cursor32(ts, d, y, ts->width);
+            }
             d += dd;
             s += ds;
             cptr += ds;
             s24 += ds;
+            y++;
         } else {
             if (y_start >= 0) {
                 /* flush to display */
@@ -368,6 +497,7 @@ static void tcx24_update_display(void *opaque)
             s += ds * 4;
             cptr += ds * 4;
             s24 += ds * 4;
+            y += 4;
         }
     }
     if (y_start >= 0) {
@@ -377,7 +507,7 @@ static void tcx24_update_display(void *opaque)
     }
     /* reset modified pages */
     if (page_max >= page_min) {
-        reset_dirty(ts, page_min, page_max, page24, cpage);
+        tcx24_reset_dirty(ts, page_min, page_max, page24, cpage);
     }
 }
 
@@ -394,7 +524,6 @@ static void tcx24_invalidate_display(void *opaque)
     TCXState *s = opaque;
 
     tcx_set_dirty(s);
-    tcx24_set_dirty(s);
     qemu_console_resize(s->con, s->width, s->height);
 }
 
@@ -403,12 +532,7 @@ static int vmstate_tcx_post_load(void *opaque, int version_id)
     TCXState *s = opaque;
 
     update_palette_entries(s, 0, 256);
-    if (s->depth == 24) {
-        tcx24_set_dirty(s);
-    } else {
-        tcx_set_dirty(s);
-    }
-
+    tcx_set_dirty(s);
     return 0;
 }
 
@@ -436,56 +560,87 @@ static void tcx_reset(DeviceState *d)
     TCXState *s = TCX(d);
 
     /* Initialize palette */
-    memset(s->r, 0, 256);
-    memset(s->g, 0, 256);
-    memset(s->b, 0, 256);
+    memset(s->r, 0, 260);
+    memset(s->g, 0, 260);
+    memset(s->b, 0, 260);
     s->r[255] = s->g[255] = s->b[255] = 255;
-    update_palette_entries(s, 0, 256);
+    s->r[256] = s->g[256] = s->b[256] = 255;
+    s->r[258] = s->g[258] = s->b[258] = 255;
+    update_palette_entries(s, 0, 260);
     memset(s->vram, 0, MAXX*MAXY);
     memory_region_reset_dirty(&s->vram_mem, 0, MAXX * MAXY * (1 + 4 + 4),
                               DIRTY_MEMORY_VGA);
     s->dac_index = 0;
     s->dac_state = 0;
+    s->cursx = 0xF000; /*Put cursor off screen */
+    s->cursy = 0xF000;
 }
 
 static uint64_t tcx_dac_readl(void *opaque, hwaddr addr,
                               unsigned size)
 {
-    return 0;
+    TCXState *s = opaque;
+    uint32_t val = 0;
+
+    switch (s->dac_state) {
+    case 0:
+        val = s->r[s->dac_index] << 24;
+        s->dac_state++;
+        break;
+    case 1:
+        val = s->g[s->dac_index] << 24;
+        s->dac_state++;
+        break;
+    case 2:
+        val = s->b[s->dac_index] << 24;
+        s->dac_index = (s->dac_index + 1) & 255; /* Index autoincrement */
+    default:
+        s->dac_state = 0;
+        break;
+    }
+
+    return val;
 }
 
 static void tcx_dac_writel(void *opaque, hwaddr addr, uint64_t val,
                            unsigned size)
 {
     TCXState *s = opaque;
+    unsigned index;
 
     switch (addr) {
-    case 0:
+    case 0: /* Address */
         s->dac_index = val >> 24;
         s->dac_state = 0;
         break;
-    case 4:
+    case 4:  /* Pixel colours */
+    case 12: /* Overlay (cursor) colours */
+        if (addr & 8) {
+            index = (s->dac_index & 3) + 256;
+        } else {
+            index = s->dac_index;
+        }
         switch (s->dac_state) {
         case 0:
-            s->r[s->dac_index] = val >> 24;
-            update_palette_entries(s, s->dac_index, s->dac_index + 1);
+            s->r[index] = val >> 24;
+            update_palette_entries(s, index, index + 1);
             s->dac_state++;
             break;
         case 1:
-            s->g[s->dac_index] = val >> 24;
-            update_palette_entries(s, s->dac_index, s->dac_index + 1);
+            s->g[index] = val >> 24;
+            update_palette_entries(s, index, index + 1);
             s->dac_state++;
             break;
         case 2:
-            s->b[s->dac_index] = val >> 24;
-            update_palette_entries(s, s->dac_index, s->dac_index + 1);
-            s->dac_index = (s->dac_index + 1) & 255; // Index autoincrement
+            s->b[index] = val >> 24;
+            update_palette_entries(s, index, index + 1);
+            s->dac_index = (s->dac_index + 1) & 255; /* Index autoincrement */
         default:
             s->dac_state = 0;
             break;
         }
         break;
-    default:
+    default: /* Control registers */
         break;
     }
 }
@@ -500,20 +655,267 @@ static const MemoryRegionOps tcx_dac_ops = {
     },
 };
 
-static uint64_t dummy_readl(void *opaque, hwaddr addr,
+static uint64_t tcx_stip_readl(void *opaque, hwaddr addr,
+                               unsigned size)
+{
+    return 0;
+}
+
+static void tcx_stip_writel(void *opaque, hwaddr addr,
+                            uint64_t val, unsigned size)
+{
+    TCXState *s = opaque;
+    int i;
+    uint32_t col;
+
+    if (!(addr & 4)) {
+        s->tmpblit = val;
+    } else {
+        addr = (addr / 8) & 0xFFFFF;
+        col = cpu_to_be32(s->tmpblit);
+        if (s->depth == 24) {
+            for (i = 0; i < 32; i++)  {
+                if (val & 0x80000000) {
+                    s->vram[addr + i] = s->tmpblit;
+                    s->vram24[addr + i] = col;
+                }
+                val <<= 1;
+            }
+        } else {
+            for (i = 0; i < 32; i++)  {
+                if (val & 0x80000000) {
+                    s->vram[addr + i] = s->tmpblit;
+                }
+                val <<= 1;
+            }
+        }
+        memory_region_set_dirty(&s->vram_mem, addr, 32);
+    }
+}
+
+static void tcx_rstip_writel(void *opaque, hwaddr addr,
+                             uint64_t val, unsigned size)
+{
+    TCXState *s = opaque;
+    int i;
+    uint32_t col;
+
+    if (!(addr & 4)) {
+        s->tmpblit = val;
+    } else {
+        addr = (addr / 8) & 0xFFFFF;
+        col = cpu_to_be32(s->tmpblit);
+        if (s->depth == 24) {
+            for (i = 0; i < 32; i++) {
+                if (val&0x80000000) {
+                    s->vram[addr + i] = s->tmpblit;
+                    s->vram24[addr + i] = col;
+                    s->cplane[addr + i] = col;
+                }
+                val <<= 1;
+            }
+        } else {
+            for (i = 0; i < 32; i++)  {
+                if (val&0x80000000) {
+                    s->vram[addr + i] = s->tmpblit;
+                }
+                val <<= 1;
+            }
+        }
+        memory_region_set_dirty(&s->vram_mem, addr, 32);
+    }
+}
+
+static const MemoryRegionOps tcx_stip_ops = {
+    .read = tcx_stip_readl,
+    .write = tcx_stip_writel,
+    .endianness = DEVICE_NATIVE_ENDIAN,
+    .valid = {
+        .min_access_size = 4,
+        .max_access_size = 4,
+    },
+};
+
+static const MemoryRegionOps tcx_rstip_ops = {
+    .read = tcx_stip_readl,
+    .write = tcx_rstip_writel,
+    .endianness = DEVICE_NATIVE_ENDIAN,
+    .valid = {
+        .min_access_size = 4,
+        .max_access_size = 4,
+    },
+};
+
+static uint64_t tcx_blit_readl(void *opaque, hwaddr addr,
+                               unsigned size)
+{
+    return 0;
+}
+
+static void tcx_blit_writel(void *opaque, hwaddr addr,
+                            uint64_t val, unsigned size)
+{
+    TCXState *s = opaque;
+    uint32_t adsr, len;
+    int i;
+
+    if (!(addr & 4)) {
+        s->tmpblit = val;
+    } else {
+        addr = (addr / 8) & 0xFFFFF;
+        adsr = val & 0xFFFFFF;
+        len = ((val >> 24) & 0x1F) + 1;
+        if (adsr == 0xFFFFFF) {
+            memset(&s->vram[addr], s->tmpblit, len);
+            if (s->depth == 24) {
+                val = s->tmpblit & 0xFFFFFF;
+                val = cpu_to_be32(val);
+                for (i = 0; i < len; i++) {
+                    s->vram24[addr + i] = val;
+                }
+            }
+        } else {
+            memcpy(&s->vram[addr], &s->vram[adsr], len);
+            if (s->depth == 24) {
+                memcpy(&s->vram24[addr], &s->vram24[adsr], len * 4);
+            }
+        }
+        memory_region_set_dirty(&s->vram_mem, addr, len);
+    }
+}
+
+static void tcx_rblit_writel(void *opaque, hwaddr addr,
+                         uint64_t val, unsigned size)
+{
+    TCXState *s = opaque;
+    uint32_t adsr, len;
+    int i;
+
+    if (!(addr & 4)) {
+        s->tmpblit = val;
+    } else {
+        addr = (addr / 8) & 0xFFFFF;
+        adsr = val & 0xFFFFFF;
+        len = ((val >> 24) & 0x1F) + 1;
+        if (adsr == 0xFFFFFF) {
+            memset(&s->vram[addr], s->tmpblit, len);
+            if (s->depth == 24) {
+                val = s->tmpblit & 0xFFFFFF;
+                val = cpu_to_be32(val);
+                for (i = 0; i < len; i++) {
+                    s->vram24[addr + i] = val;
+                    s->cplane[addr + i] = val;
+                }
+            }
+        } else {
+            memcpy(&s->vram[addr], &s->vram[adsr], len);
+            if (s->depth == 24) {
+                memcpy(&s->vram24[addr], &s->vram24[adsr], len * 4);
+                memcpy(&s->cplane[addr], &s->cplane[adsr], len * 4);
+            }
+        }
+        memory_region_set_dirty(&s->vram_mem, addr, len);
+    }
+}
+
+static const MemoryRegionOps tcx_blit_ops = {
+    .read = tcx_blit_readl,
+    .write = tcx_blit_writel,
+    .endianness = DEVICE_NATIVE_ENDIAN,
+    .valid = {
+        .min_access_size = 4,
+        .max_access_size = 4,
+    },
+};
+
+static const MemoryRegionOps tcx_rblit_ops = {
+    .read = tcx_blit_readl,
+    .write = tcx_rblit_writel,
+    .endianness = DEVICE_NATIVE_ENDIAN,
+    .valid = {
+        .min_access_size = 4,
+        .max_access_size = 4,
+    },
+};
+
+static void tcx_invalidate_cursor_position(TCXState *s)
+{
+    int ymin, ymax, start, end;
+
+    /* invalidate only near the cursor */
+    ymin = s->cursy;
+    if (ymin >= s->height) {
+        return;
+    }
+    ymax = MIN(s->height, ymin + 32);
+    start = ymin * 1024;
+    end   = ymax * 1024;
+
+    memory_region_set_dirty(&s->vram_mem, start, end-start);
+}
+
+static uint64_t tcx_thc_readl(void *opaque, hwaddr addr,
+                            unsigned size)
+{
+    TCXState *s = opaque;
+    uint64_t val;
+
+    if (addr == TCX_THC_MISC) {
+        val = s->thcmisc | 0x02000000;
+    } else {
+        val = 0;
+    }
+    return val;
+}
+
+static void tcx_thc_writel(void *opaque, hwaddr addr,
+                         uint64_t val, unsigned size)
+{
+    TCXState *s = opaque;
+
+    if (addr == TCX_THC_CURSXY) {
+        tcx_invalidate_cursor_position(s);
+        s->cursx = val >> 16;
+        s->cursy = val;
+        tcx_invalidate_cursor_position(s);
+    } else if (addr >= TCX_THC_CURSMASK && addr < TCX_THC_CURSMASK + 128) {
+        s->cursmask[(addr - TCX_THC_CURSMASK) / 4] = val;
+        tcx_invalidate_cursor_position(s);
+    } else if (addr >= TCX_THC_CURSBITS && addr < TCX_THC_CURSBITS + 128) {
+        s->cursbits[(addr - TCX_THC_CURSBITS) / 4] = val;
+        tcx_invalidate_cursor_position(s);
+    } else if (addr == TCX_THC_MISC) {
+        s->thcmisc = val;
+    }
+
+}
+
+static const MemoryRegionOps tcx_thc_ops = {
+    .read = tcx_thc_readl,
+    .write = tcx_thc_writel,
+    .endianness = DEVICE_NATIVE_ENDIAN,
+    .valid = {
+        .min_access_size = 4,
+        .max_access_size = 4,
+    },
+};
+
+static uint64_t tcx_dummy_readl(void *opaque, hwaddr addr,
                             unsigned size)
 {
+    /* printf ("TCX Dummy (ALT,DHC,TEC) READ %X %i\n",(int)addr,(int)size);*/
     return 0;
 }
 
-static void dummy_writel(void *opaque, hwaddr addr,
+static void tcx_dummy_writel(void *opaque, hwaddr addr,
                          uint64_t val, unsigned size)
 {
+    /* printf ("TCX Dummy (ALT,DHC,TEC) WRITE %X %X\n",(int)addr,(int)val);*/
 }
 
-static const MemoryRegionOps dummy_ops = {
-    .read = dummy_readl,
-    .write = dummy_writel,
+static const MemoryRegionOps tcx_dummy_ops = {
+    .read = tcx_dummy_readl,
+    .write = tcx_dummy_writel,
     .endianness = DEVICE_NATIVE_ENDIAN,
     .valid = {
         .min_access_size = 4,
@@ -544,7 +946,7 @@ static int tcx_init1(SysBusDevice *dev)
     vmstate_register_ram_global(&s->vram_mem);
     vram_base = memory_region_get_ram_ptr(&s->vram_mem);
 
-    /* FCode ROM */
+    /* 10/ROM   : FCode ROM */
     memory_region_init_ram(&s->rom, NULL, "tcx.prom", FCODE_MAX_ROM_SIZE);
     vmstate_register_ram_global(&s->rom);
     memory_region_set_readonly(&s->rom, true);
@@ -560,7 +962,7 @@ static int tcx_init1(SysBusDevice *dev)
         }
     }
 
-    /* 8-bit plane */
+    /*  0/DFB8  : 8-bit plane */
     s->vram = vram_base;
     size = s->vram_size;
     memory_region_init_alias(&s->vram_8bit, OBJECT(s), "tcx.vram.8bit",
@@ -569,50 +971,85 @@ static int tcx_init1(SysBusDevice *dev)
     vram_offset += size;
     vram_base += size;
 
-    /* DAC */
-    memory_region_init_io(&s->dac, OBJECT(s), &tcx_dac_ops, s,
-                          "tcx.dac", TCX_DAC_NREGS);
-    sysbus_init_mmio(dev, &s->dac);
+    /*  1/DFB24 : 24bit plane */
+    size = s->vram_size * 4;
+    s->vram24 = (uint32_t *)vram_base;
+    s->vram24_offset = vram_offset;
+    memory_region_init_alias(&s->vram_24bit, OBJECT(s), "tcx.vram.24bit",
+                             &s->vram_mem, vram_offset, size);
+    sysbus_init_mmio(dev, &s->vram_24bit);
+    vram_offset += size;
+    vram_base += size;
+
+    /*  2/STIP : Stippler */
+    memory_region_init_io(&s->stip, OBJECT(s), &tcx_stip_ops, s, "tcx.stip",
+                          TCX_STIP_NREGS);
+    sysbus_init_mmio(dev, &s->stip);
+
+    /*  3/BLIT : Blitter */
+    memory_region_init_io(&s->blit, OBJECT(s), &tcx_blit_ops, s, "tcx.blit",
+                          TCX_BLIT_NREGS);
+    sysbus_init_mmio(dev, &s->blit);
+
+    /*  4/RDFB32 : Raw Framebuffer */
+    size = s->vram_size * 4;
+    s->cplane = (uint32_t *)vram_base;
+    s->cplane_offset = vram_offset;
+    memory_region_init_alias(&s->vram_cplane, OBJECT(s), "tcx.vram.cplane",
+                             &s->vram_mem, vram_offset, size);
+    sysbus_init_mmio(dev, &s->vram_cplane);
 
-    /* TEC (dummy) */
-    memory_region_init_io(&s->tec, OBJECT(s), &dummy_ops, s,
+    /*  5/RSTIP : Raw Stippler */
+    memory_region_init_io(&s->rstip, OBJECT(s), &tcx_rstip_ops, s, "tcx.rstip",
+                          TCX_RSTIP_NREGS);
+    sysbus_init_mmio(dev, &s->rstip);
+
+    /*  6/RBLIT : Raw Blitter */
+    memory_region_init_io(&s->rblit, OBJECT(s), &tcx_rblit_ops, s, "tcx.rblit",
+                          TCX_RBLIT_NREGS);
+    sysbus_init_mmio(dev, &s->rblit);
+
+    /*  7/TEC : Transform Engine */
+    memory_region_init_io(&s->tec, OBJECT(s), &tcx_dummy_ops, s,
                           "tcx.tec", TCX_TEC_NREGS);
     sysbus_init_mmio(dev, &s->tec);
-    /* THC: NetBSD writes here even with 8-bit display: dummy */
-    memory_region_init_io(&s->thc24, OBJECT(s), &dummy_ops, s, "tcx.thc24",
-                          TCX_THC_NREGS_24);
-    sysbus_init_mmio(dev, &s->thc24);
-
-    if (s->depth == 24) {
-        /* 24-bit plane */
-        size = s->vram_size * 4;
-        s->vram24 = (uint32_t *)vram_base;
-        s->vram24_offset = vram_offset;
-        memory_region_init_alias(&s->vram_24bit, OBJECT(s), "tcx.vram.24bit",
-                                 &s->vram_mem, vram_offset, size);
-        sysbus_init_mmio(dev, &s->vram_24bit);
-        vram_offset += size;
-        vram_base += size;
-
-        /* Control plane */
-        size = s->vram_size * 4;
-        s->cplane = (uint32_t *)vram_base;
-        s->cplane_offset = vram_offset;
-        memory_region_init_alias(&s->vram_cplane, OBJECT(s), "tcx.vram.cplane",
-                                 &s->vram_mem, vram_offset, size);
-        sysbus_init_mmio(dev, &s->vram_cplane);
 
-        s->con = graphic_console_init(DEVICE(dev), &tcx24_ops, s);
-    } else {
-        /* THC 8 bit (dummy) */
-        memory_region_init_io(&s->thc8, OBJECT(s), &dummy_ops, s, "tcx.thc8",
-                              TCX_THC_NREGS_8);
-        sysbus_init_mmio(dev, &s->thc8);
+    /*  8/CMAP : DAC */
+    memory_region_init_io(&s->dac, OBJECT(s), &tcx_dac_ops, s,
+                          "tcx.dac", TCX_DAC_NREGS);
+    sysbus_init_mmio(dev, &s->dac);
+
+    /*  9/THC : Cursor */
+    memory_region_init_io(&s->thc, OBJECT(s), &tcx_thc_ops, s, "tcx.thc",
+                          TCX_THC_NREGS);
+    sysbus_init_mmio(dev, &s->thc);
+
+    /* 11/DHC : ??? */
+    memory_region_init_io(&s->dhc, OBJECT(s), &tcx_dummy_ops, s, "tcx.dhc",
+                          TCX_DHC_NREGS);
+    sysbus_init_mmio(dev, &s->dhc);
+
+    /* 12/ALT : ??? */
+    memory_region_init_io(&s->alt, OBJECT(s), &tcx_dummy_ops, s, "tcx.alt",
+                          TCX_ALT_NREGS);
+    sysbus_init_mmio(dev, &s->alt);
+
+    /* 9/THC24bits : NetBSD writes here even with 8-bit display: dummy */
+    if (s->depth == 8) {
+        memory_region_init_io(&s->thc24, OBJECT(s), &tcx_dummy_ops, s,
+                              "tcx.thc24", TCX_THC_NREGS);
+        sysbus_init_mmio(dev, &s->thc24);
+    }
 
+    if (s->depth == 8) {
         s->con = graphic_console_init(DEVICE(dev), &tcx_ops, s);
+    } else {
+        s->con = graphic_console_init(DEVICE(dev), &tcx24_ops, s);
     }
+    s->thcmisc = 0;
 
     qemu_console_resize(s->con, s->width, s->height);
+
     return 0;
 }
 
diff --git a/hw/sparc/sun4m.c b/hw/sparc/sun4m.c
index 2957d90..7eee180 100644
--- a/hw/sparc/sun4m.c
+++ b/hw/sparc/sun4m.c
@@ -540,24 +540,40 @@ static void tcx_init(hwaddr addr, int vram_size, int width,
     qdev_prop_set_uint64(dev, "prom_addr", addr);
     qdev_init_nofail(dev);
     s = SYS_BUS_DEVICE(dev);
-    /* FCode ROM */
+
+    /* 10/ROM   : FCode ROM */
     sysbus_mmio_map(s, 0, addr);
-    /* 8-bit plane */
+    /*  0/DFB8  : 8-bit plane */
     sysbus_mmio_map(s, 1, addr + 0x00800000ULL);
-    /* DAC */
-    sysbus_mmio_map(s, 2, addr + 0x00200000ULL);
-    /* TEC (dummy) */
-    sysbus_mmio_map(s, 3, addr + 0x00700000ULL);
-    /* THC 24 bit: NetBSD writes here even with 8-bit display: dummy */
-    sysbus_mmio_map(s, 4, addr + 0x00301000ULL);
-    if (depth == 24) {
-        /* 24-bit plane */
-        sysbus_mmio_map(s, 5, addr + 0x02000000ULL);
-        /* Control plane */
-        sysbus_mmio_map(s, 6, addr + 0x0a000000ULL);
+    /*  1/DFB24 : 24bit plane */
+    sysbus_mmio_map(s, 2, addr + 0x02000000ULL);
+    /*  2/STIP  : Stipple */
+    sysbus_mmio_map(s, 3, addr + 0x04000000ULL);
+    /*  3/BLIT  : Blitter */
+    sysbus_mmio_map(s, 4, addr + 0x06000000ULL);
+    /*  4/RDFB32: Raw framebuffer. Control plane */
+    sysbus_mmio_map(s, 5, addr + 0x0A000000ULL);
+    /*  5/RSTIP : Raw Stipple */
+    sysbus_mmio_map(s, 6, addr + 0x0C000000ULL);
+    /*  6/RBLIT : Raw Blitter */
+    sysbus_mmio_map(s, 7, addr + 0x0E000000ULL);
+    /*  7/TEC   : Transform Engine */
+    sysbus_mmio_map(s, 8, addr + 0x00700000ULL);
+    /*  8/CMAP  : DAC */
+    sysbus_mmio_map(s, 9, addr + 0x00200000ULL);
+    /*  9/THC   : */
+    if (depth == 8) {
+        sysbus_mmio_map(s, 10, addr + 0x00300000ULL);
     } else {
-        /* THC 8 bit (dummy) */
-        sysbus_mmio_map(s, 5, addr + 0x00300000ULL);
+        sysbus_mmio_map(s, 10, addr + 0x00301000ULL);
+    }
+    /* 11/DHC   : */
+    sysbus_mmio_map(s, 11, addr + 0x00240000ULL);
+    /* 12/ALT   : */
+    sysbus_mmio_map(s, 12, addr + 0x00280000ULL);
+    /* 9/THC24bits : NetBSD writes here even with 8-bit display: dummy */
+    if (depth == 8) {
+        sysbus_mmio_map(s, 13, addr + 0x00301000ULL);
     }
 }
 
-- 
1.8.1.5

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [Qemu-devel] [PATCH 1/1] Sun4m : TCX framebuffer hardware acceleration
  2014-05-25 13:20   ` Olivier Danet
@ 2014-05-25 20:28     ` Mark Cave-Ayland
       [not found]       ` <53878F06.5030902@caramail.com>
  0 siblings, 1 reply; 6+ messages in thread
From: Mark Cave-Ayland @ 2014-05-25 20:28 UTC (permalink / raw)
  To: Olivier Danet, qemu-devel, Blue Swirl, Artyom Tarasenko

On 25/05/14 14:20, Olivier Danet wrote:

> Here is the original patch, I have changed email settings since then, it should work better.
> Alas, I have not merged latest QEMU changes (your CG3/TCX patches), so it will probably not compile as-is...

Thanks for this - don't worry about my latest patches for the moment as 
they are still awaiting review. As it was, I still had to modify the 
patch by hand to get it to apply to master, I think because of the extra 
parameter added to graphic_console_init().

Once applied, I tested the patch with NetBSD 6.1.3 which is what I had 
lying around in my test suite, along with your binary QEMU,tcx.bin ROM.

First impressions are good in that your work fixes the missing white 
background on NetBSD boot. I did notice a problem with the text colour 
though: the main console text appears green until the welcome banner 
appears, at which point it correctly changes to black. I 
wonder if this is the same problem with the DAC programming I had on the 
cg3 where you can have byte accesses to the DAC registers, and not just 
32-bit accesses? Have a look at the cg3 source for more information.

> [PATCH 1/1] Sun4m : TCX framebuffer hardware acceleration
>
> The S24/TCX framebuffer is a mildly accelerated video card, with
> blitter, stippler and hardware cursor.
> * Solaris and NetBSD 6.x use all the hardware acceleration features.
> * The Xorg driver (used by Linux) can use the hardware cursor only.
>
> This patch implements hardware acceleration in both 8bits and 24bits
> modes. It is based on the NetBSD driver sources and from tests with Solaris.
>
> Signed-off-by: Olivier Danet <odanet@caramail.com>
> ---
>   hw/display/tcx.c | 679 +++++++++++++++++++++++++++++++++++++++++++++----------
>   hw/sparc/sun4m.c |  46 ++--
>   2 files changed, 589 insertions(+), 136 deletions(-)
>
> diff --git a/hw/display/tcx.c b/hw/display/tcx.c
> index 873b82c..bcd64e5 100644
> --- a/hw/display/tcx.c
> +++ b/hw/display/tcx.c
> @@ -33,17 +33,26 @@
>
>   #define MAXX 1024
>   #define MAXY 768
> -#define TCX_DAC_NREGS 16
> -#define TCX_THC_NREGS_8  0x081c
> -#define TCX_THC_NREGS_24 0x1000
> +#define TCX_DAC_NREGS    16
> +#define TCX_THC_NREGS    0x1000
> +#define TCX_DHC_NREGS    0x4000
>   #define TCX_TEC_NREGS    0x1000
> +#define TCX_ALT_NREGS    0x8000
> +#define TCX_STIP_NREGS   0x800000
> +#define TCX_BLIT_NREGS   0x800000
> +#define TCX_RSTIP_NREGS  0x800000
> +#define TCX_RBLIT_NREGS  0x800000
> +
> +#define TCX_THC_MISC     0x818
> +#define TCX_THC_CURSXY   0x8fc
> +#define TCX_THC_CURSMASK 0x900
> +#define TCX_THC_CURSBITS 0x980
>
>   #define TYPE_TCX "SUNW,tcx"
>   #define TCX(obj) OBJECT_CHECK(TCXState, (obj), TYPE_TCX)
>
>   typedef struct TCXState {
>       SysBusDevice parent_obj;
> -
>       QemuConsole *con;
>       uint8_t *vram;
>       uint32_t *vram24, *cplane;
> @@ -52,17 +61,30 @@ typedef struct TCXState {
>       MemoryRegion vram_mem;
>       MemoryRegion vram_8bit;
>       MemoryRegion vram_24bit;
> +    MemoryRegion stip;
> +    MemoryRegion blit;
>       MemoryRegion vram_cplane;
> -    MemoryRegion dac;
> +    MemoryRegion rstip;
> +    MemoryRegion rblit;
>       MemoryRegion tec;
> +    MemoryRegion dac;
> +    MemoryRegion thc;
> +    MemoryRegion dhc;
> +    MemoryRegion alt;
>       MemoryRegion thc24;
> -    MemoryRegion thc8;
> +
>       ram_addr_t vram24_offset, cplane_offset;
> +    uint32_t tmpblit;
>       uint32_t vram_size;
> -    uint32_t palette[256];
> -    uint8_t r[256], g[256], b[256];
> +    uint32_t palette[260];
> +    uint8_t r[260], g[260], b[260];
>       uint16_t width, height, depth;
>       uint8_t dac_index, dac_state;
> +    uint32_t thcmisc;
> +    uint32_t cursmask[32];
> +    uint32_t cursbits[32];
> +    uint16_t cursx;
> +    uint16_t cursy;
>   } TCXState;
>
>   static void tcx_set_dirty(TCXState *s)
> @@ -70,10 +92,36 @@ static void tcx_set_dirty(TCXState *s)
>       memory_region_set_dirty(&s->vram_mem, 0, MAXX * MAXY);
>   }
>
> -static void tcx24_set_dirty(TCXState *s)
> +static inline int tcx24_check_dirty(TCXState *s, ram_addr_t page,
> +                                    ram_addr_t page24, ram_addr_t cpage)
>   {
> -    memory_region_set_dirty(&s->vram_mem, s->vram24_offset, MAXX * MAXY * 4);
> -    memory_region_set_dirty(&s->vram_mem, s->cplane_offset, MAXX * MAXY * 4);
> +    int ret;
> +
> +    ret = memory_region_get_dirty(&s->vram_mem, page, TARGET_PAGE_SIZE,
> +                                  DIRTY_MEMORY_VGA);
> +    ret |= memory_region_get_dirty(&s->vram_mem, page24, TARGET_PAGE_SIZE * 4,
> +                                   DIRTY_MEMORY_VGA);
> +    ret |= memory_region_get_dirty(&s->vram_mem, cpage, TARGET_PAGE_SIZE * 4,
> +                                   DIRTY_MEMORY_VGA);
> +    return ret;
> +}
> +
> +static inline void tcx24_reset_dirty(TCXState *ts, ram_addr_t page_min,
> +                               ram_addr_t page_max, ram_addr_t page24,
> +                              ram_addr_t cpage)
> +{
> +    memory_region_reset_dirty(&ts->vram_mem,
> +                              page_min,
> +                              (page_max - page_min) + TARGET_PAGE_SIZE,
> +                              DIRTY_MEMORY_VGA);
> +    memory_region_reset_dirty(&ts->vram_mem,
> +                              page24 + page_min * 4,
> +                              (page_max - page_min) * 4 + TARGET_PAGE_SIZE,
> +                              DIRTY_MEMORY_VGA);
> +    memory_region_reset_dirty(&ts->vram_mem,
> +                              cpage + page_min * 4,
> +                              (page_max - page_min) * 4 + TARGET_PAGE_SIZE,
> +                              DIRTY_MEMORY_VGA);
>   }

Some changes to the dirty functionality here.

>   static void update_palette_entries(TCXState *s, int start, int end)
> @@ -102,11 +150,7 @@ static void update_palette_entries(TCXState *s, int start, int end)
>               break;
>           }
>       }
> -    if (s->depth == 24) {
> -        tcx24_set_dirty(s);
> -    } else {
> -        tcx_set_dirty(s);
> -    }
> +    tcx_set_dirty(s);
>   }
>
>   static void tcx_draw_line32(TCXState *s1, uint8_t *d,
> @@ -116,7 +160,7 @@ static void tcx_draw_line32(TCXState *s1, uint8_t *d,
>       uint8_t val;
>       uint32_t *p = (uint32_t *)d;
>
> -    for(x = 0; x < width; x++) {
> +    for (x = 0; x < width; x++) {

Unnecessary white space change.

>           val = *s++;
>           *p++ = s1->palette[val];
>       }
> @@ -129,7 +173,7 @@ static void tcx_draw_line16(TCXState *s1, uint8_t *d,
>       uint8_t val;
>       uint16_t *p = (uint16_t *)d;
>
> -    for(x = 0; x < width; x++) {
> +    for (x = 0; x < width; x++) {

And here too.

>           val = *s++;
>           *p++ = s1->palette[val];
>       }
> @@ -147,6 +191,83 @@ static void tcx_draw_line8(TCXState *s1, uint8_t *d,
>       }
>   }
>
> +static void tcx_draw_cursor32(TCXState *s1, uint8_t *d,
> +                              int y, int width)
> +{
> +    int x, len;
> +    uint32_t mask, bits;
> +    uint32_t *p = (uint32_t *)d;
> +
> +    y = y - s1->cursy;
> +    mask = s1->cursmask[y];
> +    bits = s1->cursbits[y];
> +    len = MIN(width - s1->cursx, 32);
> +    p = &p[s1->cursx];
> +    for (x = 0; x < len; x++) {
> +        if (mask & 0x80000000) {
> +            if (bits & 0x80000000) {
> +                *p = s1->palette[259];
> +            } else {
> +                *p = s1->palette[258];
> +            }
> +        }
> +        p++;
> +        mask <<= 1;
> +        bits <<= 1;
> +    }
> +}
> +
> +static void tcx_draw_cursor16(TCXState *s1, uint8_t *d,
> +                              int y, int width)
> +{
> +    int x, len;
> +    uint32_t mask, bits;
> +    uint16_t *p = (uint16_t *)d;
> +
> +    y = y - s1->cursy;
> +    mask = s1->cursmask[y];
> +    bits = s1->cursbits[y];
> +    len = MIN(width - s1->cursx, 32);
> +    p = &p[s1->cursx];
> +    for (x = 0; x < len; x++) {
> +        if (mask & 0x80000000) {
> +            if (bits & 0x80000000) {
> +                *p = s1->palette[259];
> +            } else {
> +                *p = s1->palette[258];
> +            }
> +        }
> +        p++;
> +        mask <<= 1;
> +        bits <<= 1;
> +    }
> +}
> +
> +static void tcx_draw_cursor8(TCXState *s1, uint8_t *d,
> +                              int y, int width)
> +{
> +    int x, len;
> +    uint32_t mask, bits;
> +
> +    y = y - s1->cursy;
> +    mask = s1->cursmask[y];
> +    bits = s1->cursbits[y];
> +    len = MIN(width - s1->cursx, 32);
> +    d = &d[s1->cursx];
> +    for (x = 0; x < len; x++) {
> +        if (mask & 0x80000000) {
> +            if (bits & 0x80000000) {
> +                *d = s1->palette[259];
> +            } else {
> +                *d = s1->palette[258];
> +            }
> +        }
> +        d++;
> +        mask <<= 1;
> +        bits <<= 1;
> +    }
> +}
> +
>   /*
>     XXX Could be much more optimal:
>     * detect if line/page/whole screen is in 24 bit mode
> @@ -162,11 +283,10 @@ static inline void tcx24_draw_line32(TCXState *s1, uint8_t *d,
>       uint8_t val, *p8;
>       uint32_t *p = (uint32_t *)d;
>       uint32_t dval;
> -
>       bgr = is_surface_bgr(surface);
>       for(x = 0; x < width; x++, s++, s24++) {
> -        if ((be32_to_cpu(*cplane++) & 0xff000000) == 0x03000000) {
> -            // 24-bit direct, BGR order
> +        if (be32_to_cpu(*cplane) & 0x03000000) {
> +            /* 24-bit direct, BGR order */
>               p8 = (uint8_t *)s24;
>               p8++;
>               b = *p8++;
> @@ -177,47 +297,18 @@ static inline void tcx24_draw_line32(TCXState *s1, uint8_t *d,
>               else
>                   dval = rgb_to_pixel32(r, g, b);
>           } else {
> +            /* 8 bits pseudocolor */
>               val = *s;
>               dval = s1->palette[val];
>           }
>           *p++ = dval;
> +        cplane++;

AFAICT the change here for cplane isn't required?

>       }
>   }
>
> -static inline int check_dirty(TCXState *s, ram_addr_t page, ram_addr_t page24,
> -                              ram_addr_t cpage)
> -{
> -    int ret;
> -
> -    ret = memory_region_get_dirty(&s->vram_mem, page, TARGET_PAGE_SIZE,
> -                                  DIRTY_MEMORY_VGA);
> -    ret |= memory_region_get_dirty(&s->vram_mem, page24, TARGET_PAGE_SIZE * 4,
> -                                   DIRTY_MEMORY_VGA);
> -    ret |= memory_region_get_dirty(&s->vram_mem, cpage, TARGET_PAGE_SIZE * 4,
> -                                   DIRTY_MEMORY_VGA);
> -    return ret;
> -}
> -
> -static inline void reset_dirty(TCXState *ts, ram_addr_t page_min,
> -                               ram_addr_t page_max, ram_addr_t page24,
> -                              ram_addr_t cpage)
> -{
> -    memory_region_reset_dirty(&ts->vram_mem,
> -                              page_min,
> -                              (page_max - page_min) + TARGET_PAGE_SIZE,
> -                              DIRTY_MEMORY_VGA);
> -    memory_region_reset_dirty(&ts->vram_mem,
> -                              page24 + page_min * 4,
> -                              (page_max - page_min) * 4 + TARGET_PAGE_SIZE,
> -                              DIRTY_MEMORY_VGA);
> -    memory_region_reset_dirty(&ts->vram_mem,
> -                              cpage + page_min * 4,
> -                              (page_max - page_min) * 4 + TARGET_PAGE_SIZE,
> -                              DIRTY_MEMORY_VGA);
> -}
> -
>   /* Fixed line length 1024 allows us to do nice tricks not possible on
>      VGA... */
> +

More unneeded whitespace changes.

>   static void tcx_update_display(void *opaque)
>   {
>       TCXState *ts = opaque;
> @@ -226,6 +317,7 @@ static void tcx_update_display(void *opaque)
>       int y, y_start, dd, ds;
>       uint8_t *d, *s;
>       void (*f)(TCXState *s1, uint8_t *dst, const uint8_t *src, int width);
> +    void (*fc)(TCXState *s1, uint8_t *dst, int y, int width);
>
>       if (surface_bits_per_pixel(surface) == 0) {
>           return;
> @@ -243,20 +335,23 @@ static void tcx_update_display(void *opaque)
>       switch (surface_bits_per_pixel(surface)) {
>       case 32:
>           f = tcx_draw_line32;
> +        fc = tcx_draw_cursor32;
>           break;
>       case 15:
>       case 16:
>           f = tcx_draw_line16;
> +        fc = tcx_draw_cursor16;
>           break;
>       default:
>       case 8:
>           f = tcx_draw_line8;
> +        fc = tcx_draw_cursor8;
>           break;
>       case 0:
>           return;
>       }
>
> -    for(y = 0; y < ts->height; y += 4, page += TARGET_PAGE_SIZE) {
> +    for (y = 0; y < ts->height; page += TARGET_PAGE_SIZE) {

And another whitespace.

>           if (memory_region_get_dirty(&ts->vram_mem, page, TARGET_PAGE_SIZE,
>                                       DIRTY_MEMORY_VGA)) {
>               if (y_start < 0)
> @@ -265,18 +360,35 @@ static void tcx_update_display(void *opaque)
>                   page_min = page;
>               if (page > page_max)
>                   page_max = page;
> +
>               f(ts, d, s, ts->width);
> +            if (y >= ts->cursy && y < ts->cursy+32 && ts->cursx < ts->width) {

Please check the whitespace around operators against the coding style throughout this section (e.g. cursy+32 should be cursy + 32).

> +                fc(ts, d, y, ts->width);
> +            }
>               d += dd;
>               s += ds;
> +            y++;
>               f(ts, d, s, ts->width);
> +            if (y >= ts->cursy && y < ts->cursy+32 && ts->cursx < ts->width) {
> +                fc(ts, d, y, ts->width);
> +            }
>               d += dd;
>               s += ds;
> +            y++;
>               f(ts, d, s, ts->width);
> +            if (y >= ts->cursy && y < ts->cursy+32 && ts->cursx < ts->width) {
> +                fc(ts, d, y, ts->width);
> +            }
>               d += dd;
>               s += ds;
> +            y++;
>               f(ts, d, s, ts->width);
> +            if (y >= ts->cursy && y < ts->cursy+32 && ts->cursx < ts->width) {
> +                fc(ts, d, y, ts->width);
> +            }
>               d += dd;
>               s += ds;
> +            y++;
>           } else {
>               if (y_start >= 0) {
>                   /* flush to display */
> @@ -286,6 +398,7 @@ static void tcx_update_display(void *opaque)
>               }
>               d += dd * 4;
>               s += ds * 4;
> +            y += 4;
>           }
>       }
>       if (y_start >= 0) {
> @@ -328,9 +441,9 @@ static void tcx24_update_display(void *opaque)
>       dd = surface_stride(surface);
>       ds = 1024;
>
> -    for(y = 0; y < ts->height; y += 4, page += TARGET_PAGE_SIZE,
> +    for (y = 0; y < ts->height; page += TARGET_PAGE_SIZE,

And here too.

>               page24 += TARGET_PAGE_SIZE, cpage += TARGET_PAGE_SIZE) {
> -        if (check_dirty(ts, page, page24, cpage)) {
> +        if (tcx24_check_dirty(ts, page, page24, cpage)) {
>               if (y_start < 0)
>                   y_start = y;
>               if (page < page_min)
> @@ -338,25 +451,41 @@ static void tcx24_update_display(void *opaque)
>               if (page > page_max)
>                   page_max = page;
>               tcx24_draw_line32(ts, d, s, ts->width, cptr, s24);
> +            if (y >= ts->cursy && y < ts->cursy+32 && ts->cursx < ts->width) {
> +                tcx_draw_cursor32(ts, d, y, ts->width);
> +            }
>               d += dd;
>               s += ds;
>               cptr += ds;
>               s24 += ds;
> +            y++;
>               tcx24_draw_line32(ts, d, s, ts->width, cptr, s24);
> +            if (y >= ts->cursy && y < ts->cursy+32 && ts->cursx < ts->width) {
> +                tcx_draw_cursor32(ts, d, y, ts->width);
> +            }
>               d += dd;
>               s += ds;
>               cptr += ds;
>               s24 += ds;
> +            y++;
>               tcx24_draw_line32(ts, d, s, ts->width, cptr, s24);
> +            if (y >= ts->cursy && y < ts->cursy+32 && ts->cursx < ts->width) {
> +                tcx_draw_cursor32(ts, d, y, ts->width);
> +            }
>               d += dd;
>               s += ds;
>               cptr += ds;
>               s24 += ds;
> +            y++;
>               tcx24_draw_line32(ts, d, s, ts->width, cptr, s24);
> +            if (y >= ts->cursy && y < ts->cursy+32 && ts->cursx < ts->width) {
> +                tcx_draw_cursor32(ts, d, y, ts->width);
> +            }
>               d += dd;
>               s += ds;
>               cptr += ds;
>               s24 += ds;
> +            y++;
>           } else {
>               if (y_start >= 0) {
>                   /* flush to display */
> @@ -368,6 +497,7 @@ static void tcx24_update_display(void *opaque)
>               s += ds * 4;
>               cptr += ds * 4;
>               s24 += ds * 4;
> +            y += 4;
>           }
>       }
>       if (y_start >= 0) {
> @@ -377,7 +507,7 @@ static void tcx24_update_display(void *opaque)
>       }
>       /* reset modified pages */
>       if (page_max >= page_min) {
> -        reset_dirty(ts, page_min, page_max, page24, cpage);
> +        tcx24_reset_dirty(ts, page_min, page_max, page24, cpage);
>       }
>   }
>
> @@ -394,7 +524,6 @@ static void tcx24_invalidate_display(void *opaque)
>       TCXState *s = opaque;
>
>       tcx_set_dirty(s);
> -    tcx24_set_dirty(s);

Is this change correct for a 24-bit display? I guess that, since 
tcx_set_dirty() now only touches the VRAM, it should be valid for 
both 8-bit and 24-bit displays.

>       qemu_console_resize(s->con, s->width, s->height);
>   }
>
> @@ -403,12 +532,7 @@ static int vmstate_tcx_post_load(void *opaque, int version_id)
>       TCXState *s = opaque;
>
>       update_palette_entries(s, 0, 256);
> -    if (s->depth == 24) {
> -        tcx24_set_dirty(s);
> -    } else {
> -        tcx_set_dirty(s);
> -    }
> -
> +    tcx_set_dirty(s);
>       return 0;
>   }
>
> @@ -436,56 +560,87 @@ static void tcx_reset(DeviceState *d)
>       TCXState *s = TCX(d);
>
>       /* Initialize palette */
> -    memset(s->r, 0, 256);
> -    memset(s->g, 0, 256);
> -    memset(s->b, 0, 256);
> +    memset(s->r, 0, 260);
> +    memset(s->g, 0, 260);
> +    memset(s->b, 0, 260);
>       s->r[255] = s->g[255] = s->b[255] = 255;
> -    update_palette_entries(s, 0, 256);
> +    s->r[256] = s->g[256] = s->b[256] = 255;
> +    s->r[258] = s->g[258] = s->b[258] = 255;
> +    update_palette_entries(s, 0, 260);
>       memset(s->vram, 0, MAXX*MAXY);
>       memory_region_reset_dirty(&s->vram_mem, 0, MAXX * MAXY * (1 + 4 + 4),
>                                 DIRTY_MEMORY_VGA);
>       s->dac_index = 0;
>       s->dac_state = 0;
> +    s->cursx = 0xF000; /*Put cursor off screen */
> +    s->cursy = 0xF000;
>   }

I guess the extra palette colours are for the cursor here?

>   static uint64_t tcx_dac_readl(void *opaque, hwaddr addr,
>                                 unsigned size)
>   {
> -    return 0;
> +    TCXState *s = opaque;
> +    uint32_t val = 0;
> +
> +    switch (s->dac_state) {
> +    case 0:
> +        val = s->r[s->dac_index] << 24;
> +        s->dac_state++;
> +        break;
> +    case 1:
> +        val = s->g[s->dac_index] << 24;
> +        s->dac_state++;
> +        break;
> +    case 2:
> +        val = s->b[s->dac_index] << 24;
> +        s->dac_index = (s->dac_index + 1) & 255; /* Index autoincrement */
> +    default:
> +        s->dac_state = 0;
> +        break;
> +    }
> +
> +    return val;
>   }
>
>   static void tcx_dac_writel(void *opaque, hwaddr addr, uint64_t val,
>                              unsigned size)
>   {
>       TCXState *s = opaque;
> +    unsigned index;
>
>       switch (addr) {
> -    case 0:
> +    case 0: /* Address */
>           s->dac_index = val >> 24;
>           s->dac_state = 0;
>           break;
> -    case 4:
> +    case 4:  /* Pixel colours */
> +    case 12: /* Overlay (cursor) colours */
> +        if (addr & 8) {
> +            index = (s->dac_index & 3) + 256;
> +        } else {
> +            index = s->dac_index;
> +        }
>           switch (s->dac_state) {
>           case 0:
> -            s->r[s->dac_index] = val >> 24;
> -            update_palette_entries(s, s->dac_index, s->dac_index + 1);
> +            s->r[index] = val >> 24;
> +            update_palette_entries(s, index, index + 1);
>               s->dac_state++;
>               break;
>           case 1:
> -            s->g[s->dac_index] = val >> 24;
> -            update_palette_entries(s, s->dac_index, s->dac_index + 1);
> +            s->g[index] = val >> 24;
> +            update_palette_entries(s, index, index + 1);
>               s->dac_state++;
>               break;
>           case 2:
> -            s->b[s->dac_index] = val >> 24;
> -            update_palette_entries(s, s->dac_index, s->dac_index + 1);
> -            s->dac_index = (s->dac_index + 1) & 255; // Index autoincrement
> +            s->b[index] = val >> 24;
> +            update_palette_entries(s, index, index + 1);
> +            s->dac_index = (s->dac_index + 1) & 255; /* Index autoincrement */
>           default:
>               s->dac_state = 0;
>               break;
>           }
>           break;
> -    default:
> +    default: /* Control registers */
>           break;
>       }
>   }
> @@ -500,20 +655,267 @@ static const MemoryRegionOps tcx_dac_ops = {
>       },
>   };
>
> -static uint64_t dummy_readl(void *opaque, hwaddr addr,
> +static uint64_t tcx_stip_readl(void *opaque, hwaddr addr,
> +                               unsigned size)
> +{
> +    return 0;
> +}
> +
> +static void tcx_stip_writel(void *opaque, hwaddr addr,
> +                            uint64_t val, unsigned size)
> +{
> +    TCXState *s = opaque;
> +    int i;
> +    uint32_t col;
> +
> +    if (!(addr & 4)) {
> +        s->tmpblit = val;
> +    } else {
> +        addr = (addr / 8) & 0xFFFFF;
> +        col = cpu_to_be32(s->tmpblit);
> +        if (s->depth == 24) {
> +            for (i = 0; i < 32; i++)  {
> +                if (val & 0x80000000) {
> +                    s->vram[addr + i] = s->tmpblit;
> +                    s->vram24[addr + i] = col;
> +                }
> +                val <<= 1;
> +            }
> +        } else {
> +            for (i = 0; i < 32; i++)  {
> +                if (val & 0x80000000) {
> +                    s->vram[addr + i] = s->tmpblit;
> +                }
> +                val <<= 1;
> +            }
> +        }
> +        memory_region_set_dirty(&s->vram_mem, addr, 32);
> +    }
> +}
> +
> +static void tcx_rstip_writel(void *opaque, hwaddr addr,
> +                             uint64_t val, unsigned size)
> +{
> +    TCXState *s = opaque;
> +    int i;
> +    uint32_t col;
> +
> +    if (!(addr & 4)) {
> +        s->tmpblit = val;
> +    } else {
> +        addr = (addr / 8) & 0xFFFFF;
> +        col = cpu_to_be32(s->tmpblit);
> +        if (s->depth == 24) {
> +            for (i = 0; i < 32; i++) {
> +                if (val&0x80000000) {
> +                    s->vram[addr + i] = s->tmpblit;
> +                    s->vram24[addr + i] = col;
> +                    s->cplane[addr + i] = col;
> +                }
> +                val <<= 1;
> +            }
> +        } else {
> +            for (i = 0; i < 32; i++)  {
> +                if (val&0x80000000) {
> +                    s->vram[addr + i] = s->tmpblit;
> +                }
> +                val <<= 1;
> +            }
> +        }
> +        memory_region_set_dirty(&s->vram_mem, addr, 32);
> +    }
> +}
> +
> +static const MemoryRegionOps tcx_stip_ops = {
> +    .read = tcx_stip_readl,
> +    .write = tcx_stip_writel,
> +    .endianness = DEVICE_NATIVE_ENDIAN,
> +    .valid = {
> +        .min_access_size = 4,
> +        .max_access_size = 4,
> +    },
> +};
> +
> +static const MemoryRegionOps tcx_rstip_ops = {
> +    .read = tcx_stip_readl,
> +    .write = tcx_rstip_writel,
> +    .endianness = DEVICE_NATIVE_ENDIAN,
> +    .valid = {
> +        .min_access_size = 4,
> +        .max_access_size = 4,
> +    },
> +};
> +
> +static uint64_t tcx_blit_readl(void *opaque, hwaddr addr,
> +                               unsigned size)
> +{
> +    return 0;
> +}
> +
> +static void tcx_blit_writel(void *opaque, hwaddr addr,
> +                            uint64_t val, unsigned size)
> +{
> +    TCXState *s = opaque;
> +    uint32_t adsr, len;
> +    int i;
> +
> +    if (!(addr & 4)) {
> +        s->tmpblit = val;
> +    } else {
> +        addr = (addr / 8) & 0xFFFFF;
> +        adsr = val & 0xFFFFFF;
> +        len = ((val >> 24) & 0x1F) + 1;
> +        if (adsr == 0xFFFFFF) {
> +            memset(&s->vram[addr], s->tmpblit, len);
> +            if (s->depth == 24) {
> +                val = s->tmpblit & 0xFFFFFF;
> +                val = cpu_to_be32(val);
> +                for (i = 0; i < len; i++) {
> +                    s->vram24[addr + i] = val;
> +                }
> +            }
> +        } else {
> +            memcpy(&s->vram[addr], &s->vram[adsr], len);
> +            if (s->depth == 24) {
> +                memcpy(&s->vram24[addr], &s->vram24[adsr], len * 4);
> +            }
> +        }
> +        memory_region_set_dirty(&s->vram_mem, addr, len);
> +    }
> +}
> +
> +static void tcx_rblit_writel(void *opaque, hwaddr addr,
> +                         uint64_t val, unsigned size)
> +{
> +    TCXState *s = opaque;
> +    uint32_t adsr, len;
> +    int i;
> +
> +    if (!(addr & 4)) {
> +        s->tmpblit = val;
> +    } else {
> +        addr = (addr / 8) & 0xFFFFF;
> +        adsr = val & 0xFFFFFF;
> +        len = ((val >> 24) & 0x1F) + 1;
> +        if (adsr == 0xFFFFFF) {
> +            memset(&s->vram[addr], s->tmpblit, len);
> +            if (s->depth == 24) {
> +                val = s->tmpblit & 0xFFFFFF;
> +                val = cpu_to_be32(val);
> +                for (i = 0; i < len; i++) {
> +                    s->vram24[addr + i] = val;
> +                    s->cplane[addr + i] = val;
> +                }
> +            }
> +        } else {
> +            memcpy(&s->vram[addr], &s->vram[adsr], len);
> +            if (s->depth == 24) {
> +                memcpy(&s->vram24[addr], &s->vram24[adsr], len * 4);
> +                memcpy(&s->cplane[addr], &s->cplane[adsr], len * 4);
> +            }
> +        }
> +        memory_region_set_dirty(&s->vram_mem, addr, len);
> +    }
> +}
> +
> +static const MemoryRegionOps tcx_blit_ops = {
> +    .read = tcx_blit_readl,
> +    .write = tcx_blit_writel,
> +    .endianness = DEVICE_NATIVE_ENDIAN,
> +    .valid = {
> +        .min_access_size = 4,
> +        .max_access_size = 4,
> +    },
> +};
> +
> +static const MemoryRegionOps tcx_rblit_ops = {
> +    .read = tcx_blit_readl,
> +    .write = tcx_rblit_writel,
> +    .endianness = DEVICE_NATIVE_ENDIAN,
> +    .valid = {
> +        .min_access_size = 4,
> +        .max_access_size = 4,
> +    },
> +};
> +
> +static void tcx_invalidate_cursor_position(TCXState *s)
> +{
> +    int ymin, ymax, start, end;
> +
> +    /* invalidate only near the cursor */
> +    ymin = s->cursy;
> +    if (ymin >= s->height) {
> +        return;
> +    }
> +    ymax = MIN(s->height, ymin + 32);
> +    start = ymin * 1024;
> +    end   = ymax * 1024;
> +
> +    memory_region_set_dirty(&s->vram_mem, start, end-start);
> +}
> +
> +static uint64_t tcx_thc_readl(void *opaque, hwaddr addr,
> +                            unsigned size)
> +{
> +    TCXState *s = opaque;
> +    uint64_t val;
> +
> +    if (addr == TCX_THC_MISC) {
> +        val = s->thcmisc | 0x02000000;
> +    } else {
> +        val = 0;
> +    }
> +    return val;
> +}
> +
> +static void tcx_thc_writel(void *opaque, hwaddr addr,
> +                         uint64_t val, unsigned size)
> +{
> +    TCXState *s = opaque;
> +
> +    if (addr == TCX_THC_CURSXY) {
> +        tcx_invalidate_cursor_position(s);
> +        s->cursx = val >> 16;
> +        s->cursy = val;
> +        tcx_invalidate_cursor_position(s);
> +    } else if (addr >= TCX_THC_CURSMASK && addr < TCX_THC_CURSMASK + 128) {
> +        s->cursmask[(addr - TCX_THC_CURSMASK) / 4] = val;
> +        tcx_invalidate_cursor_position(s);
> +    } else if (addr >= TCX_THC_CURSBITS && addr < TCX_THC_CURSBITS + 128) {
> +        s->cursbits[(addr - TCX_THC_CURSBITS) / 4] = val;
> +        tcx_invalidate_cursor_position(s);
> +    } else if (addr == TCX_THC_MISC) {
> +        s->thcmisc = val;
> +    }
> +
> +}
> +
> +static const MemoryRegionOps tcx_thc_ops = {
> +    .read = tcx_thc_readl,
> +    .write = tcx_thc_writel,
> +    .endianness = DEVICE_NATIVE_ENDIAN,
> +    .valid = {
> +        .min_access_size = 4,
> +        .max_access_size = 4,
> +    },
> +};
> +
> +static uint64_t tcx_dummy_readl(void *opaque, hwaddr addr,
>                               unsigned size)
>   {
> +    /* printf ("TCX Dummy (ALT,DHC,TEC) READ %X %i\n",(int)addr,(int)size);*/
>       return 0;
>   }
>
> -static void dummy_writel(void *opaque, hwaddr addr,
> +static void tcx_dummy_writel(void *opaque, hwaddr addr,
>                            uint64_t val, unsigned size)
>   {
> +    /* printf ("TCX Dummy (ALT,DHC,TEC) WRITE %X %X\n",(int)addr,(int)val);*/
>   }
>
> -static const MemoryRegionOps dummy_ops = {
> -    .read = dummy_readl,
> -    .write = dummy_writel,
> +static const MemoryRegionOps tcx_dummy_ops = {
> +    .read = tcx_dummy_readl,
> +    .write = tcx_dummy_writel,
>       .endianness = DEVICE_NATIVE_ENDIAN,
>       .valid = {
>           .min_access_size = 4,
> @@ -544,7 +946,7 @@ static int tcx_init1(SysBusDevice *dev)
>       vmstate_register_ram_global(&s->vram_mem);
>       vram_base = memory_region_get_ram_ptr(&s->vram_mem);
>
> -    /* FCode ROM */
> +    /* 10/ROM   : FCode ROM */
>       memory_region_init_ram(&s->rom, NULL, "tcx.prom", FCODE_MAX_ROM_SIZE);
>       vmstate_register_ram_global(&s->rom);
>       memory_region_set_readonly(&s->rom, true);
> @@ -560,7 +962,7 @@ static int tcx_init1(SysBusDevice *dev)
>           }
>       }
>
> -    /* 8-bit plane */
> +    /*  0/DFB8  : 8-bit plane */
>       s->vram = vram_base;
>       size = s->vram_size;
>       memory_region_init_alias(&s->vram_8bit, OBJECT(s), "tcx.vram.8bit",
> @@ -569,50 +971,85 @@ static int tcx_init1(SysBusDevice *dev)
>       vram_offset += size;
>       vram_base += size;
>
> -    /* DAC */
> -    memory_region_init_io(&s->dac, OBJECT(s), &tcx_dac_ops, s,
> -                          "tcx.dac", TCX_DAC_NREGS);
> -    sysbus_init_mmio(dev, &s->dac);
> +    /*  1/DFB24 : 24bit plane */
> +    size = s->vram_size * 4;
> +    s->vram24 = (uint32_t *)vram_base;
> +    s->vram24_offset = vram_offset;
> +    memory_region_init_alias(&s->vram_24bit, OBJECT(s), "tcx.vram.24bit",
> +                             &s->vram_mem, vram_offset, size);
> +    sysbus_init_mmio(dev, &s->vram_24bit);
> +    vram_offset += size;
> +    vram_base += size;
> +
> +    /*  2/STIP : Stippler */
> +    memory_region_init_io(&s->stip, OBJECT(s), &tcx_stip_ops, s, "tcx.stip",
> +                          TCX_STIP_NREGS);
> +    sysbus_init_mmio(dev, &s->stip);
> +
> +    /*  3/BLIT : Blitter */
> +    memory_region_init_io(&s->blit, OBJECT(s), &tcx_blit_ops, s, "tcx.blit",
> +                          TCX_BLIT_NREGS);
> +    sysbus_init_mmio(dev, &s->blit);
> +
> +    /*  4/RDFB32 : Raw Framebuffer */
> +    size = s->vram_size * 4;
> +    s->cplane = (uint32_t *)vram_base;
> +    s->cplane_offset = vram_offset;
> +    memory_region_init_alias(&s->vram_cplane, OBJECT(s), "tcx.vram.cplane",
> +                             &s->vram_mem, vram_offset, size);
> +    sysbus_init_mmio(dev, &s->vram_cplane);
>
> -    /* TEC (dummy) */
> -    memory_region_init_io(&s->tec, OBJECT(s), &dummy_ops, s,
> +    /*  5/RSTIP : Raw Stippler */
> +    memory_region_init_io(&s->rstip, OBJECT(s), &tcx_rstip_ops, s, "tcx.rstip",
> +                          TCX_RSTIP_NREGS);
> +    sysbus_init_mmio(dev, &s->rstip);
> +
> +    /*  6/RBLIT : Raw Blitter */
> +    memory_region_init_io(&s->rblit, OBJECT(s), &tcx_rblit_ops, s, "tcx.rblit",
> +                          TCX_RBLIT_NREGS);
> +    sysbus_init_mmio(dev, &s->rblit);
> +
> +    /*  7/TEC : ??? */
> +    memory_region_init_io(&s->tec, OBJECT(s), &tcx_dummy_ops, s,
>                             "tcx.tec", TCX_TEC_NREGS);
>       sysbus_init_mmio(dev, &s->tec);
> -    /* THC: NetBSD writes here even with 8-bit display: dummy */
> -    memory_region_init_io(&s->thc24, OBJECT(s), &dummy_ops, s, "tcx.thc24",
> -                          TCX_THC_NREGS_24);
> -    sysbus_init_mmio(dev, &s->thc24);
> -
> -    if (s->depth == 24) {
> -        /* 24-bit plane */
> -        size = s->vram_size * 4;
> -        s->vram24 = (uint32_t *)vram_base;
> -        s->vram24_offset = vram_offset;
> -        memory_region_init_alias(&s->vram_24bit, OBJECT(s), "tcx.vram.24bit",
> -                                 &s->vram_mem, vram_offset, size);
> -        sysbus_init_mmio(dev, &s->vram_24bit);
> -        vram_offset += size;
> -        vram_base += size;
> -
> -        /* Control plane */
> -        size = s->vram_size * 4;
> -        s->cplane = (uint32_t *)vram_base;
> -        s->cplane_offset = vram_offset;
> -        memory_region_init_alias(&s->vram_cplane, OBJECT(s), "tcx.vram.cplane",
> -                                 &s->vram_mem, vram_offset, size);
> -        sysbus_init_mmio(dev, &s->vram_cplane);
>
> -        s->con = graphic_console_init(DEVICE(dev), &tcx24_ops, s);
> -    } else {
> -        /* THC 8 bit (dummy) */
> -        memory_region_init_io(&s->thc8, OBJECT(s), &dummy_ops, s, "tcx.thc8",
> -                              TCX_THC_NREGS_8);
> -        sysbus_init_mmio(dev, &s->thc8);
> +    /*  8/CMAP : DAC */
> +    memory_region_init_io(&s->dac, OBJECT(s), &tcx_dac_ops, s,
> +                          "tcx.dac", TCX_DAC_NREGS);
> +    sysbus_init_mmio(dev, &s->dac);
> +
> +    /*  9/THC : Cursor */
> +    memory_region_init_io(&s->thc, OBJECT(s), &tcx_thc_ops, s, "tcx.thc",
> +                          TCX_THC_NREGS);
> +    sysbus_init_mmio(dev, &s->thc);
> +
> +    /* 11/DHC : ??? */
> +    memory_region_init_io(&s->dhc, OBJECT(s), &tcx_dummy_ops, s, "tcx.dhc",
> +                          TCX_DHC_NREGS);
> +    sysbus_init_mmio(dev, &s->dhc);
> +
> +    /* 12/ALT : ??? */
> +    memory_region_init_io(&s->alt, OBJECT(s), &tcx_dummy_ops, s, "tcx.alt",
> +                          TCX_ALT_NREGS);
> +    sysbus_init_mmio(dev, &s->alt);
> +
> +    /* 9/THC24bits : NetBSD writes here even with 8-bit display: dummy */
> +    if (s->depth == 8) {
> +        memory_region_init_io(&s->thc24, OBJECT(s), &tcx_dummy_ops, s,
> +                              "tcx.thc24", TCX_THC_NREGS);
> +        sysbus_init_mmio(dev, &s->thc24);
> +    }
>
> +    if (s->depth == 8) {
>           s->con = graphic_console_init(DEVICE(dev), &tcx_ops, s);
> +    } else {
> +        s->con = graphic_console_init(DEVICE(dev), &tcx24_ops, s);
>       }
> +    s->thcmisc = 0;
>
>       qemu_console_resize(s->con, s->width, s->height);
> +
>       return 0;
>   }
>
> diff --git a/hw/sparc/sun4m.c b/hw/sparc/sun4m.c
> index 2957d90..7eee180 100644
> --- a/hw/sparc/sun4m.c
> +++ b/hw/sparc/sun4m.c
> @@ -540,24 +540,40 @@ static void tcx_init(hwaddr addr, int vram_size, int width,
>       qdev_prop_set_uint64(dev, "prom_addr", addr);
>       qdev_init_nofail(dev);
>       s = SYS_BUS_DEVICE(dev);
> -    /* FCode ROM */
> +
> +    /* 10/ROM   : FCode ROM */
>       sysbus_mmio_map(s, 0, addr);
> -    /* 8-bit plane */
> +    /*  0/DFB8  : 8-bit plane */
>       sysbus_mmio_map(s, 1, addr + 0x00800000ULL);
> -    /* DAC */
> -    sysbus_mmio_map(s, 2, addr + 0x00200000ULL);
> -    /* TEC (dummy) */
> -    sysbus_mmio_map(s, 3, addr + 0x00700000ULL);
> -    /* THC 24 bit: NetBSD writes here even with 8-bit display: dummy */
> -    sysbus_mmio_map(s, 4, addr + 0x00301000ULL);
> -    if (depth == 24) {
> -        /* 24-bit plane */
> -        sysbus_mmio_map(s, 5, addr + 0x02000000ULL);
> -        /* Control plane */
> -        sysbus_mmio_map(s, 6, addr + 0x0a000000ULL);
> +    /*  1/DFB24 : 24bit plane */
> +    sysbus_mmio_map(s, 2, addr + 0x02000000ULL);
> +    /*  2/STIP  : Stipple */
> +    sysbus_mmio_map(s, 3, addr + 0x04000000ULL);
> +    /*  3/BLIT  : Blitter */
> +    sysbus_mmio_map(s, 4, addr + 0x06000000ULL);
> +    /*  4/RDFB32: Raw framebuffer. Control plane */
> +    sysbus_mmio_map(s, 5, addr + 0x0A000000ULL);
> +    /*  5/RSTIP : Raw Stipple */
> +    sysbus_mmio_map(s, 6, addr + 0x0C000000ULL);
> +    /*  6/RBLIT : Raw Blitter */
> +    sysbus_mmio_map(s, 7, addr + 0x0E000000ULL);
> +    /*  7/TEC   : Transform Engine */
> +    sysbus_mmio_map(s, 8, addr + 0x00700000ULL);
> +    /*  8/CMAP  : DAC */
> +    sysbus_mmio_map(s, 9, addr + 0x00200000ULL);
> +    /*  9/THC   : */
> +    if (depth == 8) {
> +        sysbus_mmio_map(s, 10, addr + 0x00300000ULL);
>       } else {
> -        /* THC 8 bit (dummy) */
> -        sysbus_mmio_map(s, 5, addr + 0x00300000ULL);
> +        sysbus_mmio_map(s, 10, addr + 0x00301000ULL);
> +    }
> +    /* 11/DHC   : */
> +    sysbus_mmio_map(s, 11, addr + 0x00240000ULL);
> +    /* 12/ALT   : */
> +    sysbus_mmio_map(s, 12, addr + 0x00280000ULL);
> +    /* 9/THC24bits : NetBSD writes here even with 8-bit display: dummy */
> +    if (depth == 8) {
> +        sysbus_mmio_map(s, 13, addr + 0x00301000ULL);
>       }
>   }

I think in summary that while the patch appears to work well (indeed it 
managed to boot into my Solaris 8 installer with TCX in both 8 and 24 
bit modes!), the colour problem in NetBSD may need some further 
investigation and the patch needs to be broken down into more manageable 
chunks and rebased on master.

As a starting point I would suggest a breakdown like this:

1) Reorganisation of memory region dirty handling
2) Tidy-ups/comments (please avoid unnecessary whitespace changes!)
3) Cursor functionality
4) Remaining hardware registers

Before resubmitting, please also run the patchset through 
scripts/checkpatch.pl which should catch most of the changes that don't 
match the QEMU coding style.


ATB,

Mark.

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [Qemu-devel] [PATCH 1/1] Sun4m : TCX framebuffer hardware acceleration
       [not found]       ` <53878F06.5030902@caramail.com>
@ 2014-07-08 23:40         ` Mark Cave-Ayland
  2014-07-12 22:23           ` Olivier Danet
  0 siblings, 1 reply; 6+ messages in thread
From: Mark Cave-Ayland @ 2014-07-08 23:40 UTC (permalink / raw)
  To: Olivier Danet, qemu-devel, Blue Swirl, Artyom Tarasenko

[-- Attachment #1: Type: text/plain, Size: 2634 bytes --]

On 29/05/14 20:48, Olivier Danet wrote:

> Hello Mark
>
> - Don't you like green ?
> It looks fine for me : http://temlib.org/pub/boot_netbsd6.jpg
>
> - "checkpatch.pl" did not find anything wrong with this patch. I will adjust style and spacings anyway.
>
> - AFAIU, it is impossible to implement exactly this video board on QEMU with reasonable performance.
>
> The S24/TCX has a 1Meg * 26 bits framebuffer.
>
> For each pixel, two bits are used to select between 256-colour indexed and 24-bit truecolor modes.
> The RAMDAC/palette handles this selection, as well as an overlay plane for the cursor
> (with 4 additional colours).
> This enables handling different "visuals" for each X11 window. Text mode is also 8-bit indexed
> even in 24-bit mode.
>
> The memory is simultaneously accessible at several addresses, with different alignment :
>
>      RDFB32: Each pixel occupies 32bits. D[31:26]=0000_00, D[25:24]=MODE, D[23:0]=Colour
>      DFB24 : Each pixel occupies 32bits. D[31:24]=0000_0000, D[23:0]=Colour
>      DFB8  : Each pixel occupies 8bits, mapped to D[7:0] of the framebuffer.
>
>      MODE=D[25:24] (-> datasheet ATT20C567)
>        00 : 256 colours "pseudocolor"     : R=pal_r(D[ 7: 0]) G=pal_g(D[ 7:0]) B=pal_b(D[ 7:0])
>        01 : 16M colours "directcolor"     : R=pal_r(D[ 7: 0]) G=pal_g(D[15:8]) B=pal_b(D[23:16])
>        10 : 16M colours "truecolor" gamma : R=gamma(D[ 7: 0]) G=gamma(D[15:8]) B=gamma(D[23:16])
>        11 : 16M colours "truecolor" raw   : R=      D[ 7: 0]  G=      D[15:8]  B=      D[23:16]
>
>   QEMU cannot directly imitate this behavior, so :
>   - The RDFB32 area is only used for the MODE bits
>   - 8-bit colour applications are expected to write only in the DFB8 area
>   - 24-bit colour applications are expected to write only in the DFB24 area
>   - the blitter and stippler accelerators update all areas simultaneously in 24bits mode
>   - As the 24-bit mode is simultaneously both an 8-bit and a 24-bit mode, dirtying the (smaller) 8-bit area
> is sufficient.
>
> Olivier

Hi Olivier,

So sorry for the delay on this - I just realised while replying to another
email earlier today that I'd totally forgotten about this one :/

Please find attached what I believe is your patch rebased onto git 
master, which when applied to my local repository (and using your binary 
QEMU,tcx.bin from http://temlib.org/pub/QEMU,tcx.bin) gives me the green 
text when booting NetBSD 6.1.3 as can be seen at 
http://www.ilande.co.uk/tmp/netbsd.png.

Can you double-check and make sure that I haven't accidentally broken 
anything during the rebase? It required quite a bit of work to fix up by 
hand.


ATB,

Mark.


[-- Attachment #2: qemu-tcx-accel-rebase.patch --]
[-- Type: text/x-diff, Size: 32983 bytes --]

diff --git a/hw/display/tcx.c b/hw/display/tcx.c
index 28c742c..084bb4c 100644
--- a/hw/display/tcx.c
+++ b/hw/display/tcx.c
@@ -33,17 +33,26 @@
 
 #define MAXX 1024
 #define MAXY 768
-#define TCX_DAC_NREGS 16
-#define TCX_THC_NREGS_8  0x081c
-#define TCX_THC_NREGS_24 0x1000
+#define TCX_DAC_NREGS    16
+#define TCX_THC_NREGS    0x1000
+#define TCX_DHC_NREGS    0x4000
 #define TCX_TEC_NREGS    0x1000
+#define TCX_ALT_NREGS    0x8000
+#define TCX_STIP_NREGS   0x800000
+#define TCX_BLIT_NREGS   0x800000
+#define TCX_RSTIP_NREGS  0x800000
+#define TCX_RBLIT_NREGS  0x800000
+
+#define TCX_THC_MISC     0x818
+#define TCX_THC_CURSXY   0x8fc
+#define TCX_THC_CURSMASK 0x900
+#define TCX_THC_CURSBITS 0x980
 
 #define TYPE_TCX "SUNW,tcx"
 #define TCX(obj) OBJECT_CHECK(TCXState, (obj), TYPE_TCX)
 
 typedef struct TCXState {
     SysBusDevice parent_obj;
-
     QemuConsole *con;
     uint8_t *vram;
     uint32_t *vram24, *cplane;
@@ -52,17 +61,30 @@ typedef struct TCXState {
     MemoryRegion vram_mem;
     MemoryRegion vram_8bit;
     MemoryRegion vram_24bit;
+    MemoryRegion stip;
+    MemoryRegion blit;
     MemoryRegion vram_cplane;
-    MemoryRegion dac;
+    MemoryRegion rstip;
+    MemoryRegion rblit;
     MemoryRegion tec;
+    MemoryRegion dac;
+    MemoryRegion thc;
+    MemoryRegion dhc;
+    MemoryRegion alt;
     MemoryRegion thc24;
-    MemoryRegion thc8;
+
     ram_addr_t vram24_offset, cplane_offset;
+    uint32_t tmpblit;
     uint32_t vram_size;
-    uint32_t palette[256];
-    uint8_t r[256], g[256], b[256];
+    uint32_t palette[260];
+    uint8_t r[260], g[260], b[260];
     uint16_t width, height, depth;
     uint8_t dac_index, dac_state;
+    uint32_t thcmisc;
+    uint32_t cursmask[32];
+    uint32_t cursbits[32];
+    uint16_t cursx;
+    uint16_t cursy;
 } TCXState;
 
 static void tcx_set_dirty(TCXState *s)
@@ -70,10 +92,36 @@ static void tcx_set_dirty(TCXState *s)
     memory_region_set_dirty(&s->vram_mem, 0, MAXX * MAXY);
 }
 
-static void tcx24_set_dirty(TCXState *s)
+static inline int tcx24_check_dirty(TCXState *s, ram_addr_t page,
+                                    ram_addr_t page24, ram_addr_t cpage)
+{
+    int ret;
+
+    ret = memory_region_get_dirty(&s->vram_mem, page, TARGET_PAGE_SIZE,
+                                  DIRTY_MEMORY_VGA);
+    ret |= memory_region_get_dirty(&s->vram_mem, page24, TARGET_PAGE_SIZE * 4,
+                                   DIRTY_MEMORY_VGA);
+    ret |= memory_region_get_dirty(&s->vram_mem, cpage, TARGET_PAGE_SIZE * 4,
+                                   DIRTY_MEMORY_VGA);
+    return ret;
+}
+
+static inline void tcx24_reset_dirty(TCXState *ts, ram_addr_t page_min,
+                               ram_addr_t page_max, ram_addr_t page24,
+                              ram_addr_t cpage)
 {
-    memory_region_set_dirty(&s->vram_mem, s->vram24_offset, MAXX * MAXY * 4);
-    memory_region_set_dirty(&s->vram_mem, s->cplane_offset, MAXX * MAXY * 4);
+    memory_region_reset_dirty(&ts->vram_mem,
+                              page_min,
+                              (page_max - page_min) + TARGET_PAGE_SIZE,
+                              DIRTY_MEMORY_VGA);
+    memory_region_reset_dirty(&ts->vram_mem,
+                              page24 + page_min * 4,
+                              (page_max - page_min) * 4 + TARGET_PAGE_SIZE,
+                              DIRTY_MEMORY_VGA);
+    memory_region_reset_dirty(&ts->vram_mem,
+                              cpage + page_min * 4,
+                              (page_max - page_min) * 4 + TARGET_PAGE_SIZE,
+                              DIRTY_MEMORY_VGA);
 }
 
 static void update_palette_entries(TCXState *s, int start, int end)
@@ -102,11 +150,7 @@ static void update_palette_entries(TCXState *s, int start, int end)
             break;
         }
     }
-    if (s->depth == 24) {
-        tcx24_set_dirty(s);
-    } else {
-        tcx_set_dirty(s);
-    }
+    tcx_set_dirty(s);
 }
 
 static void tcx_draw_line32(TCXState *s1, uint8_t *d,
@@ -116,7 +160,7 @@ static void tcx_draw_line32(TCXState *s1, uint8_t *d,
     uint8_t val;
     uint32_t *p = (uint32_t *)d;
 
-    for(x = 0; x < width; x++) {
+    for (x = 0; x < width; x++) {
         val = *s++;
         *p++ = s1->palette[val];
     }
@@ -129,7 +173,7 @@ static void tcx_draw_line16(TCXState *s1, uint8_t *d,
     uint8_t val;
     uint16_t *p = (uint16_t *)d;
 
-    for(x = 0; x < width; x++) {
+    for (x = 0; x < width; x++) {
         val = *s++;
         *p++ = s1->palette[val];
     }
@@ -147,6 +191,83 @@ static void tcx_draw_line8(TCXState *s1, uint8_t *d,
     }
 }
 
+static void tcx_draw_cursor32(TCXState *s1, uint8_t *d,
+                              int y, int width)
+{
+    int x, len;
+    uint32_t mask, bits;
+    uint32_t *p = (uint32_t *)d;
+
+    y = y - s1->cursy;
+    mask = s1->cursmask[y];
+    bits = s1->cursbits[y];
+    len = MIN(width - s1->cursx, 32);
+    p = &p[s1->cursx];
+    for (x = 0; x < len; x++) {
+        if (mask & 0x80000000) {
+            if (bits & 0x80000000) {
+                *p = s1->palette[259];
+            } else {
+                *p = s1->palette[258];
+            }
+        }
+        p++;
+        mask <<= 1;
+        bits <<= 1;
+    }
+}
+
+static void tcx_draw_cursor16(TCXState *s1, uint8_t *d,
+                              int y, int width)
+{
+    int x, len;
+    uint32_t mask, bits;
+    uint16_t *p = (uint16_t *)d;
+
+    y = y - s1->cursy;
+    mask = s1->cursmask[y];
+    bits = s1->cursbits[y];
+    len = MIN(width - s1->cursx, 32);
+    p = &p[s1->cursx];
+    for (x = 0; x < len; x++) {
+        if (mask & 0x80000000) {
+            if (bits & 0x80000000) {
+                *p = s1->palette[259];
+            } else {
+                *p = s1->palette[258];
+            }
+        }
+        p++;
+        mask <<= 1;
+        bits <<= 1;
+    }
+}
+
+static void tcx_draw_cursor8(TCXState *s1, uint8_t *d,
+                              int y, int width)
+{
+    int x, len;
+    uint32_t mask, bits;
+
+    y = y - s1->cursy;
+    mask = s1->cursmask[y];
+    bits = s1->cursbits[y];
+    len = MIN(width - s1->cursx, 32);
+    d = &d[s1->cursx];
+    for (x = 0; x < len; x++) {
+        if (mask & 0x80000000) {
+            if (bits & 0x80000000) {
+                *d = s1->palette[259];
+            } else {
+                *d = s1->palette[258];
+            }
+        }
+        d++;
+        mask <<= 1;
+        bits <<= 1;
+    }
+}
+
 /*
   XXX Could be much more optimal:
   * detect if line/page/whole screen is in 24 bit mode
@@ -162,11 +283,10 @@ static inline void tcx24_draw_line32(TCXState *s1, uint8_t *d,
     uint8_t val, *p8;
     uint32_t *p = (uint32_t *)d;
     uint32_t dval;
-
     bgr = is_surface_bgr(surface);
     for(x = 0; x < width; x++, s++, s24++) {
-        if ((be32_to_cpu(*cplane++) & 0xff000000) == 0x03000000) {
-            // 24-bit direct, BGR order
+        if (be32_to_cpu(*cplane) & 0x03000000) {
+            /* 24-bit direct, BGR order */
             p8 = (uint8_t *)s24;
             p8++;
             b = *p8++;
@@ -177,47 +297,18 @@ static inline void tcx24_draw_line32(TCXState *s1, uint8_t *d,
             else
                 dval = rgb_to_pixel32(r, g, b);
         } else {
+            /* 8 bits pseudocolor */
             val = *s;
             dval = s1->palette[val];
         }
         *p++ = dval;
+        cplane++;
     }
 }
 
-static inline int check_dirty(TCXState *s, ram_addr_t page, ram_addr_t page24,
-                              ram_addr_t cpage)
-{
-    int ret;
-
-    ret = memory_region_get_dirty(&s->vram_mem, page, TARGET_PAGE_SIZE,
-                                  DIRTY_MEMORY_VGA);
-    ret |= memory_region_get_dirty(&s->vram_mem, page24, TARGET_PAGE_SIZE * 4,
-                                   DIRTY_MEMORY_VGA);
-    ret |= memory_region_get_dirty(&s->vram_mem, cpage, TARGET_PAGE_SIZE * 4,
-                                   DIRTY_MEMORY_VGA);
-    return ret;
-}
-
-static inline void reset_dirty(TCXState *ts, ram_addr_t page_min,
-                               ram_addr_t page_max, ram_addr_t page24,
-                              ram_addr_t cpage)
-{
-    memory_region_reset_dirty(&ts->vram_mem,
-                              page_min,
-                              (page_max - page_min) + TARGET_PAGE_SIZE,
-                              DIRTY_MEMORY_VGA);
-    memory_region_reset_dirty(&ts->vram_mem,
-                              page24 + page_min * 4,
-                              (page_max - page_min) * 4 + TARGET_PAGE_SIZE,
-                              DIRTY_MEMORY_VGA);
-    memory_region_reset_dirty(&ts->vram_mem,
-                              cpage + page_min * 4,
-                              (page_max - page_min) * 4 + TARGET_PAGE_SIZE,
-                              DIRTY_MEMORY_VGA);
-}
-
 /* Fixed line length 1024 allows us to do nice tricks not possible on
    VGA... */
+
 static void tcx_update_display(void *opaque)
 {
     TCXState *ts = opaque;
@@ -226,6 +317,7 @@ static void tcx_update_display(void *opaque)
     int y, y_start, dd, ds;
     uint8_t *d, *s;
     void (*f)(TCXState *s1, uint8_t *dst, const uint8_t *src, int width);
+    void (*fc)(TCXState *s1, uint8_t *dst, int y, int width);
 
     if (surface_bits_per_pixel(surface) == 0) {
         return;
@@ -243,20 +335,23 @@ static void tcx_update_display(void *opaque)
     switch (surface_bits_per_pixel(surface)) {
     case 32:
         f = tcx_draw_line32;
+        fc = tcx_draw_cursor32;
         break;
     case 15:
     case 16:
         f = tcx_draw_line16;
+        fc = tcx_draw_cursor16;
         break;
     default:
     case 8:
         f = tcx_draw_line8;
+        fc = tcx_draw_cursor8;
         break;
     case 0:
         return;
     }
 
-    for(y = 0; y < ts->height; y += 4, page += TARGET_PAGE_SIZE) {
+    for (y = 0; y < ts->height; page += TARGET_PAGE_SIZE) {
         if (memory_region_get_dirty(&ts->vram_mem, page, TARGET_PAGE_SIZE,
                                     DIRTY_MEMORY_VGA)) {
             if (y_start < 0)
@@ -265,18 +360,35 @@ static void tcx_update_display(void *opaque)
                 page_min = page;
             if (page > page_max)
                 page_max = page;
+
             f(ts, d, s, ts->width);
+            if (y >= ts->cursy && y < ts->cursy+32 && ts->cursx < ts->width) {
+                fc(ts, d, y, ts->width);
+            }
             d += dd;
             s += ds;
+            y++;
             f(ts, d, s, ts->width);
+            if (y >= ts->cursy && y < ts->cursy+32 && ts->cursx < ts->width) {
+                fc(ts, d, y, ts->width);
+            }
             d += dd;
             s += ds;
+            y++;
             f(ts, d, s, ts->width);
+            if (y >= ts->cursy && y < ts->cursy+32 && ts->cursx < ts->width) {
+                fc(ts, d, y, ts->width);
+            }
             d += dd;
             s += ds;
+            y++;
             f(ts, d, s, ts->width);
+            if (y >= ts->cursy && y < ts->cursy+32 && ts->cursx < ts->width) {
+                fc(ts, d, y, ts->width);
+            }
             d += dd;
             s += ds;
+            y++;
         } else {
             if (y_start >= 0) {
                 /* flush to display */
@@ -286,6 +398,7 @@ static void tcx_update_display(void *opaque)
             }
             d += dd * 4;
             s += ds * 4;
+            y += 4;
         }
     }
     if (y_start >= 0) {
@@ -328,9 +441,9 @@ static void tcx24_update_display(void *opaque)
     dd = surface_stride(surface);
     ds = 1024;
 
-    for(y = 0; y < ts->height; y += 4, page += TARGET_PAGE_SIZE,
+    for (y = 0; y < ts->height; page += TARGET_PAGE_SIZE,
             page24 += TARGET_PAGE_SIZE, cpage += TARGET_PAGE_SIZE) {
-        if (check_dirty(ts, page, page24, cpage)) {
+        if (tcx24_check_dirty(ts, page, page24, cpage)) {
             if (y_start < 0)
                 y_start = y;
             if (page < page_min)
@@ -338,25 +451,41 @@ static void tcx24_update_display(void *opaque)
             if (page > page_max)
                 page_max = page;
             tcx24_draw_line32(ts, d, s, ts->width, cptr, s24);
+            if (y >= ts->cursy && y < ts->cursy+32 && ts->cursx < ts->width) {
+                tcx_draw_cursor32(ts, d, y, ts->width);
+            }
             d += dd;
             s += ds;
             cptr += ds;
             s24 += ds;
+            y++;
             tcx24_draw_line32(ts, d, s, ts->width, cptr, s24);
+            if (y >= ts->cursy && y < ts->cursy+32 && ts->cursx < ts->width) {
+                tcx_draw_cursor32(ts, d, y, ts->width);
+            }
             d += dd;
             s += ds;
             cptr += ds;
             s24 += ds;
+            y++;
             tcx24_draw_line32(ts, d, s, ts->width, cptr, s24);
+            if (y >= ts->cursy && y < ts->cursy+32 && ts->cursx < ts->width) {
+                tcx_draw_cursor32(ts, d, y, ts->width);
+            }
             d += dd;
             s += ds;
             cptr += ds;
             s24 += ds;
+            y++;
             tcx24_draw_line32(ts, d, s, ts->width, cptr, s24);
+            if (y >= ts->cursy && y < ts->cursy+32 && ts->cursx < ts->width) {
+                tcx_draw_cursor32(ts, d, y, ts->width);
+            }
             d += dd;
             s += ds;
             cptr += ds;
             s24 += ds;
+            y++;
         } else {
             if (y_start >= 0) {
                 /* flush to display */
@@ -368,6 +497,7 @@ static void tcx24_update_display(void *opaque)
             s += ds * 4;
             cptr += ds * 4;
             s24 += ds * 4;
+            y += 4;
         }
     }
     if (y_start >= 0) {
@@ -377,7 +507,7 @@ static void tcx24_update_display(void *opaque)
     }
     /* reset modified pages */
     if (page_max >= page_min) {
-        reset_dirty(ts, page_min, page_max, page24, cpage);
+        tcx24_reset_dirty(ts, page_min, page_max, page24, cpage);
     }
 }
 
@@ -394,7 +524,6 @@ static void tcx24_invalidate_display(void *opaque)
     TCXState *s = opaque;
 
     tcx_set_dirty(s);
-    tcx24_set_dirty(s);
     qemu_console_resize(s->con, s->width, s->height);
 }
 
@@ -403,12 +532,7 @@ static int vmstate_tcx_post_load(void *opaque, int version_id)
     TCXState *s = opaque;
 
     update_palette_entries(s, 0, 256);
-    if (s->depth == 24) {
-        tcx24_set_dirty(s);
-    } else {
-        tcx_set_dirty(s);
-    }
-
+    tcx_set_dirty(s);
     return 0;
 }
 
@@ -435,56 +559,87 @@ static void tcx_reset(DeviceState *d)
     TCXState *s = TCX(d);
 
     /* Initialize palette */
-    memset(s->r, 0, 256);
-    memset(s->g, 0, 256);
-    memset(s->b, 0, 256);
+    memset(s->r, 0, 260);
+    memset(s->g, 0, 260);
+    memset(s->b, 0, 260);
     s->r[255] = s->g[255] = s->b[255] = 255;
-    update_palette_entries(s, 0, 256);
+    s->r[256] = s->g[256] = s->b[256] = 255;
+    s->r[258] = s->g[258] = s->b[258] = 255;
+    update_palette_entries(s, 0, 260);
     memset(s->vram, 0, MAXX*MAXY);
     memory_region_reset_dirty(&s->vram_mem, 0, MAXX * MAXY * (1 + 4 + 4),
                               DIRTY_MEMORY_VGA);
     s->dac_index = 0;
     s->dac_state = 0;
+    s->cursx = 0xF000; /*Put cursor off screen */
+    s->cursy = 0xF000;
 }
 
 static uint64_t tcx_dac_readl(void *opaque, hwaddr addr,
                               unsigned size)
 {
-    return 0;
+    TCXState *s = opaque;
+    uint32_t val = 0;
+
+    switch (s->dac_state) {
+    case 0:
+        val = s->r[s->dac_index] << 24;
+        s->dac_state++;
+        break;
+    case 1:
+        val = s->g[s->dac_index] << 24;
+        s->dac_state++;
+        break;
+    case 2:
+        val = s->b[s->dac_index] << 24;
+        s->dac_index = (s->dac_index + 1) & 255; /* Index autoincrement */
+    default:
+        s->dac_state = 0;
+        break;
+    }
+
+    return val;
 }
 
 static void tcx_dac_writel(void *opaque, hwaddr addr, uint64_t val,
                            unsigned size)
 {
     TCXState *s = opaque;
+    unsigned index;
 
     switch (addr) {
-    case 0:
+    case 0: /* Address */
         s->dac_index = val >> 24;
         s->dac_state = 0;
         break;
-    case 4:
+    case 4:  /* Pixel colours */
+    case 12: /* Overlay (cursor) colours */
+        if (addr & 8) {
+            index = (s->dac_index & 3) + 256;
+        } else {
+            index = s->dac_index;
+        }
         switch (s->dac_state) {
         case 0:
-            s->r[s->dac_index] = val >> 24;
-            update_palette_entries(s, s->dac_index, s->dac_index + 1);
+            s->r[index] = val >> 24;
+            update_palette_entries(s, index, index + 1);
             s->dac_state++;
             break;
         case 1:
-            s->g[s->dac_index] = val >> 24;
-            update_palette_entries(s, s->dac_index, s->dac_index + 1);
+            s->g[index] = val >> 24;
+            update_palette_entries(s, index, index + 1);
             s->dac_state++;
             break;
         case 2:
-            s->b[s->dac_index] = val >> 24;
-            update_palette_entries(s, s->dac_index, s->dac_index + 1);
-            s->dac_index = (s->dac_index + 1) & 255; // Index autoincrement
+            s->b[index] = val >> 24;
+            update_palette_entries(s, index, index + 1);
+            s->dac_index = (s->dac_index + 1) & 255; /* Index autoincrement */
         default:
             s->dac_state = 0;
             break;
         }
         break;
-    default:
+    default: /* Control registers */
         break;
     }
 }
@@ -499,20 +654,267 @@ static const MemoryRegionOps tcx_dac_ops = {
     },
 };
 
-static uint64_t dummy_readl(void *opaque, hwaddr addr,
+static uint64_t tcx_stip_readl(void *opaque, hwaddr addr,
+                               unsigned size)
+{
+    return 0;
+}
+
+static void tcx_stip_writel(void *opaque, hwaddr addr,
+                            uint64_t val, unsigned size)
+{
+    TCXState *s = opaque;
+    int i;
+    uint32_t col;
+
+    if (!(addr & 4)) {
+        s->tmpblit = val;
+    } else {
+        addr = (addr / 8) & 0xFFFFF;
+        col = cpu_to_be32(s->tmpblit);
+        if (s->depth == 24) {
+            for (i = 0; i < 32; i++)  {
+                if (val & 0x80000000) {
+                    s->vram[addr + i] = s->tmpblit;
+                    s->vram24[addr + i] = col;
+                }
+                val <<= 1;
+            }
+        } else {
+            for (i = 0; i < 32; i++)  {
+                if (val & 0x80000000) {
+                    s->vram[addr + i] = s->tmpblit;
+                }
+                val <<= 1;
+            }
+        }
+        memory_region_set_dirty(&s->vram_mem, addr, 32);
+    }
+}
+
+static void tcx_rstip_writel(void *opaque, hwaddr addr,
+                             uint64_t val, unsigned size)
+{
+    TCXState *s = opaque;
+    int i;
+    uint32_t col;
+
+    if (!(addr & 4)) {
+        s->tmpblit = val;
+    } else {
+        addr = (addr / 8) & 0xFFFFF;
+        col = cpu_to_be32(s->tmpblit);
+        if (s->depth == 24) {
+            for (i = 0; i < 32; i++) {
+                if (val&0x80000000) {
+                    s->vram[addr + i] = s->tmpblit;
+                    s->vram24[addr + i] = col;
+                    s->cplane[addr + i] = col;
+                }
+                val <<= 1;
+            }
+        } else {
+            for (i = 0; i < 32; i++)  {
+                if (val&0x80000000) {
+                    s->vram[addr + i] = s->tmpblit;
+                }
+                val <<= 1;
+            }
+        }
+        memory_region_set_dirty(&s->vram_mem, addr, 32);
+    }
+}
+
+static const MemoryRegionOps tcx_stip_ops = {
+    .read = tcx_stip_readl,
+    .write = tcx_stip_writel,
+    .endianness = DEVICE_NATIVE_ENDIAN,
+    .valid = {
+        .min_access_size = 4,
+        .max_access_size = 4,
+    },
+};
+
+static const MemoryRegionOps tcx_rstip_ops = {
+    .read = tcx_stip_readl,
+    .write = tcx_rstip_writel,
+    .endianness = DEVICE_NATIVE_ENDIAN,
+    .valid = {
+        .min_access_size = 4,
+        .max_access_size = 4,
+    },
+};
+
+static uint64_t tcx_blit_readl(void *opaque, hwaddr addr,
+                               unsigned size)
+{
+    return 0;
+}
+
+static void tcx_blit_writel(void *opaque, hwaddr addr,
+                            uint64_t val, unsigned size)
+{
+    TCXState *s = opaque;
+    uint32_t adsr, len;
+    int i;
+
+    if (!(addr & 4)) {
+        s->tmpblit = val;
+    } else {
+        addr = (addr / 8) & 0xFFFFF;
+        adsr = val & 0xFFFFFF;
+        len = ((val >> 24) & 0x1F) + 1;
+        if (adsr == 0xFFFFFF) {
+            memset(&s->vram[addr], s->tmpblit, len);
+            if (s->depth == 24) {
+                val = s->tmpblit & 0xFFFFFF;
+                val = cpu_to_be32(val);
+                for (i = 0; i < len; i++) {
+                    s->vram24[addr + i] = val;
+                }
+            }
+        } else {
+            memcpy(&s->vram[addr], &s->vram[adsr], len);
+            if (s->depth == 24) {
+                memcpy(&s->vram24[addr], &s->vram24[adsr], len * 4);
+            }
+        }
+        memory_region_set_dirty(&s->vram_mem, addr, len);
+    }
+}
+
+static void tcx_rblit_writel(void *opaque, hwaddr addr,
+                         uint64_t val, unsigned size)
+{
+    TCXState *s = opaque;
+    uint32_t adsr, len;
+    int i;
+
+    if (!(addr & 4)) {
+        s->tmpblit = val;
+    } else {
+        addr = (addr / 8) & 0xFFFFF;
+        adsr = val & 0xFFFFFF;
+        len = ((val >> 24) & 0x1F) + 1;
+        if (adsr == 0xFFFFFF) {
+            memset(&s->vram[addr], s->tmpblit, len);
+            if (s->depth == 24) {
+                val = s->tmpblit & 0xFFFFFF;
+                val = cpu_to_be32(val);
+                for (i = 0; i < len; i++) {
+                    s->vram24[addr + i] = val;
+                    s->cplane[addr + i] = val;
+                }
+            }
+        } else {
+            memcpy(&s->vram[addr], &s->vram[adsr], len);
+            if (s->depth == 24) {
+                memcpy(&s->vram24[addr], &s->vram24[adsr], len * 4);
+                memcpy(&s->cplane[addr], &s->cplane[adsr], len * 4);
+            }
+        }
+        memory_region_set_dirty(&s->vram_mem, addr, len);
+    }
+}
+
+static const MemoryRegionOps tcx_blit_ops = {
+    .read = tcx_blit_readl,
+    .write = tcx_blit_writel,
+    .endianness = DEVICE_NATIVE_ENDIAN,
+    .valid = {
+        .min_access_size = 4,
+        .max_access_size = 4,
+    },
+};
+
+static const MemoryRegionOps tcx_rblit_ops = {
+    .read = tcx_blit_readl,
+    .write = tcx_rblit_writel,
+    .endianness = DEVICE_NATIVE_ENDIAN,
+    .valid = {
+        .min_access_size = 4,
+        .max_access_size = 4,
+    },
+};
+
+static void tcx_invalidate_cursor_position(TCXState *s)
+{
+    int ymin, ymax, start, end;
+
+    /* invalidate only near the cursor */
+    ymin = s->cursy;
+    if (ymin >= s->height) {
+        return;
+    }
+    ymax = MIN(s->height, ymin + 32);
+    start = ymin * 1024;
+    end   = ymax * 1024;
+
+    memory_region_set_dirty(&s->vram_mem, start, end-start);
+}
+
+static uint64_t tcx_thc_readl(void *opaque, hwaddr addr,
                             unsigned size)
 {
+    TCXState *s = opaque;
+    uint64_t val;
+
+    if (addr == TCX_THC_MISC) {
+        val = s->thcmisc | 0x02000000;
+    } else {
+        val = 0;
+    }
+    return val;
+}
+
+static void tcx_thc_writel(void *opaque, hwaddr addr,
+                         uint64_t val, unsigned size)
+{
+    TCXState *s = opaque;
+
+    if (addr == TCX_THC_CURSXY) {
+        tcx_invalidate_cursor_position(s);
+        s->cursx = val >> 16;
+        s->cursy = val;
+        tcx_invalidate_cursor_position(s);
+    } else if (addr >= TCX_THC_CURSMASK && addr < TCX_THC_CURSMASK + 128) {
+        s->cursmask[(addr - TCX_THC_CURSMASK) / 4] = val;
+        tcx_invalidate_cursor_position(s);
+    } else if (addr >= TCX_THC_CURSBITS && addr < TCX_THC_CURSBITS + 128) {
+        s->cursbits[(addr - TCX_THC_CURSBITS) / 4] = val;
+        tcx_invalidate_cursor_position(s);
+    } else if (addr == TCX_THC_MISC) {
+        s->thcmisc = val;
+    }
+
+}
+
+static const MemoryRegionOps tcx_thc_ops = {
+    .read = tcx_thc_readl,
+    .write = tcx_thc_writel,
+    .endianness = DEVICE_NATIVE_ENDIAN,
+    .valid = {
+        .min_access_size = 4,
+        .max_access_size = 4,
+    },
+};
+
+static uint64_t tcx_dummy_readl(void *opaque, hwaddr addr,
+                            unsigned size)
+{
+    /* printf ("TCX Dummy (ALT,DHC,TEC) READ %X %i\n",(int)addr,(int)size);*/
     return 0;
 }
 
-static void dummy_writel(void *opaque, hwaddr addr,
+static void tcx_dummy_writel(void *opaque, hwaddr addr,
                          uint64_t val, unsigned size)
 {
+    /* printf ("TCX Dummy (ALT,DHC,TEC) WRITE %X %X\n",(int)addr,(int)val);*/
 }
 
-static const MemoryRegionOps dummy_ops = {
-    .read = dummy_readl,
-    .write = dummy_writel,
+static const MemoryRegionOps tcx_dummy_ops = {
+    .read = tcx_dummy_readl,
+    .write = tcx_dummy_writel,
     .endianness = DEVICE_NATIVE_ENDIAN,
     .valid = {
         .min_access_size = 4,
@@ -539,20 +941,50 @@ static void tcx_initfn(Object *obj)
     memory_region_set_readonly(&s->rom, true);
     sysbus_init_mmio(sbd, &s->rom);
 
-    /* DAC */
+    /*  2/STIP : Stippler */
+    memory_region_init_io(&s->stip, OBJECT(s), &tcx_stip_ops, s, "tcx.stip",
+                          TCX_STIP_NREGS);
+    sysbus_init_mmio(sbd, &s->stip);
+
+    /*  3/BLIT : Blitter */
+    memory_region_init_io(&s->blit, OBJECT(s), &tcx_blit_ops, s, "tcx.blit",
+                          TCX_BLIT_NREGS);
+    sysbus_init_mmio(sbd, &s->blit);
+    
+    /*  5/RSTIP : Raw Stippler */
+    memory_region_init_io(&s->rstip, OBJECT(s), &tcx_rstip_ops, s, "tcx.rstip",
+                          TCX_RSTIP_NREGS);
+    sysbus_init_mmio(sbd, &s->rstip);
+
+    /*  6/RBLIT : Raw Blitter */
+    memory_region_init_io(&s->rblit, OBJECT(s), &tcx_rblit_ops, s, "tcx.rblit",
+                          TCX_RBLIT_NREGS);
+    sysbus_init_mmio(sbd, &s->rblit);
+
+    /*  7/TEC : ??? */
+    memory_region_init_io(&s->tec, OBJECT(s), &tcx_dummy_ops, s,
+                          "tcx.tec", TCX_TEC_NREGS);
+    sysbus_init_mmio(sbd, &s->tec);
+
+    /*  8/CMAP : DAC */
     memory_region_init_io(&s->dac, OBJECT(s), &tcx_dac_ops, s,
                           "tcx.dac", TCX_DAC_NREGS);
     sysbus_init_mmio(sbd, &s->dac);
 
-    /* TEC (dummy) */
-    memory_region_init_io(&s->tec, OBJECT(s), &dummy_ops, s,
-                          "tcx.tec", TCX_TEC_NREGS);
-    sysbus_init_mmio(sbd, &s->tec);
+    /*  9/THC : Cursor */
+    memory_region_init_io(&s->thc, OBJECT(s), &tcx_thc_ops, s, "tcx.thc",
+                          TCX_THC_NREGS);
+    sysbus_init_mmio(sbd, &s->thc);
 
-    /* THC: NetBSD writes here even with 8-bit display: dummy */
-    memory_region_init_io(&s->thc24, OBJECT(s), &dummy_ops, s, "tcx.thc24",
-                          TCX_THC_NREGS_24);
-    sysbus_init_mmio(sbd, &s->thc24);
+    /* 11/DHC : ??? */
+    memory_region_init_io(&s->dhc, OBJECT(s), &tcx_dummy_ops, s, "tcx.dhc",
+                          TCX_DHC_NREGS);
+    sysbus_init_mmio(sbd, &s->dhc);
+
+    /* 12/ALT : ??? */
+    memory_region_init_io(&s->alt, OBJECT(s), &tcx_dummy_ops, s, "tcx.alt",
+                          TCX_ALT_NREGS);
+    sysbus_init_mmio(sbd, &s->alt);
 
     return;
 }
@@ -571,7 +1003,7 @@ static void tcx_realizefn(DeviceState *dev, Error **errp)
     vmstate_register_ram_global(&s->vram_mem);
     vram_base = memory_region_get_ram_ptr(&s->vram_mem);
 
-    /* FCode ROM */
+    /* 10/ROM   : FCode ROM */
     vmstate_register_ram_global(&s->rom);
     fcode_filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, TCX_ROM_FILE);
     if (fcode_filename) {
@@ -582,7 +1014,7 @@ static void tcx_realizefn(DeviceState *dev, Error **errp)
         }
     }
 
-    /* 8-bit plane */
+    /*  0/DFB8  : 8-bit plane */
     s->vram = vram_base;
     size = s->vram_size;
     memory_region_init_alias(&s->vram_8bit, OBJECT(s), "tcx.vram.8bit",
@@ -591,34 +1023,37 @@ static void tcx_realizefn(DeviceState *dev, Error **errp)
     vram_offset += size;
     vram_base += size;
 
-    if (s->depth == 24) {
-        /* 24-bit plane */
-        size = s->vram_size * 4;
-        s->vram24 = (uint32_t *)vram_base;
-        s->vram24_offset = vram_offset;
-        memory_region_init_alias(&s->vram_24bit, OBJECT(s), "tcx.vram.24bit",
-                                 &s->vram_mem, vram_offset, size);
-        sysbus_init_mmio(sbd, &s->vram_24bit);
-        vram_offset += size;
-        vram_base += size;
-
-        /* Control plane */
-        size = s->vram_size * 4;
-        s->cplane = (uint32_t *)vram_base;
-        s->cplane_offset = vram_offset;
-        memory_region_init_alias(&s->vram_cplane, OBJECT(s), "tcx.vram.cplane",
-                                 &s->vram_mem, vram_offset, size);
-        sysbus_init_mmio(sbd, &s->vram_cplane);
+    /*  1/DFB24 : 24bit plane */
+    size = s->vram_size * 4;
+    s->vram24 = (uint32_t *)vram_base;
+    s->vram24_offset = vram_offset;
+    memory_region_init_alias(&s->vram_24bit, OBJECT(s), "tcx.vram.24bit",
+                             &s->vram_mem, vram_offset, size);
+    sysbus_init_mmio(sbd, &s->vram_24bit);
+    vram_offset += size;
+    vram_base += size;
 
-        s->con = graphic_console_init(DEVICE(dev), 0, &tcx24_ops, s);
-    } else {
-        /* THC 8 bit (dummy) */
-        memory_region_init_io(&s->thc8, OBJECT(s), &dummy_ops, s, "tcx.thc8",
-                              TCX_THC_NREGS_8);
-        sysbus_init_mmio(sbd, &s->thc8);
+    /*  4/RDFB32 : Raw Framebuffer */
+    size = s->vram_size * 4;
+    s->cplane = (uint32_t *)vram_base;
+    s->cplane_offset = vram_offset;
+    memory_region_init_alias(&s->vram_cplane, OBJECT(s), "tcx.vram.cplane",
+                             &s->vram_mem, vram_offset, size);
+    sysbus_init_mmio(sbd, &s->vram_cplane);
 
+    /* 9/THC24bits : NetBSD writes here even with 8-bit display: dummy */
+    if (s->depth == 8) {
+        memory_region_init_io(&s->thc24, OBJECT(s), &tcx_dummy_ops, s,
+                              "tcx.thc24", TCX_THC_NREGS);
+        sysbus_init_mmio(sbd, &s->thc24);
+    }
+
+    if (s->depth == 8) {
         s->con = graphic_console_init(DEVICE(dev), 0, &tcx_ops, s);
+    } else {
+        s->con = graphic_console_init(DEVICE(dev), 0, &tcx24_ops, s);
     }
+    s->thcmisc = 0;
 
     qemu_console_resize(s->con, s->width, s->height);
 }
diff --git a/hw/sparc/sun4m.c b/hw/sparc/sun4m.c
index 67e3663..386da82 100644
--- a/hw/sparc/sun4m.c
+++ b/hw/sparc/sun4m.c
@@ -541,24 +541,40 @@ static void tcx_init(hwaddr addr, int vram_size, int width,
     qdev_prop_set_uint64(dev, "prom_addr", addr);
     qdev_init_nofail(dev);
     s = SYS_BUS_DEVICE(dev);
-    /* FCode ROM */
+
+    /* 10/ROM   : FCode ROM */
     sysbus_mmio_map(s, 0, addr);
-    /* DAC */
-    sysbus_mmio_map(s, 1, addr + 0x00200000ULL);
-    /* TEC (dummy) */
-    sysbus_mmio_map(s, 2, addr + 0x00700000ULL);
-    /* THC 24 bit: NetBSD writes here even with 8-bit display: dummy */
-    sysbus_mmio_map(s, 3, addr + 0x00301000ULL);
-    /* 8-bit plane */
-    sysbus_mmio_map(s, 4, addr + 0x00800000ULL);
-    if (depth == 24) {
-        /* 24-bit plane */
-        sysbus_mmio_map(s, 5, addr + 0x02000000ULL);
-        /* Control plane */
-        sysbus_mmio_map(s, 6, addr + 0x0a000000ULL);
+    /*  2/STIP  : Stipple */
+    sysbus_mmio_map(s, 1, addr + 0x04000000ULL);
+    /*  3/BLIT  : Blitter */
+    sysbus_mmio_map(s, 2, addr + 0x06000000ULL);
+    /*  5/RSTIP : Raw Stipple */
+    sysbus_mmio_map(s, 3, addr + 0x0C000000ULL);
+    /*  6/RBLIT : Raw Blitter */
+    sysbus_mmio_map(s, 4, addr + 0x0E000000ULL);
+    /*  7/TEC   : Transform Engine */
+    sysbus_mmio_map(s, 5, addr + 0x00700000ULL);    
+    /*  8/CMAP  : DAC */
+    sysbus_mmio_map(s, 6, addr + 0x00200000ULL);
+    /*  9/THC   : */
+    if (depth == 8) {
+        sysbus_mmio_map(s, 7, addr + 0x00300000ULL);
     } else {
-        /* THC 8 bit (dummy) */
-        sysbus_mmio_map(s, 5, addr + 0x00300000ULL);
+        sysbus_mmio_map(s, 7, addr + 0x00301000ULL);
+    }    
+    /* 11/DHC   : */
+    sysbus_mmio_map(s, 8, addr + 0x00240000ULL);
+    /* 12/ALT   : */
+    sysbus_mmio_map(s, 9, addr + 0x00280000ULL);
+    /*  0/DFB8  : 8-bit plane */
+    sysbus_mmio_map(s, 10, addr + 0x00800000ULL);
+    /*  1/DFB24 : 24bit plane */
+    sysbus_mmio_map(s, 11, addr + 0x02000000ULL);
+    /*  4/RDFB32: Raw framebuffer. Control plane */
+    sysbus_mmio_map(s, 12, addr + 0x0A000000ULL);    
+    /* 9/THC24bits : NetBSD writes here even with 8-bit display: dummy */
+    if (depth == 8) {
+        sysbus_mmio_map(s, 13, addr + 0x00301000ULL);
     }
 }
 

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [Qemu-devel] [PATCH 1/1] Sun4m : TCX framebuffer hardware acceleration
  2014-07-08 23:40         ` Mark Cave-Ayland
@ 2014-07-12 22:23           ` Olivier Danet
  0 siblings, 0 replies; 6+ messages in thread
From: Olivier Danet @ 2014-07-12 22:23 UTC (permalink / raw)
  To: Mark Cave-Ayland, qemu-devel, Blue Swirl, Artyom Tarasenko

On 09/07/2014 01:40, Mark Cave-Ayland wrote:
> On 29/05/14 20:48, Olivier Danet wrote:
> 
>> Hello Mark
>>
>> - Don't you like green ?
>> It looks fine for me : http://temlib.org/pub/boot_netbsd6.jpg
>>
>> - "checkpatch.pl" did not find anything wrong with this patch. I will adjust style and spacings anyway.
>>
>> - AFAIU, it is impossible to implement exactly this video board on QEMU with reasonable performance.
>>
>> The S24/TCX has a 1Meg * 26 bits framebuffer.
>>
>> For each pixel, two bits are used for selecting between 256 indexed and 24bits truecolor.
>> The RAMDAC/palette handles this selection, as well as an overlay plane for the cursor
>> (with 4 additional colours).
>> This enable handling different "visuals" for each X11 window. Text mode is also 8bits indexed
>> even on 24bits mode.
>>
>> The memory is simultaneously accessible at several addresses, with different alignment :
>>
>>      RDFB32: Each pixel occupies 32bits. D[31:26]=0000_00, D[25:24]=MODE, D[23:0]=Colour
>>      DFB24 : Each pixel occupies 32bits. D[31:24]=0000_0000, D[23:0]=Colour
>>      DFB8  : Each pixel occupies 8bits, mapped to D[7:0] of the framebuffer.
>>
>>      MODE=D[25:24] (-> datasheet ATT20C567)
>>        00 : 256 colours "pseudocolor"     : R=pal_r(D[ 7: 0]) G=pal_g(D[ 7:0]) B=pal_b(D[ 7:0])
>>        01 : 16M colours "directcolor"     : R=pal_r(D[ 7: 0]) G=pal_g(D[15:8]) B=pal_b(D[23:16])
>>        10 : 16M colours "truecolor" gamma : R=gamma(D[ 7: 0]) G=gamma(D[15:8]) B=gamma(D[23:16])
>>        11 : 16M colours "truecolor" raw   : R=      D[ 7: 0]  G=      D[15:8]  B=      D[23:16]
>>
>>   QEMU cannot directly imitate this behavior, so :
>>   - The RDFB32 area is only used for the MODE bits
>>   - 8  colours applications are expected to write only in the DFB8 area
>>   - 24 colours applications are expected to write only in the DFB24 area
>>   - the blitter and stippler accelerators update all areas simultaneously in 24bits mode
>>   - As the 24bits mode is simultaneously both a 8bits and 24bits mode, dirtying the (smaller) 8bits area
>> is sufficient.
>>
>> Olivier
> 
> Hi Olivier,
> 
> So sorry for the delay on this - I just realised replying to another email earlier today that I'd totally forgotten about this one :/
> 
> Please find attached what I believe is your patch rebased onto git master, which when applied to my local repository (and using your binary QEMU,tcx.bin from http://temlib.org/pub/QEMU,tcx.bin) gives me the green text when booting NetBSD 6.1.3 as can be seen at http://www.ilande.co.uk/tmp/netbsd.png.
> 
> Can you double-check and make sure that I haven't accidentally broken anything during the rebase? It required quite a bit of work to fix up by hand.
> 
> 
> ATB,
> 
> Mark.
> 

Thank you Mark for updating this patch.

I found no regressions: acceleration seems to work in both 8-bit and 24-bit modes for NetBSD and Solaris.

As [barely] visible in this photo, the text should be green until the "root file system type: ffs" message
is displayed: http://temlib.org/pub/boot_netbsd6.jpg

The modified OpenBIOS QEMU,tcx.bin file is necessary to work around several quirks:
- Change a few addresses to match actual hardware.
- Remove the "address" property from TCX, because of some almost-bug in NetBSD when detecting framebuffers.
(actual TCX has no address property, so it works by chance on real hardware)
- Add the hardware cursor properties
(see http://temlib.org/pub/openbios_tcx.diff)

The hardware cursor is the only part that should concern Linux (which uses Xorg drivers).

Regards
Olivier

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2014-07-12 22:19 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2014-02-16 23:15 [Qemu-devel] [PATCH 1/1] Sun4m : TCX framebuffer hardware acceleration Olivier Danet
2014-05-25  9:50 ` Mark Cave-Ayland
2014-05-25 13:20   ` Olivier Danet
2014-05-25 20:28     ` Mark Cave-Ayland
     [not found]       ` <53878F06.5030902@caramail.com>
2014-07-08 23:40         ` Mark Cave-Ayland
2014-07-12 22:23           ` Olivier Danet

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).