* [Qemu-devel] [PATCH 0/3] ui/vnc: update optimizations
@ 2013-11-18 8:17 Peter Lieven
2013-11-18 8:17 ` [Qemu-devel] [PATCH 1/3] ui/vnc: introduce VNC_DIRTY_PIXELS_PER_BIT macro Peter Lieven
` (2 more replies)
0 siblings, 3 replies; 8+ messages in thread
From: Peter Lieven @ 2013-11-18 8:17 UTC (permalink / raw)
To: qemu-devel; +Cc: corentincj, aliguori, Peter Lieven
this series includes 2 optimizations for the ui/vnc guest to server and server to client
update cycles. comments/reviews appreciated.
Peter
Peter Lieven (3):
ui/vnc: introduce VNC_DIRTY_PIXELS_PER_BIT macro
ui/vnc: optimize dirty bitmap tracking
ui/vnc: disable adaptive update calculations if not needed
ui/vnc.c | 172 +++++++++++++++++++++++++++++++++++++++++---------------------
ui/vnc.h | 9 +++-
2 files changed, 121 insertions(+), 60 deletions(-)
--
1.7.9.5
^ permalink raw reply [flat|nested] 8+ messages in thread
* [Qemu-devel] [PATCH 1/3] ui/vnc: introduce VNC_DIRTY_PIXELS_PER_BIT macro
2013-11-18 8:17 [Qemu-devel] [PATCH 0/3] ui/vnc: update optimizations Peter Lieven
@ 2013-11-18 8:17 ` Peter Lieven
2013-11-18 8:17 ` [Qemu-devel] [PATCH 2/3] ui/vnc: optimize dirty bitmap tracking Peter Lieven
2013-11-18 8:17 ` [Qemu-devel] [PATCH 3/3] ui/vnc: disable adaptive update calculations if not needed Peter Lieven
2 siblings, 0 replies; 8+ messages in thread
From: Peter Lieven @ 2013-11-18 8:17 UTC (permalink / raw)
To: qemu-devel; +Cc: corentincj, aliguori, Peter Lieven
Signed-off-by: Peter Lieven <pl@kamp.de>
---
ui/vnc.c | 55 ++++++++++++++++++++++++++++++++++---------------------
ui/vnc.h | 6 +++++-
2 files changed, 39 insertions(+), 22 deletions(-)
diff --git a/ui/vnc.c b/ui/vnc.c
index 5601cc3..67b1f75 100644
--- a/ui/vnc.c
+++ b/ui/vnc.c
@@ -442,17 +442,19 @@ static void vnc_dpy_update(DisplayChangeListener *dcl,
iteration. otherwise, if (x % 16) != 0, the last iteration may span
two 16-pixel blocks but we only mark the first as dirty
*/
- w += (x % 16);
- x -= (x % 16);
+ w += (x % VNC_DIRTY_PIXELS_PER_BIT);
+ x -= (x % VNC_DIRTY_PIXELS_PER_BIT);
x = MIN(x, width);
y = MIN(y, height);
w = MIN(x + w, width) - x;
h = MIN(h, height);
- for (; y < h; y++)
- for (i = 0; i < w; i += 16)
- set_bit((x + i) / 16, s->dirty[y]);
+ for (; y < h; y++) {
+ for (i = 0; i < w; i += VNC_DIRTY_PIXELS_PER_BIT) {
+ set_bit((x + i) / VNC_DIRTY_PIXELS_PER_BIT, s->dirty[y]);
+ }
+ }
}
void vnc_framebuffer_update(VncState *vs, int x, int y, int w, int h,
@@ -769,11 +771,11 @@ static void vnc_dpy_copy(DisplayChangeListener *dcl,
y = dst_y + h - 1;
inc = -1;
}
- w_lim = w - (16 - (dst_x % 16));
+ w_lim = w - (VNC_DIRTY_PIXELS_PER_BIT - (dst_x % VNC_DIRTY_PIXELS_PER_BIT));
if (w_lim < 0)
w_lim = w;
else
- w_lim = w - (w_lim % 16);
+ w_lim = w - (w_lim % VNC_DIRTY_PIXELS_PER_BIT);
for (i = 0; i < h; i++) {
for (x = 0; x <= w_lim;
x += s, src_row += cmp_bytes, dst_row += cmp_bytes) {
@@ -781,10 +783,10 @@ static void vnc_dpy_copy(DisplayChangeListener *dcl,
if ((s = w - w_lim) == 0)
break;
} else if (!x) {
- s = (16 - (dst_x % 16));
+ s = (16 - (dst_x % VNC_DIRTY_PIXELS_PER_BIT));
s = MIN(s, w_lim);
} else {
- s = 16;
+ s = VNC_DIRTY_PIXELS_PER_BIT;
}
cmp_bytes = s * VNC_SERVER_FB_BYTES;
if (memcmp(src_row, dst_row, cmp_bytes) == 0)
@@ -911,7 +913,7 @@ static int vnc_update_client(VncState *vs, int has_dirty)
for (y = 0; y < height; y++) {
int x;
int last_x = -1;
- for (x = 0; x < width / 16; x++) {
+ for (x = 0; x < width / VNC_DIRTY_PIXELS_PER_BIT; x++) {
if (test_and_clear_bit(x, vs->dirty[y])) {
if (last_x == -1) {
last_x = x;
@@ -921,16 +923,21 @@ static int vnc_update_client(VncState *vs, int has_dirty)
int h = find_and_clear_dirty_height(vs, y, last_x, x,
height);
- n += vnc_job_add_rect(job, last_x * 16, y,
- (x - last_x) * 16, h);
+ n += vnc_job_add_rect(job,
+ last_x * VNC_DIRTY_PIXELS_PER_BIT,
+ y,
+ (x - last_x) * VNC_DIRTY_PIXELS_PER_BIT,
+ h);
}
last_x = -1;
}
}
if (last_x != -1) {
int h = find_and_clear_dirty_height(vs, y, last_x, x, height);
- n += vnc_job_add_rect(job, last_x * 16, y,
- (x - last_x) * 16, h);
+ n += vnc_job_add_rect(job, last_x * VNC_DIRTY_PIXELS_PER_BIT,
+ y,
+ (x - last_x) * VNC_DIRTY_PIXELS_PER_BIT,
+ h);
}
}
@@ -1861,7 +1868,7 @@ static void framebuffer_update_request(VncState *vs, int incremental,
int w, int h)
{
int i;
- const size_t width = surface_width(vs->vd->ds) / 16;
+ const size_t width = surface_width(vs->vd->ds) / VNC_DIRTY_PIXELS_PER_BIT;
const size_t height = surface_height(vs->vd->ds);
if (y_position > height) {
@@ -2563,7 +2570,9 @@ static int vnc_refresh_lossy_rect(VncDisplay *vd, int x, int y)
vs->lossy_rect[sty][stx] = 0;
for (j = 0; j < VNC_STAT_RECT; ++j) {
- bitmap_set(vs->dirty[y + j], x / 16, VNC_STAT_RECT / 16);
+ bitmap_set(vs->dirty[y + j],
+ x / VNC_DIRTY_PIXELS_PER_BIT,
+ VNC_STAT_RECT / VNC_DIRTY_PIXELS_PER_BIT);
}
has_dirty++;
}
@@ -2710,17 +2719,21 @@ static int vnc_refresh_server_surface(VncDisplay *vd)
}
server_ptr = server_row;
- for (x = 0; x + 15 < width;
- x += 16, guest_ptr += cmp_bytes, server_ptr += cmp_bytes) {
- if (!test_and_clear_bit((x / 16), vd->guest.dirty[y]))
+ for (x = 0; x + VNC_DIRTY_PIXELS_PER_BIT - 1 < width;
+ x += VNC_DIRTY_PIXELS_PER_BIT, guest_ptr += cmp_bytes,
+ server_ptr += cmp_bytes) {
+ if (!test_and_clear_bit((x / VNC_DIRTY_PIXELS_PER_BIT),
+ vd->guest.dirty[y])) {
continue;
- if (memcmp(server_ptr, guest_ptr, cmp_bytes) == 0)
+ }
+ if (memcmp(server_ptr, guest_ptr, cmp_bytes) == 0) {
continue;
+ }
memcpy(server_ptr, guest_ptr, cmp_bytes);
if (!vd->non_adaptive)
vnc_rect_updated(vd, x, y, &tv);
QTAILQ_FOREACH(vs, &vd->clients, next) {
- set_bit((x / 16), vs->dirty[y]);
+ set_bit((x / VNC_DIRTY_PIXELS_PER_BIT), vs->dirty[y]);
}
has_dirty++;
}
diff --git a/ui/vnc.h b/ui/vnc.h
index 6e99213..4a8f33c 100644
--- a/ui/vnc.h
+++ b/ui/vnc.h
@@ -81,8 +81,12 @@ typedef void VncSendHextileTile(VncState *vs,
#define VNC_MAX_WIDTH 2560
#define VNC_MAX_HEIGHT 2048
+/* VNC_DIRTY_PIXELS_PER_BIT is the number of dirty pixels represented
+ * by one bit in the dirty bitmap */
+#define VNC_DIRTY_PIXELS_PER_BIT 16
+
/* VNC_DIRTY_BITS is the number of bits in the dirty bitmap. */
-#define VNC_DIRTY_BITS (VNC_MAX_WIDTH / 16)
+#define VNC_DIRTY_BITS (VNC_MAX_WIDTH / VNC_DIRTY_PIXELS_PER_BIT)
#define VNC_STAT_RECT 64
#define VNC_STAT_COLS (VNC_MAX_WIDTH / VNC_STAT_RECT)
--
1.7.9.5
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [Qemu-devel] [PATCH 2/3] ui/vnc: optimize dirty bitmap tracking
2013-11-18 8:17 [Qemu-devel] [PATCH 0/3] ui/vnc: update optimizations Peter Lieven
2013-11-18 8:17 ` [Qemu-devel] [PATCH 1/3] ui/vnc: introduce VNC_DIRTY_PIXELS_PER_BIT macro Peter Lieven
@ 2013-11-18 8:17 ` Peter Lieven
2013-11-18 16:27 ` Anthony Liguori
2013-11-18 8:17 ` [Qemu-devel] [PATCH 3/3] ui/vnc: disable adaptive update calculations if not needed Peter Lieven
2 siblings, 1 reply; 8+ messages in thread
From: Peter Lieven @ 2013-11-18 8:17 UTC (permalink / raw)
To: qemu-devel; +Cc: corentincj, aliguori, Peter Lieven
vnc_update_client currently scans the dirty bitmap of each client
bitwise which is a very costly operation if only few bits are dirty.
vnc_refresh_server_surface does almost the same.
this patch optimizes both by utilizing the heavily optimized
function find_next_bit to find the offset of the next dirty
bit in the dirty bitmaps.
Signed-off-by: Peter Lieven <pl@kamp.de>
---
ui/vnc.c | 146 ++++++++++++++++++++++++++++++++++++++------------------------
ui/vnc.h | 3 ++
2 files changed, 92 insertions(+), 57 deletions(-)
diff --git a/ui/vnc.c b/ui/vnc.c
index 67b1f75..edf33be 100644
--- a/ui/vnc.c
+++ b/ui/vnc.c
@@ -572,6 +572,16 @@ void *vnc_server_fb_ptr(VncDisplay *vd, int x, int y)
ptr += x * VNC_SERVER_FB_BYTES;
return ptr;
}
+/* this sets only the visible pixels of a dirty bitmap */
+#define VNC_SET_VISIBLE_PIXELS_DIRTY(bitmap, w, h) {\
+ int x, y;\
+ memset(bitmap, 0x00, sizeof(bitmap));\
+ for (y = 0; y < h; y++) {\
+ for (x = 0; x < w / VNC_DIRTY_PIXELS_PER_BIT; x++) {\
+ set_bit(x, bitmap[y]);\
+ } \
+ } \
+ }
static void vnc_dpy_switch(DisplayChangeListener *dcl,
DisplaySurface *surface)
@@ -597,7 +607,9 @@ static void vnc_dpy_switch(DisplayChangeListener *dcl,
qemu_pixman_image_unref(vd->guest.fb);
vd->guest.fb = pixman_image_ref(surface->image);
vd->guest.format = surface->format;
- memset(vd->guest.dirty, 0xFF, sizeof(vd->guest.dirty));
+ VNC_SET_VISIBLE_PIXELS_DIRTY(vd->guest.dirty,
+ surface_width(vd->ds),
+ surface_height(vd->ds));
QTAILQ_FOREACH(vs, &vd->clients, next) {
vnc_colordepth(vs);
@@ -605,7 +617,9 @@ static void vnc_dpy_switch(DisplayChangeListener *dcl,
if (vs->vd->cursor) {
vnc_cursor_define(vs);
}
- memset(vs->dirty, 0xFF, sizeof(vs->dirty));
+ VNC_SET_VISIBLE_PIXELS_DIRTY(vs->dirty,
+ surface_width(vd->ds),
+ surface_height(vd->ds));
}
}
@@ -882,6 +896,14 @@ static int vnc_update_client_sync(VncState *vs, int has_dirty)
return ret;
}
+#define VNC_CLIENT_UPDATE_RECT() \
+ if (last_x != -1) {\
+ int h = find_and_clear_dirty_height(vs, y, last_x, x, height);\
+ n += vnc_job_add_rect(job,\
+ last_x * VNC_DIRTY_PIXELS_PER_BIT, y,\
+ (x - last_x) * VNC_DIRTY_PIXELS_PER_BIT, h);\
+ }
+
static int vnc_update_client(VncState *vs, int has_dirty)
{
if (vs->need_update && vs->csock != -1) {
@@ -910,35 +932,32 @@ static int vnc_update_client(VncState *vs, int has_dirty)
width = MIN(pixman_image_get_width(vd->server), vs->client_width);
height = MIN(pixman_image_get_height(vd->server), vs->client_height);
- for (y = 0; y < height; y++) {
+ y = 0;
+ for (;;) {
int x;
int last_x = -1;
- for (x = 0; x < width / VNC_DIRTY_PIXELS_PER_BIT; x++) {
+ unsigned long offset = find_next_bit((unsigned long *) &vs->dirty,
+ height * VNC_DIRTY_BITS_PER_LINE(vs),
+ y * VNC_DIRTY_BITS_PER_LINE(vs));
+ if (offset == height * VNC_DIRTY_BITS_PER_LINE(vs)) {
+ /* no more dirty bits */
+ break;
+ }
+ y = offset / VNC_DIRTY_BITS_PER_LINE(vs);
+
+ for (x = offset % VNC_DIRTY_BITS_PER_LINE(vs);
+ x < width / VNC_DIRTY_PIXELS_PER_BIT; x++) {
if (test_and_clear_bit(x, vs->dirty[y])) {
if (last_x == -1) {
last_x = x;
}
} else {
- if (last_x != -1) {
- int h = find_and_clear_dirty_height(vs, y, last_x, x,
- height);
-
- n += vnc_job_add_rect(job,
- last_x * VNC_DIRTY_PIXELS_PER_BIT,
- y,
- (x - last_x) * VNC_DIRTY_PIXELS_PER_BIT,
- h);
- }
+ VNC_CLIENT_UPDATE_RECT();
last_x = -1;
}
}
- if (last_x != -1) {
- int h = find_and_clear_dirty_height(vs, y, last_x, x, height);
- n += vnc_job_add_rect(job, last_x * VNC_DIRTY_PIXELS_PER_BIT,
- y,
- (x - last_x) * VNC_DIRTY_PIXELS_PER_BIT,
- h);
- }
+ VNC_CLIENT_UPDATE_RECT();
+ y++;
}
vnc_job_push(job);
@@ -2676,8 +2695,8 @@ static int vnc_refresh_server_surface(VncDisplay *vd)
int width = pixman_image_get_width(vd->guest.fb);
int height = pixman_image_get_height(vd->guest.fb);
int y;
- uint8_t *guest_row;
- uint8_t *server_row;
+ uint8_t *guest_row0 = NULL, *server_row0;
+ int guest_stride, server_stride;
int cmp_bytes;
VncState *vs;
int has_dirty = 0;
@@ -2702,44 +2721,57 @@ static int vnc_refresh_server_surface(VncDisplay *vd)
if (vd->guest.format != VNC_SERVER_FB_FORMAT) {
int width = pixman_image_get_width(vd->server);
tmpbuf = qemu_pixman_linebuf_create(VNC_SERVER_FB_FORMAT, width);
- }
- guest_row = (uint8_t *)pixman_image_get_data(vd->guest.fb);
- server_row = (uint8_t *)pixman_image_get_data(vd->server);
- for (y = 0; y < height; y++) {
- if (!bitmap_empty(vd->guest.dirty[y], VNC_DIRTY_BITS)) {
- int x;
- uint8_t *guest_ptr;
- uint8_t *server_ptr;
+ } else {
+ guest_row0 = (uint8_t *)pixman_image_get_data(vd->guest.fb);
+ }
+ server_row0 = (uint8_t *)pixman_image_get_data(vd->server);
+ guest_stride = pixman_image_get_stride(vd->guest.fb);
+ server_stride = pixman_image_get_stride(vd->server);
+
+ y = 0;
+ for (;;) {
+ int x;
+ uint8_t *guest_ptr, *server_ptr;
+ unsigned long offset = find_next_bit((unsigned long *) &vd->guest.dirty,
+ height * VNC_DIRTY_BITS_PER_LINE(&vd->guest),
+ y * VNC_DIRTY_BITS_PER_LINE(&vd->guest));
+ if (offset == height * VNC_DIRTY_BITS_PER_LINE(&vd->guest)) {
+ /* no more dirty bits */
+ break;
+ }
+ y = offset / VNC_DIRTY_BITS_PER_LINE(&vd->guest);
- if (vd->guest.format != VNC_SERVER_FB_FORMAT) {
- qemu_pixman_linebuf_fill(tmpbuf, vd->guest.fb, width, 0, y);
- guest_ptr = (uint8_t *)pixman_image_get_data(tmpbuf);
- } else {
- guest_ptr = guest_row;
- }
- server_ptr = server_row;
+ server_ptr = server_row0 + y * server_stride;
- for (x = 0; x + VNC_DIRTY_PIXELS_PER_BIT - 1 < width;
- x += VNC_DIRTY_PIXELS_PER_BIT, guest_ptr += cmp_bytes,
- server_ptr += cmp_bytes) {
- if (!test_and_clear_bit((x / VNC_DIRTY_PIXELS_PER_BIT),
- vd->guest.dirty[y])) {
- continue;
- }
- if (memcmp(server_ptr, guest_ptr, cmp_bytes) == 0) {
- continue;
- }
- memcpy(server_ptr, guest_ptr, cmp_bytes);
- if (!vd->non_adaptive)
- vnc_rect_updated(vd, x, y, &tv);
- QTAILQ_FOREACH(vs, &vd->clients, next) {
- set_bit((x / VNC_DIRTY_PIXELS_PER_BIT), vs->dirty[y]);
- }
- has_dirty++;
+ if (vd->guest.format != VNC_SERVER_FB_FORMAT) {
+ qemu_pixman_linebuf_fill(tmpbuf, vd->guest.fb, width, 0, y);
+ guest_ptr = (uint8_t *)pixman_image_get_data(tmpbuf);
+ } else {
+ guest_ptr = guest_row0 + y * guest_stride;
+ }
+
+ for (x = offset % VNC_DIRTY_BITS_PER_LINE(&vd->guest);
+ x + VNC_DIRTY_PIXELS_PER_BIT - 1 < width;
+ x += VNC_DIRTY_PIXELS_PER_BIT, guest_ptr += cmp_bytes,
+ server_ptr += cmp_bytes) {
+ if (!test_and_clear_bit((x / VNC_DIRTY_PIXELS_PER_BIT),
+ vd->guest.dirty[y])) {
+ continue;
+ }
+ if (memcmp(server_ptr, guest_ptr, cmp_bytes) == 0) {
+ continue;
+ }
+ memcpy(server_ptr, guest_ptr, cmp_bytes);
+ if (!vd->non_adaptive) {
+ vnc_rect_updated(vd, x, y, &tv);
}
+ QTAILQ_FOREACH(vs, &vd->clients, next) {
+ set_bit((x / VNC_DIRTY_PIXELS_PER_BIT), vs->dirty[y]);
+ }
+ has_dirty++;
}
- guest_row += pixman_image_get_stride(vd->guest.fb);
- server_row += pixman_image_get_stride(vd->server);
+
+ y++;
}
qemu_pixman_image_unref(tmpbuf);
return has_dirty;
diff --git a/ui/vnc.h b/ui/vnc.h
index 4a8f33c..82c8ea8 100644
--- a/ui/vnc.h
+++ b/ui/vnc.h
@@ -88,6 +88,9 @@ typedef void VncSendHextileTile(VncState *vs,
/* VNC_DIRTY_BITS is the number of bits in the dirty bitmap. */
#define VNC_DIRTY_BITS (VNC_MAX_WIDTH / VNC_DIRTY_PIXELS_PER_BIT)
+/* VNC_DIRTY_BITS_PER_LINE might be greater than VNC_DIRTY_BITS due to alignment */
+#define VNC_DIRTY_BITS_PER_LINE(x) (sizeof((x)->dirty) / VNC_MAX_HEIGHT * BITS_PER_BYTE)
+
#define VNC_STAT_RECT 64
#define VNC_STAT_COLS (VNC_MAX_WIDTH / VNC_STAT_RECT)
#define VNC_STAT_ROWS (VNC_MAX_HEIGHT / VNC_STAT_RECT)
--
1.7.9.5
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [Qemu-devel] [PATCH 3/3] ui/vnc: disable adaptive update calculations if not needed
2013-11-18 8:17 [Qemu-devel] [PATCH 0/3] ui/vnc: update optimizations Peter Lieven
2013-11-18 8:17 ` [Qemu-devel] [PATCH 1/3] ui/vnc: introduce VNC_DIRTY_PIXELS_PER_BIT macro Peter Lieven
2013-11-18 8:17 ` [Qemu-devel] [PATCH 2/3] ui/vnc: optimize dirty bitmap tracking Peter Lieven
@ 2013-11-18 8:17 ` Peter Lieven
2 siblings, 0 replies; 8+ messages in thread
From: Peter Lieven @ 2013-11-18 8:17 UTC (permalink / raw)
To: qemu-devel; +Cc: corentincj, aliguori, Peter Lieven
Signed-off-by: Peter Lieven <pl@kamp.de>
---
ui/vnc.c | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git a/ui/vnc.c b/ui/vnc.c
index edf33be..6683ae9 100644
--- a/ui/vnc.c
+++ b/ui/vnc.c
@@ -3194,7 +3194,9 @@ void vnc_display_open(DisplayState *ds, const char *display, Error **errp)
acl = 1;
#endif
} else if (strncmp(options, "lossy", 5) == 0) {
+#ifdef CONFIG_VNC_JPEG
vs->lossy = true;
+#endif
} else if (strncmp(options, "non-adaptive", 12) == 0) {
vs->non_adaptive = true;
} else if (strncmp(options, "share=", 6) == 0) {
@@ -3211,6 +3213,13 @@ void vnc_display_open(DisplayState *ds, const char *display, Error **errp)
}
}
+ /* adaptive updates are only used with tight encoding and
+ * if lossy updates are enabled so we can disable all the
+ * calculations otherwise */
+ if (!vs->lossy) {
+ vs->non_adaptive = true;
+ }
+
#ifdef CONFIG_VNC_TLS
if (acl && x509 && vs->tls.x509verify) {
if (!(vs->tls.acl = qemu_acl_init("vnc.x509dname"))) {
--
1.7.9.5
^ permalink raw reply related [flat|nested] 8+ messages in thread
* Re: [Qemu-devel] [PATCH 2/3] ui/vnc: optimize dirty bitmap tracking
2013-11-18 8:17 ` [Qemu-devel] [PATCH 2/3] ui/vnc: optimize dirty bitmap tracking Peter Lieven
@ 2013-11-18 16:27 ` Anthony Liguori
2013-11-18 19:55 ` Peter Lieven
2013-11-19 13:48 ` Peter Lieven
0 siblings, 2 replies; 8+ messages in thread
From: Anthony Liguori @ 2013-11-18 16:27 UTC (permalink / raw)
To: Peter Lieven; +Cc: corentincj, Anthony Liguori, qemu-devel
[-- Attachment #1: Type: text/plain, Size: 10501 bytes --]
On Nov 18, 2013 12:20 AM, "Peter Lieven" <pl@kamp.de> wrote:
>
> vnc_update_client currently scans the dirty bitmap of each client
> bitwise which is a very costly operation if only few bits are dirty.
> vnc_refresh_server_surface does almost the same.
> this patch optimizes both by utilizing the heavily optimized
> function find_next_bit to find the offset of the next dirty
> bit in the dirty bitmaps.
>
> Signed-off-by: Peter Lieven <pl@kamp.de>
Can you include performance data?
Regards,
Anthony Liguori
> ---
> ui/vnc.c | 146
++++++++++++++++++++++++++++++++++++++------------------------
> ui/vnc.h | 3 ++
> 2 files changed, 92 insertions(+), 57 deletions(-)
>
> diff --git a/ui/vnc.c b/ui/vnc.c
> index 67b1f75..edf33be 100644
> --- a/ui/vnc.c
> +++ b/ui/vnc.c
> @@ -572,6 +572,16 @@ void *vnc_server_fb_ptr(VncDisplay *vd, int x, int y)
> ptr += x * VNC_SERVER_FB_BYTES;
> return ptr;
> }
> +/* this sets only the visible pixels of a dirty bitmap */
> +#define VNC_SET_VISIBLE_PIXELS_DIRTY(bitmap, w, h) {\
> + int x, y;\
> + memset(bitmap, 0x00, sizeof(bitmap));\
> + for (y = 0; y < h; y++) {\
> + for (x = 0; x < w / VNC_DIRTY_PIXELS_PER_BIT; x++) {\
> + set_bit(x, bitmap[y]);\
> + } \
> + } \
> + }
>
> static void vnc_dpy_switch(DisplayChangeListener *dcl,
> DisplaySurface *surface)
> @@ -597,7 +607,9 @@ static void vnc_dpy_switch(DisplayChangeListener *dcl,
> qemu_pixman_image_unref(vd->guest.fb);
> vd->guest.fb = pixman_image_ref(surface->image);
> vd->guest.format = surface->format;
> - memset(vd->guest.dirty, 0xFF, sizeof(vd->guest.dirty));
> + VNC_SET_VISIBLE_PIXELS_DIRTY(vd->guest.dirty,
> + surface_width(vd->ds),
> + surface_height(vd->ds));
>
> QTAILQ_FOREACH(vs, &vd->clients, next) {
> vnc_colordepth(vs);
> @@ -605,7 +617,9 @@ static void vnc_dpy_switch(DisplayChangeListener *dcl,
> if (vs->vd->cursor) {
> vnc_cursor_define(vs);
> }
> - memset(vs->dirty, 0xFF, sizeof(vs->dirty));
> + VNC_SET_VISIBLE_PIXELS_DIRTY(vs->dirty,
> + surface_width(vd->ds),
> + surface_height(vd->ds));
> }
> }
>
> @@ -882,6 +896,14 @@ static int vnc_update_client_sync(VncState *vs, int
has_dirty)
> return ret;
> }
>
> +#define VNC_CLIENT_UPDATE_RECT() \
> + if (last_x != -1) {\
> + int h = find_and_clear_dirty_height(vs, y, last_x, x, height);\
> + n += vnc_job_add_rect(job,\
> + last_x * VNC_DIRTY_PIXELS_PER_BIT, y,\
> + (x - last_x) * VNC_DIRTY_PIXELS_PER_BIT, h);\
> + }
> +
> static int vnc_update_client(VncState *vs, int has_dirty)
> {
> if (vs->need_update && vs->csock != -1) {
> @@ -910,35 +932,32 @@ static int vnc_update_client(VncState *vs, int
has_dirty)
> width = MIN(pixman_image_get_width(vd->server),
vs->client_width);
> height = MIN(pixman_image_get_height(vd->server),
vs->client_height);
>
> - for (y = 0; y < height; y++) {
> + y = 0;
> + for (;;) {
> int x;
> int last_x = -1;
> - for (x = 0; x < width / VNC_DIRTY_PIXELS_PER_BIT; x++) {
> + unsigned long offset = find_next_bit((unsigned long *)
&vs->dirty,
> + height *
VNC_DIRTY_BITS_PER_LINE(vs),
> + y *
VNC_DIRTY_BITS_PER_LINE(vs));
> + if (offset == height * VNC_DIRTY_BITS_PER_LINE(vs)) {
> + /* no more dirty bits */
> + break;
> + }
> + y = offset / VNC_DIRTY_BITS_PER_LINE(vs);
> +
> + for (x = offset % VNC_DIRTY_BITS_PER_LINE(vs);
> + x < width / VNC_DIRTY_PIXELS_PER_BIT; x++) {
> if (test_and_clear_bit(x, vs->dirty[y])) {
> if (last_x == -1) {
> last_x = x;
> }
> } else {
> - if (last_x != -1) {
> - int h = find_and_clear_dirty_height(vs, y,
last_x, x,
> - height);
> -
> - n += vnc_job_add_rect(job,
> - last_x *
VNC_DIRTY_PIXELS_PER_BIT,
> - y,
> - (x - last_x) *
VNC_DIRTY_PIXELS_PER_BIT,
> - h);
> - }
> + VNC_CLIENT_UPDATE_RECT();
> last_x = -1;
> }
> }
> - if (last_x != -1) {
> - int h = find_and_clear_dirty_height(vs, y, last_x, x,
height);
> - n += vnc_job_add_rect(job, last_x *
VNC_DIRTY_PIXELS_PER_BIT,
> - y,
> - (x - last_x) *
VNC_DIRTY_PIXELS_PER_BIT,
> - h);
> - }
> + VNC_CLIENT_UPDATE_RECT();
> + y++;
> }
>
> vnc_job_push(job);
> @@ -2676,8 +2695,8 @@ static int vnc_refresh_server_surface(VncDisplay
*vd)
> int width = pixman_image_get_width(vd->guest.fb);
> int height = pixman_image_get_height(vd->guest.fb);
> int y;
> - uint8_t *guest_row;
> - uint8_t *server_row;
> + uint8_t *guest_row0 = NULL, *server_row0;
> + int guest_stride, server_stride;
> int cmp_bytes;
> VncState *vs;
> int has_dirty = 0;
> @@ -2702,44 +2721,57 @@ static int vnc_refresh_server_surface(VncDisplay
*vd)
> if (vd->guest.format != VNC_SERVER_FB_FORMAT) {
> int width = pixman_image_get_width(vd->server);
> tmpbuf = qemu_pixman_linebuf_create(VNC_SERVER_FB_FORMAT, width);
> - }
> - guest_row = (uint8_t *)pixman_image_get_data(vd->guest.fb);
> - server_row = (uint8_t *)pixman_image_get_data(vd->server);
> - for (y = 0; y < height; y++) {
> - if (!bitmap_empty(vd->guest.dirty[y], VNC_DIRTY_BITS)) {
> - int x;
> - uint8_t *guest_ptr;
> - uint8_t *server_ptr;
> + } else {
> + guest_row0 = (uint8_t *)pixman_image_get_data(vd->guest.fb);
> + }
> + server_row0 = (uint8_t *)pixman_image_get_data(vd->server);
> + guest_stride = pixman_image_get_stride(vd->guest.fb);
> + server_stride = pixman_image_get_stride(vd->server);
> +
> + y = 0;
> + for (;;) {
> + int x;
> + uint8_t *guest_ptr, *server_ptr;
> + unsigned long offset = find_next_bit((unsigned long *)
&vd->guest.dirty,
> + height *
VNC_DIRTY_BITS_PER_LINE(&vd->guest),
> + y *
VNC_DIRTY_BITS_PER_LINE(&vd->guest));
> + if (offset == height * VNC_DIRTY_BITS_PER_LINE(&vd->guest)) {
> + /* no more dirty bits */
> + break;
> + }
> + y = offset / VNC_DIRTY_BITS_PER_LINE(&vd->guest);
>
> - if (vd->guest.format != VNC_SERVER_FB_FORMAT) {
> - qemu_pixman_linebuf_fill(tmpbuf, vd->guest.fb, width, 0,
y);
> - guest_ptr = (uint8_t *)pixman_image_get_data(tmpbuf);
> - } else {
> - guest_ptr = guest_row;
> - }
> - server_ptr = server_row;
> + server_ptr = server_row0 + y * server_stride;
>
> - for (x = 0; x + VNC_DIRTY_PIXELS_PER_BIT - 1 < width;
> - x += VNC_DIRTY_PIXELS_PER_BIT, guest_ptr += cmp_bytes,
> - server_ptr += cmp_bytes) {
> - if (!test_and_clear_bit((x / VNC_DIRTY_PIXELS_PER_BIT),
> - vd->guest.dirty[y])) {
> - continue;
> - }
> - if (memcmp(server_ptr, guest_ptr, cmp_bytes) == 0) {
> - continue;
> - }
> - memcpy(server_ptr, guest_ptr, cmp_bytes);
> - if (!vd->non_adaptive)
> - vnc_rect_updated(vd, x, y, &tv);
> - QTAILQ_FOREACH(vs, &vd->clients, next) {
> - set_bit((x / VNC_DIRTY_PIXELS_PER_BIT),
vs->dirty[y]);
> - }
> - has_dirty++;
> + if (vd->guest.format != VNC_SERVER_FB_FORMAT) {
> + qemu_pixman_linebuf_fill(tmpbuf, vd->guest.fb, width, 0, y);
> + guest_ptr = (uint8_t *)pixman_image_get_data(tmpbuf);
> + } else {
> + guest_ptr = guest_row0 + y * guest_stride;
> + }
> +
> + for (x = offset % VNC_DIRTY_BITS_PER_LINE(&vd->guest);
> + x + VNC_DIRTY_PIXELS_PER_BIT - 1 < width;
> + x += VNC_DIRTY_PIXELS_PER_BIT, guest_ptr += cmp_bytes,
> + server_ptr += cmp_bytes) {
> + if (!test_and_clear_bit((x / VNC_DIRTY_PIXELS_PER_BIT),
> + vd->guest.dirty[y])) {
> + continue;
> + }
> + if (memcmp(server_ptr, guest_ptr, cmp_bytes) == 0) {
> + continue;
> + }
> + memcpy(server_ptr, guest_ptr, cmp_bytes);
> + if (!vd->non_adaptive) {
> + vnc_rect_updated(vd, x, y, &tv);
> }
> + QTAILQ_FOREACH(vs, &vd->clients, next) {
> + set_bit((x / VNC_DIRTY_PIXELS_PER_BIT), vs->dirty[y]);
> + }
> + has_dirty++;
> }
> - guest_row += pixman_image_get_stride(vd->guest.fb);
> - server_row += pixman_image_get_stride(vd->server);
> +
> + y++;
> }
> qemu_pixman_image_unref(tmpbuf);
> return has_dirty;
> diff --git a/ui/vnc.h b/ui/vnc.h
> index 4a8f33c..82c8ea8 100644
> --- a/ui/vnc.h
> +++ b/ui/vnc.h
> @@ -88,6 +88,9 @@ typedef void VncSendHextileTile(VncState *vs,
> /* VNC_DIRTY_BITS is the number of bits in the dirty bitmap. */
> #define VNC_DIRTY_BITS (VNC_MAX_WIDTH / VNC_DIRTY_PIXELS_PER_BIT)
>
> +/* VNC_DIRTY_BITS_PER_LINE might be greater than VNC_DIRTY_BITS due to
alignment */
> +#define VNC_DIRTY_BITS_PER_LINE(x) (sizeof((x)->dirty) / VNC_MAX_HEIGHT
* BITS_PER_BYTE)
> +
> #define VNC_STAT_RECT 64
> #define VNC_STAT_COLS (VNC_MAX_WIDTH / VNC_STAT_RECT)
> #define VNC_STAT_ROWS (VNC_MAX_HEIGHT / VNC_STAT_RECT)
> --
> 1.7.9.5
>
>
[-- Attachment #2: Type: text/html, Size: 12894 bytes --]
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [Qemu-devel] [PATCH 2/3] ui/vnc: optimize dirty bitmap tracking
2013-11-18 16:27 ` Anthony Liguori
@ 2013-11-18 19:55 ` Peter Lieven
2013-11-19 13:48 ` Peter Lieven
1 sibling, 0 replies; 8+ messages in thread
From: Peter Lieven @ 2013-11-18 19:55 UTC (permalink / raw)
To: Anthony Liguori; +Cc: corentincj, Anthony Liguori, qemu-devel
[-- Attachment #1: Type: text/plain, Size: 945 bytes --]
Am 18.11.2013 17:27, schrieb Anthony Liguori:
>
>
> On Nov 18, 2013 12:20 AM, "Peter Lieven" <pl@kamp.de <mailto:pl@kamp.de>> wrote:
> >
> > vnc_update_client currently scans the dirty bitmap of each client
> > bitwise which is a very costly operation if only few bits are dirty.
> > vnc_refresh_server_surface does almost the same.
> > this patch optimizes both by utilizing the heavily optimized
> > function find_next_bit to find the offset of the next dirty
> > bit in the dirty bitmaps.
> >
> > Signed-off-by: Peter Lieven <pl@kamp.de <mailto:pl@kamp.de>>
>
> Can you include performance data?
>
I hoped that checking 32 bits at once (pipelined), compared to
checking 32 bits one by one, would be convincing enough ;-)
Do you have a special test in mind? Otherwise I could try
to create an artificial test case with e.g. no bits dirty, all bits
dirty, only a few bits dirty (cursor update) and compare the
timing for both versions.
Peter
[-- Attachment #2: Type: text/html, Size: 1670 bytes --]
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [Qemu-devel] [PATCH 2/3] ui/vnc: optimize dirty bitmap tracking
2013-11-18 16:27 ` Anthony Liguori
2013-11-18 19:55 ` Peter Lieven
@ 2013-11-19 13:48 ` Peter Lieven
2013-11-19 14:06 ` Peter Lieven
1 sibling, 1 reply; 8+ messages in thread
From: Peter Lieven @ 2013-11-19 13:48 UTC (permalink / raw)
To: Anthony Liguori; +Cc: corentincj, qemu-devel
[-- Attachment #1: Type: text/plain, Size: 8840 bytes --]
On 18.11.2013 17:27, Anthony Liguori wrote:
>
>
> On Nov 18, 2013 12:20 AM, "Peter Lieven" <pl@kamp.de <mailto:pl@kamp.de>> wrote:
> >
> > vnc_update_client currently scans the dirty bitmap of each client
> > bitwise which is a very costly operation if only few bits are dirty.
> > vnc_refresh_server_surface does almost the same.
> > this patch optimizes both by utilizing the heavily optimized
> > function find_next_bit to find the offset of the next dirty
> > bit in the dirty bitmaps.
> >
> > Signed-off-by: Peter Lieven <pl@kamp.de <mailto:pl@kamp.de>>
>
> Can you include performance data?
>
I made some artificial analysis of vnc_update_client with the attached test code.
$ gcc -O2 -o vnc_perf vnc_perf.c
$ ./vnc_perf
All bits clean - vnc_update_client_new: 0.07 secs
vnc_update_client_old: 10.82 secs
All bits dirty - vnc_update_client_new: 9.81 secs
vnc_update_client_old: 20.00 secs
Few bits dirty - vnc_update_client_new: 0.08 secs
vnc_update_client_old: 10.62 secs
find_and_clear_dirty_height() is still very slow, but I will look at this separately.
Peter
---
/* Number of benchmark passes per scenario. Parenthesized so the macro stays
 * a single value inside larger expressions (e.g. `n / ITERATIONS`). */
#define ITERATIONS (16 * 1024)
#define VNC_MAX_WIDTH 2560
#define VNC_MAX_HEIGHT 2048
/* Number of pixels represented by one bit in the dirty bitmap. */
#define VNC_DIRTY_PIXELS_PER_BIT 16
/* VNC_DIRTY_BITS is the number of bits in the dirty bitmap. */
#define VNC_DIRTY_BITS (VNC_MAX_WIDTH / VNC_DIRTY_PIXELS_PER_BIT)
/* Defined ahead of the macros below that expand to it. */
#define BITS_PER_BYTE 8
#define BITS_PER_LONG (sizeof (unsigned long) * BITS_PER_BYTE)
/* VNC_DIRTY_BITS_PER_LINE might be greater than VNC_DIRTY_BITS due to alignment */
#define VNC_DIRTY_BITS_PER_LINE(x) (sizeof(x) / VNC_MAX_HEIGHT * BITS_PER_BYTE)
/* Index of the unsigned long word that holds bit number nr. */
#define BITOP_WORD(nr) ((nr) / BITS_PER_LONG)
#define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d))
#define BIT(nr) (1UL << (nr))
/* Mask selecting bit nr within its word. */
#define BIT_MASK(nr) (1UL << ((nr) % BITS_PER_LONG))
#define BIT_WORD(nr) ((nr) / BITS_PER_LONG)
/* Number of unsigned longs needed to hold nr bits. */
#define BITS_TO_LONGS(nr) DIV_ROUND_UP((nr), BITS_PER_BYTE * sizeof(long))
#define DECLARE_BITMAP(name,bits) \
    unsigned long name[BITS_TO_LONGS(bits)]
/* find_next_bit() calls ctzl(); map it to the local 64-bit helper. */
#define ctzl ctz64
#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include "time.h"
/* Count trailing zero bits of val; a zero input yields 64. */
static inline int ctz64(uint64_t val)
{
    if (val == 0) {
        /* __builtin_ctzll() is not called with 0, so handle it here. */
        return 64;
    }
    return __builtin_ctzll(val);
}
/* Global dirty bitmap exercised by the benchmark. The macro expands to
 * unsigned long dirty[VNC_MAX_HEIGHT][BITS_TO_LONGS(VNC_DIRTY_BITS)],
 * i.e. one row of VNC_DIRTY_BITS bits (rounded up to whole longs) per line. */
DECLARE_BITMAP(dirty[VNC_MAX_HEIGHT], VNC_DIRTY_BITS);
/*
 * Find the next set bit in a bitmap.
 *
 * addr:   first word of the bitmap
 * size:   number of bits in the bitmap
 * offset: bit index at which the search starts (inclusive)
 *
 * Returns the index of the first set bit at or after offset, or size when
 * no such bit exists (including when offset >= size).
 */
unsigned long find_next_bit(const unsigned long *addr, unsigned long size,
unsigned long offset)
{
/* p: word containing bit `offset`; result: bit index of that word's bit 0. */
const unsigned long *p = addr + BITOP_WORD(offset);
unsigned long result = offset & ~(BITS_PER_LONG-1);
unsigned long tmp;
if (offset >= size) {
return size;
}
/* From here on, size counts the bits remaining from the start of *p. */
size -= result;
offset %= BITS_PER_LONG;
if (offset) {
/* Search starts mid-word: mask off the bits below the start position. */
tmp = *(p++);
tmp &= (~0UL << offset);
if (size < BITS_PER_LONG) {
goto found_first;
}
if (tmp) {
goto found_middle;
}
size -= BITS_PER_LONG;
result += BITS_PER_LONG;
}
/* Skip four all-zero words per iteration while at least four remain. */
while (size >= 4*BITS_PER_LONG) {
unsigned long d1, d2, d3;
tmp = *p;
d1 = *(p+1);
d2 = *(p+2);
d3 = *(p+3);
if (tmp) {
goto found_middle;
}
if (d1 | d2 | d3) {
/* A set bit is in one of the next three words; the loop below finds it. */
break;
}
p += 4;
result += 4*BITS_PER_LONG;
size -= 4*BITS_PER_LONG;
}
/* Word-at-a-time scan over the remaining full words. */
while (size >= BITS_PER_LONG) {
if ((tmp = *(p++))) {
goto found_middle;
}
result += BITS_PER_LONG;
size -= BITS_PER_LONG;
}
if (!size) {
/* No partial trailing word: result now equals the original size. */
return result;
}
tmp = *p;
found_first:
/* Partial last word: keep only the low `size` bits that belong to the bitmap. */
tmp &= (~0UL >> (BITS_PER_LONG - size));
if (tmp == 0UL) { /* Are any bits set? */
return result + size; /* Nope. */
}
found_middle:
/* tmp is non-zero here; its lowest set bit is the answer. */
return result + ctzl(tmp);
}
/* Return 1 if bit nr of the bitmap at addr is set, 0 otherwise. */
static inline int test_bit(int nr, const unsigned long *addr)
{
    const unsigned long word = addr[BIT_WORD(nr)];

    return (word >> (nr & (BITS_PER_LONG - 1))) & 1UL;
}
/* Clear bit nr of the bitmap at addr. */
static inline void clear_bit(int nr, unsigned long *addr)
{
    addr[BIT_WORD(nr)] &= ~BIT_MASK(nr);
}
/* Set bit nr of the bitmap at addr. */
static inline void set_bit(int nr, unsigned long *addr)
{
    addr[BIT_WORD(nr)] |= BIT_MASK(nr);
}
/* Clear bit nr of the bitmap at addr and report whether it was set before
 * (1) or not (0). This is a plain read-modify-write, not an atomic one. */
static inline int test_and_clear_bit(int nr, unsigned long *addr)
{
    unsigned long *word = &addr[BIT_WORD(nr)];
    const unsigned long bit = BIT_MASK(nr);
    const int was_set = (*word & bit) != 0;

    *word &= ~bit;
    return was_set;
}
/*
 * Starting at the row below y, walk down while column last_x is still dirty
 * and clear columns [last_x, x) in every such row of the global bitmap.
 *
 * Returns the height of the resulting rectangle, counting row y itself, so
 * the result is at least 1. Row y is not touched here.
 */
static int find_and_clear_dirty_height(int y, int last_x, int x, int height)
{
    int h = 1;

    while (h < (height - y) && test_bit(last_x, dirty[y + h])) {
        int col;

        for (col = last_x; col < x; col++) {
            clear_bit(col, dirty[y + h]);
        }
        h++;
    }
    return h;
}
/*
 * Close the currently open dirty run, if any. Relies on the caller's locals
 * last_x, x, y and height. In this benchmark only the bitmap side effect of
 * find_and_clear_dirty_height() matters, so its result is discarded.
 *
 * Wrapped in do { } while (0) so that `VNC_CLIENT_UPDATE_RECT();` is a
 * single statement and stays safe in unbraced if/else bodies.
 */
#define VNC_CLIENT_UPDATE_RECT() \
    do { \
        if (last_x != -1) { \
            (void)find_and_clear_dirty_height(y, last_x, x, height); \
        } \
    } while (0)
/*
 * Patched scan: use find_next_bit() over the whole bitmap (treated as one
 * flat bit array) to jump to the next row containing a set bit, then walk
 * that row from the found column and close every run of dirty bits.
 *
 * The locals x, y, last_x and height keep these exact names because
 * VNC_CLIENT_UPDATE_RECT() refers to them.
 */
void vnc_update_client_new() {
    const unsigned long bits_per_line = VNC_DIRTY_BITS_PER_LINE(dirty);
    int width = VNC_MAX_WIDTH;
    int height = VNC_MAX_HEIGHT;
    const unsigned long total_bits = height * bits_per_line;
    const int dirty_cols = width / VNC_DIRTY_PIXELS_PER_BIT;
    int y = 0;

    while (1) {
        unsigned long offset = find_next_bit((unsigned long *) &dirty,
                                             total_bits,
                                             y * bits_per_line);
        int last_x = -1;
        int x;

        if (offset == total_bits) {
            /* no more dirty bits */
            break;
        }

        /* Split the flat bit offset back into row and column. */
        y = offset / bits_per_line;
        x = offset % bits_per_line;

        while (x < dirty_cols) {
            if (!test_and_clear_bit(x, dirty[y])) {
                VNC_CLIENT_UPDATE_RECT();
                last_x = -1;
            } else if (last_x == -1) {
                last_x = x;
            }
            x++;
        }
        /* Close a run that extends to the end of the row. */
        VNC_CLIENT_UPDATE_RECT();
        y++;
    }
}
/*
 * Baseline scan: visit every row and test-and-clear every column bit one
 * by one, closing each run of dirty bits as it ends.
 *
 * The locals x, y, last_x and height keep these exact names because
 * VNC_CLIENT_UPDATE_RECT() refers to them.
 */
void vnc_update_client_old() {
    int width = VNC_MAX_WIDTH;
    int height = VNC_MAX_HEIGHT;
    const int dirty_cols = width / 16;
    int y = 0;

    while (y < height) {
        int last_x = -1;
        int x = 0;

        while (x < dirty_cols) {
            if (!test_and_clear_bit(x, dirty[y])) {
                VNC_CLIENT_UPDATE_RECT();
                last_x = -1;
            } else if (last_x == -1) {
                last_x = x;
            }
            x++;
        }
        /* Close a run that extends to the end of the row. */
        VNC_CLIENT_UPDATE_RECT();
        y++;
    }
}
/*
 * Benchmark driver: times vnc_update_client_new() against
 * vnc_update_client_old() for three dirty-bitmap patterns and prints the
 * elapsed CPU time of each run.
 *
 *   - all bits clean: bitmap filled with 0x00
 *   - all bits dirty: bitmap filled with 0xff
 *   - few bits dirty: one bit set in each of 16 rows around the vertical
 *                     middle of the bitmap
 *
 * The bitmap is re-initialised inside every timed iteration because both
 * update functions clear the bits they visit; the setup cost is therefore
 * included in the figures for both variants alike.
 *
 * Returns 0.
 */
int main(void) {
    int i;
    clock_t start, end;

    /* Pattern 1: nothing dirty. */
    start = clock();
    for (i = 0; i < ITERATIONS; i++) {
        memset(dirty, 0x00, sizeof(dirty));
        vnc_update_client_new();
    }
    end = clock();
    printf("All bits clean - vnc_update_client_new: %.2f secs\n", (double) (end-start)/CLOCKS_PER_SEC);

    start = clock();
    for (i = 0; i < ITERATIONS; i++) {
        memset(dirty, 0x00, sizeof(dirty));
        vnc_update_client_old();
    }
    end = clock();
    printf(" vnc_update_client_old: %.2f secs\n\n", (double) (end-start)/CLOCKS_PER_SEC);

    /* Pattern 2: every byte of the bitmap set to 0xff. */
    start = clock();
    for (i = 0; i < ITERATIONS; i++) {
        memset(dirty, 0xff, sizeof(dirty));
        vnc_update_client_new();
    }
    end = clock();
    printf("All bits dirty - vnc_update_client_new: %.2f secs\n", (double) (end-start)/CLOCKS_PER_SEC);

    start = clock();
    for (i = 0; i < ITERATIONS; i++) {
        memset(dirty, 0xff, sizeof(dirty));
        vnc_update_client_old();
    }
    end = clock();
    printf(" vnc_update_client_old: %.2f secs\n\n", (double) (end-start)/CLOCKS_PER_SEC);

    /* Pattern 3: a single column of 16 dirty bits in the middle rows. */
    start = clock();
    for (i = 0; i < ITERATIONS; i++) {
        int y;
        memset(dirty, 0x00, sizeof(dirty));
        for (y = VNC_MAX_HEIGHT/2-8; y < VNC_MAX_HEIGHT/2+8; y++) {
            set_bit(VNC_DIRTY_BITS/2,dirty[y]);
        }
        vnc_update_client_new();
    }
    end = clock();
    printf("Few bits dirty - vnc_update_client_new: %.2f secs\n", (double) (end-start)/CLOCKS_PER_SEC);

    start = clock();
    for (i = 0; i < ITERATIONS; i++) {
        int y;
        memset(dirty, 0x00, sizeof(dirty));
        for (y = VNC_MAX_HEIGHT/2-8; y < VNC_MAX_HEIGHT/2+8; y++) {
            set_bit(VNC_DIRTY_BITS/2,dirty[y]);
        }
        vnc_update_client_old();
    }
    end = clock();
    printf(" vnc_update_client_old: %.2f secs\n\n", (double) (end-start)/CLOCKS_PER_SEC);

    /* Report success to the host environment. */
    return 0;
}
[-- Attachment #2: Type: text/html, Size: 19625 bytes --]
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [Qemu-devel] [PATCH 2/3] ui/vnc: optimize dirty bitmap tracking
2013-11-19 13:48 ` Peter Lieven
@ 2013-11-19 14:06 ` Peter Lieven
0 siblings, 0 replies; 8+ messages in thread
From: Peter Lieven @ 2013-11-19 14:06 UTC (permalink / raw)
To: Anthony Liguori; +Cc: corentincj, qemu-devel
[-- Attachment #1: Type: text/plain, Size: 10296 bytes --]
On 19.11.2013 14:48, Peter Lieven wrote:
> On 18.11.2013 17:27, Anthony Liguori wrote:
>>
>>
>> On Nov 18, 2013 12:20 AM, "Peter Lieven" <pl@kamp.de <mailto:pl@kamp.de>> wrote:
>> >
>> > vnc_update_client currently scans the dirty bitmap of each client
>> > bitwise which is a very costly operation if only few bits are dirty.
>> > vnc_refresh_server_surface does almost the same.
>> > this patch optimizes both by utilizing the heavily optimized
>> > function find_next_bit to find the offset of the next dirty
>> > bit in the dirty bitmaps.
>> >
>> > Signed-off-by: Peter Lieven <pl@kamp.de <mailto:pl@kamp.de>>
>>
>> Can you include performance data?
>>
>
> I made some artificial analysis of vnc_update_client with the attached test code.
>
> $ gcc -O2 -o vnc_perf vnc_perf.c
> $ ./vnc_perf
> All bits clean - vnc_update_client_new: 0.07 secs
> vnc_update_client_old: 10.82 secs
>
> All bits dirty - vnc_update_client_new: 9.81 secs
> vnc_update_client_old: 20.00 secs
>
> Few bits dirty - vnc_update_client_new: 0.08 secs
> vnc_update_client_old: 10.62 secs
>
> find_and_clear_dirty_height() is still very slow, but I will look at this separately.
quite easy, but great effect:
replacing:
for (tmp_x = last_x; tmp_x < x; tmp_x++) {
clear_bit(tmp_x, dirty[y + h]);
}
with:
bitmap_clear(dirty[y + h], last_x, x - last_x);
in find_and_clear_dirty_height(), yields the following performance ;-)
All bits clean - vnc_update_client_new: 0.07 secs
vnc_update_client_old: 10.65 secs
All bits dirty - vnc_update_client_new: 0.69 secs
vnc_update_client_old: 19.86 secs
Few bits dirty - vnc_update_client_new: 0.07 secs
vnc_update_client_old: 10.69 secs
> Peter
>
> ---
>
> #define ITERATIONS 16*1024
>
> #define VNC_MAX_WIDTH 2560
> #define VNC_MAX_HEIGHT 2048
>
> #define VNC_DIRTY_PIXELS_PER_BIT 16
>
> /* VNC_DIRTY_BITS is the number of bits in the dirty bitmap. */
> #define VNC_DIRTY_BITS (VNC_MAX_WIDTH / VNC_DIRTY_PIXELS_PER_BIT)
>
> /* VNC_DIRTY_BITS_PER_LINE might be greater than VNC_DIRTY_BITS due to alignment */
> #define VNC_DIRTY_BITS_PER_LINE(x) (sizeof(x) / VNC_MAX_HEIGHT * BITS_PER_BYTE)
>
> #define BITS_PER_BYTE 8
> #define BITS_PER_LONG (sizeof (unsigned long) * BITS_PER_BYTE)
>
> #define BITOP_WORD(nr) ((nr) / BITS_PER_LONG)
>
> #define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d))
>
> #define BIT(nr) (1UL << (nr))
> #define BIT_MASK(nr) (1UL << ((nr) % BITS_PER_LONG))
> #define BIT_WORD(nr) ((nr) / BITS_PER_LONG)
> #define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(long))
>
> #define DECLARE_BITMAP(name,bits) \
> unsigned long name[BITS_TO_LONGS(bits)]
>
> #define ctzl ctz64
>
> #include <stdio.h>
> #include <stdint.h>
> #include <string.h>
> #include "time.h"
>
> static inline int ctz64(uint64_t val)
> {
> return val ? __builtin_ctzll(val) : 64;
> }
>
> DECLARE_BITMAP(dirty[VNC_MAX_HEIGHT], VNC_DIRTY_BITS);
>
> unsigned long find_next_bit(const unsigned long *addr, unsigned long size,
> unsigned long offset)
> {
> const unsigned long *p = addr + BITOP_WORD(offset);
> unsigned long result = offset & ~(BITS_PER_LONG-1);
> unsigned long tmp;
>
> if (offset >= size) {
> return size;
> }
> size -= result;
> offset %= BITS_PER_LONG;
> if (offset) {
> tmp = *(p++);
> tmp &= (~0UL << offset);
> if (size < BITS_PER_LONG) {
> goto found_first;
> }
> if (tmp) {
> goto found_middle;
> }
> size -= BITS_PER_LONG;
> result += BITS_PER_LONG;
> }
> while (size >= 4*BITS_PER_LONG) {
> unsigned long d1, d2, d3;
> tmp = *p;
> d1 = *(p+1);
> d2 = *(p+2);
> d3 = *(p+3);
> if (tmp) {
> goto found_middle;
> }
> if (d1 | d2 | d3) {
> break;
> }
> p += 4;
> result += 4*BITS_PER_LONG;
> size -= 4*BITS_PER_LONG;
> }
> while (size >= BITS_PER_LONG) {
> if ((tmp = *(p++))) {
> goto found_middle;
> }
> result += BITS_PER_LONG;
> size -= BITS_PER_LONG;
> }
> if (!size) {
> return result;
> }
> tmp = *p;
>
> found_first:
> tmp &= (~0UL >> (BITS_PER_LONG - size));
> if (tmp == 0UL) { /* Are any bits set? */
> return result + size; /* Nope. */
> }
> found_middle:
> return result + ctzl(tmp);
> }
>
> static inline int test_bit(int nr, const unsigned long *addr)
> {
> return 1UL & (addr[BIT_WORD(nr)] >> (nr & (BITS_PER_LONG-1)));
> }
>
> static inline void clear_bit(int nr, unsigned long *addr)
> {
> unsigned long mask = BIT_MASK(nr);
> unsigned long *p = addr + BIT_WORD(nr);
>
> *p &= ~mask;
> }
>
> static inline void set_bit(int nr, unsigned long *addr)
> {
> unsigned long mask = BIT_MASK(nr);
> unsigned long *p = addr + BIT_WORD(nr);
>
> *p |= mask;
> }
>
> static inline int test_and_clear_bit(int nr, unsigned long *addr)
> {
> unsigned long mask = BIT_MASK(nr);
> unsigned long *p = addr + BIT_WORD(nr);
> unsigned long old = *p;
>
> *p = old & ~mask;
> return (old & mask) != 0;
> }
>
> static int find_and_clear_dirty_height(int y, int last_x, int x, int height)
> {
> int h;
>
> for (h = 1; h < (height - y); h++) {
> int tmp_x;
> if (!test_bit(last_x, dirty[y + h])) {
> break;
> }
> for (tmp_x = last_x; tmp_x < x; tmp_x++) {
> clear_bit(tmp_x, dirty[y + h]);
> }
> }
>
> return h;
> }
>
> #define VNC_CLIENT_UPDATE_RECT() \
> if (last_x != -1) {\
> int h = find_and_clear_dirty_height(y, last_x, x, height);\
> }
>
> void vnc_update_client_new() {
> int width = VNC_MAX_WIDTH;
> int height = VNC_MAX_HEIGHT;
> int y = 0;
> for (;;) {
> int x;
> int last_x = -1;
> unsigned long offset = find_next_bit((unsigned long *) &dirty,
> height * VNC_DIRTY_BITS_PER_LINE(dirty),
> y * VNC_DIRTY_BITS_PER_LINE(dirty));
> if (offset == height * VNC_DIRTY_BITS_PER_LINE(dirty)) {
> /* no more dirty bits */
> break;
> }
> y = offset / VNC_DIRTY_BITS_PER_LINE(dirty);
>
> for (x = offset % VNC_DIRTY_BITS_PER_LINE(dirty);
> x < width / VNC_DIRTY_PIXELS_PER_BIT; x++) {
> if (test_and_clear_bit(x, dirty[y])) {
> if (last_x == -1) {
> last_x = x;
> }
> } else {
> VNC_CLIENT_UPDATE_RECT();
> last_x = -1;
> }
> }
> VNC_CLIENT_UPDATE_RECT();
> y++;
> }}
>
> void vnc_update_client_old() {
> int width = VNC_MAX_WIDTH;
> int height = VNC_MAX_HEIGHT;
> int y;
> for (y = 0; y < height; y++) {
> int x;
> int last_x = -1;
> for (x = 0; x < width / 16; x++) {
> if (test_and_clear_bit(x, dirty[y])) {
> if (last_x == -1) {
> last_x = x;
> }
> } else {
> VNC_CLIENT_UPDATE_RECT();
> last_x = -1;
> }
> }
> VNC_CLIENT_UPDATE_RECT();
> }
> }
>
> void main() {
> int i;
> clock_t start, end;
> start = clock();
> for (i = 0; i < ITERATIONS; i++) {
> memset(dirty, 0x00, sizeof(dirty));
> vnc_update_client_new();
> }
> end = clock();
> printf("All bits clean - vnc_update_client_new: %.2f secs\n", (double) (end-start)/CLOCKS_PER_SEC);
> start = clock();
> for (i = 0; i < ITERATIONS; i++) {
> memset(dirty, 0x00, sizeof(dirty));
> vnc_update_client_old();
> }
> end = clock();
> printf(" vnc_update_client_old: %.2f secs\n\n", (double) (end-start)/CLOCKS_PER_SEC);
> start = clock();
> for (i = 0; i < ITERATIONS; i++) {
> memset(dirty, 0xff, sizeof(dirty));
> vnc_update_client_new();
> }
> end = clock();
> printf("All bits dirty - vnc_update_client_new: %.2f secs\n", (double) (end-start)/CLOCKS_PER_SEC);
> start = clock();
> for (i = 0; i < ITERATIONS; i++) {
> memset(dirty, 0xff, sizeof(dirty));
> vnc_update_client_old();
> }
> end = clock();
> printf(" vnc_update_client_old: %.2f secs\n\n", (double) (end-start)/CLOCKS_PER_SEC);
> start = clock();
> for (i = 0; i < ITERATIONS; i++) {
> int y;
> memset(dirty, 0x00, sizeof(dirty));
> for (y = VNC_MAX_HEIGHT/2-8; y < VNC_MAX_HEIGHT/2+8; y++) {
> set_bit(VNC_DIRTY_BITS/2,dirty[y]);
> }
> vnc_update_client_new();
> }
> end = clock();
> printf("Few bits dirty - vnc_update_client_new: %.2f secs\n", (double) (end-start)/CLOCKS_PER_SEC);
> start = clock();
> for (i = 0; i < ITERATIONS; i++) {
> int y;
> memset(dirty, 0x00, sizeof(dirty));
> for (y = VNC_MAX_HEIGHT/2-8; y < VNC_MAX_HEIGHT/2+8; y++) {
> set_bit(VNC_DIRTY_BITS/2,dirty[y]);
> }
> vnc_update_client_old();
> }
> end = clock();
> printf(" vnc_update_client_old: %.2f secs\n\n", (double) (end-start)/CLOCKS_PER_SEC);
> return;
> }
>
--
Mit freundlichen Grüßen
Peter Lieven
...........................................................
KAMP Netzwerkdienste GmbH
Vestische Str. 89-91 | 46117 Oberhausen
Tel: +49 (0) 208.89 402-50 | Fax: +49 (0) 208.89 402-40
pl@kamp.de | http://www.kamp.de
Geschäftsführer: Heiner Lante | Michael Lante
Amtsgericht Duisburg | HRB Nr. 12154
USt-Id-Nr.: DE 120607556
...........................................................
[-- Attachment #2: Type: text/html, Size: 22504 bytes --]
^ permalink raw reply [flat|nested] 8+ messages in thread
end of thread, other threads:[~2013-11-19 14:06 UTC | newest]
Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-11-18 8:17 [Qemu-devel] [PATCH 0/3] ui/vnc: update optimizations Peter Lieven
2013-11-18 8:17 ` [Qemu-devel] [PATCH 1/3] ui/vnc: introduce VNC_DIRTY_PIXELS_PER_BIT macro Peter Lieven
2013-11-18 8:17 ` [Qemu-devel] [PATCH 2/3] ui/vnc: optimize dirty bitmap tracking Peter Lieven
2013-11-18 16:27 ` Anthony Liguori
2013-11-18 19:55 ` Peter Lieven
2013-11-19 13:48 ` Peter Lieven
2013-11-19 14:06 ` Peter Lieven
2013-11-18 8:17 ` [Qemu-devel] [PATCH 3/3] ui/vnc: disable adaptive update calculations if not needed Peter Lieven
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.