All of lore.kernel.org
 help / color / mirror / Atom feed
From: Peter Lieven <pl@kamp.de>
To: Anthony Liguori <anthony@codemonkey.ws>
Cc: corentincj@iksaif.net, qemu-devel@nongnu.org
Subject: Re: [Qemu-devel] [PATCH 2/3] ui/vnc: optimize dirty bitmap tracking
Date: Tue, 19 Nov 2013 15:06:37 +0100	[thread overview]
Message-ID: <528B706D.3080705@kamp.de> (raw)
In-Reply-To: <528B6C40.2030401@kamp.de>

[-- Attachment #1: Type: text/plain, Size: 10296 bytes --]

On 19.11.2013 14:48, Peter Lieven wrote:
> On 18.11.2013 17:27, Anthony Liguori wrote:
>>
>>
>> On Nov 18, 2013 12:20 AM, "Peter Lieven" <pl@kamp.de <mailto:pl@kamp.de>> wrote:
>> >
>> > vnc_update_client currently scans the dirty bitmap of each client
>> > bitwise which is a very costly operation if only few bits are dirty.
>> > vnc_refresh_server_surface does almost the same.
>> > this patch optimizes both by utilizing the heavily optimized
>> > function find_next_bit to find the offset of the next dirty
>> > bit in the dirty bitmaps.
>> >
>> > Signed-off-by: Peter Lieven <pl@kamp.de <mailto:pl@kamp.de>>
>>
>> Can you include performance data?
>>
>
> I made some aritificial analysis of vnc_update_client with the attached test code.
>
> $ gcc -O2  -o vnc_perf vnc_perf.c
> $ ./vnc_perf
> All bits clean - vnc_update_client_new: 0.07 secs
>                  vnc_update_client_old: 10.82 secs
>
> All bits dirty - vnc_update_client_new: 9.81 secs
>                  vnc_update_client_old: 20.00 secs
>
> Few bits dirty - vnc_update_client_new: 0.08 secs
>                  vnc_update_client_old: 10.62 secs
>
> find_and_clear_dirty_height() is still very slow, but I will look at this separately.
quite easy, but great effect:

replacing:

          for (tmp_x = last_x; tmp_x < x; tmp_x++) {
              clear_bit(tmp_x, dirty[y + h]);
          }

with:

         bitmap_clear(dirty[y + h], last_x, x - last_x);

in find_and_clear_dirty_height(), yields the following performance ;-)

All bits clean - vnc_update_client_new: 0.07 secs
                  vnc_update_client_old: 10.65 secs

All bits dirty - vnc_update_client_new: 0.69 secs
                  vnc_update_client_old: 19.86 secs

Few bits dirty - vnc_update_client_new: 0.07 secs
                  vnc_update_client_old: 10.69 secs


> Peter
>
> ---
>
> #define ITERATIONS 16*1024
>
> #define VNC_MAX_WIDTH 2560
> #define VNC_MAX_HEIGHT 2048
>
> #define VNC_DIRTY_PIXELS_PER_BIT 16
>
> /* VNC_DIRTY_BITS is the number of bits in the dirty bitmap. */
> #define VNC_DIRTY_BITS (VNC_MAX_WIDTH / VNC_DIRTY_PIXELS_PER_BIT)
>
> /* VNC_DIRTY_BITS_PER_LINE might be greater than VNC_DIRTY_BITS due to alignment */
> #define VNC_DIRTY_BITS_PER_LINE(x) (sizeof(x) / VNC_MAX_HEIGHT * BITS_PER_BYTE)
>
> #define BITS_PER_BYTE           8
> #define BITS_PER_LONG           (sizeof (unsigned long) * BITS_PER_BYTE)
>
> #define BITOP_WORD(nr)        ((nr) / BITS_PER_LONG)
>
> #define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d))
>
> #define BIT(nr)            (1UL << (nr))
> #define BIT_MASK(nr)        (1UL << ((nr) % BITS_PER_LONG))
> #define BIT_WORD(nr)        ((nr) / BITS_PER_LONG)
> #define BITS_TO_LONGS(nr)    DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(long))
>
> #define DECLARE_BITMAP(name,bits)                  \
>     unsigned long name[BITS_TO_LONGS(bits)]
>
> #define ctzl ctz64
>
> #include <stdio.h>
> #include <stdint.h>
> #include <string.h>
> #include "time.h"
>
> static inline int ctz64(uint64_t val)
> {
>     return val ? __builtin_ctzll(val) : 64;
> }
>
> DECLARE_BITMAP(dirty[VNC_MAX_HEIGHT], VNC_DIRTY_BITS);
>
> unsigned long find_next_bit(const unsigned long *addr, unsigned long size,
>                 unsigned long offset)
> {
>     const unsigned long *p = addr + BITOP_WORD(offset);
>     unsigned long result = offset & ~(BITS_PER_LONG-1);
>     unsigned long tmp;
>
>     if (offset >= size) {
>         return size;
>     }
>     size -= result;
>     offset %= BITS_PER_LONG;
>     if (offset) {
>         tmp = *(p++);
>         tmp &= (~0UL << offset);
>         if (size < BITS_PER_LONG) {
>             goto found_first;
>         }
>         if (tmp) {
>             goto found_middle;
>         }
>         size -= BITS_PER_LONG;
>         result += BITS_PER_LONG;
>     }
>     while (size >= 4*BITS_PER_LONG) {
>         unsigned long d1, d2, d3;
>         tmp = *p;
>         d1 = *(p+1);
>         d2 = *(p+2);
>         d3 = *(p+3);
>         if (tmp) {
>             goto found_middle;
>         }
>         if (d1 | d2 | d3) {
>             break;
>         }
>         p += 4;
>         result += 4*BITS_PER_LONG;
>         size -= 4*BITS_PER_LONG;
>     }
>     while (size >= BITS_PER_LONG) {
>         if ((tmp = *(p++))) {
>             goto found_middle;
>         }
>         result += BITS_PER_LONG;
>         size -= BITS_PER_LONG;
>     }
>     if (!size) {
>         return result;
>     }
>     tmp = *p;
>
> found_first:
>     tmp &= (~0UL >> (BITS_PER_LONG - size));
>     if (tmp == 0UL) {        /* Are any bits set? */
>         return result + size;    /* Nope. */
>     }
> found_middle:
>     return result + ctzl(tmp);
> }
>
> static inline int test_bit(int nr, const unsigned long *addr)
> {
>     return 1UL & (addr[BIT_WORD(nr)] >> (nr & (BITS_PER_LONG-1)));
> }
>
> static inline void clear_bit(int nr, unsigned long *addr)
> {
>     unsigned long mask = BIT_MASK(nr);
>         unsigned long *p = addr + BIT_WORD(nr);
>
>     *p &= ~mask;
> }
>
> static inline void set_bit(int nr, unsigned long *addr)
> {
>     unsigned long mask = BIT_MASK(nr);
>         unsigned long *p = addr + BIT_WORD(nr);
>
>     *p  |= mask;
> }
>
> static inline int test_and_clear_bit(int nr, unsigned long *addr)
> {
>     unsigned long mask = BIT_MASK(nr);
>         unsigned long *p = addr + BIT_WORD(nr);
>     unsigned long old = *p;
>
>     *p = old & ~mask;
>     return (old & mask) != 0;
> }
>
> static int find_and_clear_dirty_height(int y, int last_x, int x, int height)
> {
>     int h;
>
>     for (h = 1; h < (height - y); h++) {
>         int tmp_x;
>         if (!test_bit(last_x, dirty[y + h])) {
>             break;
>         }
>         for (tmp_x = last_x; tmp_x < x; tmp_x++) {
>             clear_bit(tmp_x, dirty[y + h]);
>         }
>     }
>
>     return h;
> }
>
> #define VNC_CLIENT_UPDATE_RECT() \
>     if (last_x != -1) {\
>         int h = find_and_clear_dirty_height(y, last_x, x, height);\
>     }
>
> void vnc_update_client_new() {
>         int width = VNC_MAX_WIDTH;
>         int height = VNC_MAX_HEIGHT;
>         int y = 0;
>         for (;;) {
>             int x;
>             int last_x = -1;
>             unsigned long offset = find_next_bit((unsigned long *) &dirty,
>                                                  height * VNC_DIRTY_BITS_PER_LINE(dirty),
>                                                  y * VNC_DIRTY_BITS_PER_LINE(dirty));
>             if (offset == height * VNC_DIRTY_BITS_PER_LINE(dirty)) {
>                 /* no more dirty bits */
>                 break;
>             }
>             y = offset / VNC_DIRTY_BITS_PER_LINE(dirty);
>
>             for (x = offset % VNC_DIRTY_BITS_PER_LINE(dirty);
>                  x < width / VNC_DIRTY_PIXELS_PER_BIT; x++) {
>                 if (test_and_clear_bit(x, dirty[y])) {
>                     if (last_x == -1) {
>                         last_x = x;
>                     }
>                 } else {
>                     VNC_CLIENT_UPDATE_RECT();
>                     last_x = -1;
>                 }
>             }
>             VNC_CLIENT_UPDATE_RECT();
>             y++;
>         }}
>
> void vnc_update_client_old() {
>         int width = VNC_MAX_WIDTH;
>         int height = VNC_MAX_HEIGHT;
>         int y;
>         for (y = 0; y < height; y++) {
>             int x;
>             int last_x = -1;
>             for (x = 0; x < width / 16; x++) {
>                 if (test_and_clear_bit(x, dirty[y])) {
>                     if (last_x == -1) {
>                         last_x = x;
>                     }
>                 } else {
>                     VNC_CLIENT_UPDATE_RECT();
>                     last_x = -1;
>                 }
>             }
>             VNC_CLIENT_UPDATE_RECT();
>         }
> }
>
> void main() {
>     int i;
>     clock_t start, end;
>     start = clock();
>     for (i = 0; i < ITERATIONS; i++) {
>          memset(dirty, 0x00, sizeof(dirty));
>          vnc_update_client_new();
>     }
>     end = clock();
>     printf("All bits clean - vnc_update_client_new: %.2f secs\n", (double) (end-start)/CLOCKS_PER_SEC);
>     start = clock();
>     for (i = 0; i < ITERATIONS; i++) {
>          memset(dirty, 0x00, sizeof(dirty));
>          vnc_update_client_old();
>     }
>     end = clock();
>     printf("                 vnc_update_client_old: %.2f secs\n\n", (double) (end-start)/CLOCKS_PER_SEC);
>     start = clock();
>     for (i = 0; i < ITERATIONS; i++) {
>          memset(dirty, 0xff, sizeof(dirty));
>          vnc_update_client_new();
>     }
>     end = clock();
>     printf("All bits dirty - vnc_update_client_new: %.2f secs\n", (double) (end-start)/CLOCKS_PER_SEC);
>     start = clock();
>     for (i = 0; i < ITERATIONS; i++) {
>          memset(dirty, 0xff, sizeof(dirty));
>          vnc_update_client_old();
>     }
>     end = clock();
>     printf("                 vnc_update_client_old: %.2f secs\n\n", (double) (end-start)/CLOCKS_PER_SEC);
>     start = clock();
>     for (i = 0; i < ITERATIONS; i++) {
>          int y;
>          memset(dirty, 0x00, sizeof(dirty));
>          for (y = VNC_MAX_HEIGHT/2-8; y < VNC_MAX_HEIGHT/2+8; y++) {
>              set_bit(VNC_DIRTY_BITS/2,dirty[y]);
>          }
>          vnc_update_client_new();
>     }
>     end = clock();
>     printf("Few bits dirty - vnc_update_client_new: %.2f secs\n", (double) (end-start)/CLOCKS_PER_SEC);
>     start = clock();
>     for (i = 0; i < ITERATIONS; i++) {
>          int y;
>          memset(dirty, 0x00, sizeof(dirty));
>          for (y = VNC_MAX_HEIGHT/2-8; y < VNC_MAX_HEIGHT/2+8; y++) {
>              set_bit(VNC_DIRTY_BITS/2,dirty[y]);
>          }
>          vnc_update_client_old();
>     }
>     end = clock();
>     printf("                 vnc_update_client_old: %.2f secs\n\n", (double) (end-start)/CLOCKS_PER_SEC);
>     return;
> }
>


-- 

Mit freundlichen Grüßen

Peter Lieven

...........................................................

   KAMP Netzwerkdienste GmbH
   Vestische Str. 89-91 | 46117 Oberhausen
   Tel: +49 (0) 208.89 402-50 | Fax: +49 (0) 208.89 402-40
   pl@kamp.de | http://www.kamp.de

   Geschäftsführer: Heiner Lante | Michael Lante
   Amtsgericht Duisburg | HRB Nr. 12154
   USt-Id-Nr.: DE 120607556

...........................................................


[-- Attachment #2: Type: text/html, Size: 22504 bytes --]

  reply	other threads:[~2013-11-19 14:06 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-11-18  8:17 [Qemu-devel] [PATCH 0/3] ui/vnc: update optimizations Peter Lieven
2013-11-18  8:17 ` [Qemu-devel] [PATCH 1/3] ui/vnc: introduce VNC_DIRTY_PIXELS_PER_BIT macro Peter Lieven
2013-11-18  8:17 ` [Qemu-devel] [PATCH 2/3] ui/vnc: optimize dirty bitmap tracking Peter Lieven
2013-11-18 16:27   ` Anthony Liguori
2013-11-18 19:55     ` Peter Lieven
2013-11-19 13:48     ` Peter Lieven
2013-11-19 14:06       ` Peter Lieven [this message]
2013-11-18  8:17 ` [Qemu-devel] [PATCH 3/3] ui/vnc: disable adaptive update calculations if not needed Peter Lieven

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=528B706D.3080705@kamp.de \
    --to=pl@kamp.de \
    --cc=anthony@codemonkey.ws \
    --cc=corentincj@iksaif.net \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.