Re: [Qemu-devel] [PATCH] Convert ram_list to RCU DQ V3

All of lore.kernel.org
 help / color / mirror / Atom feed

From: Paolo Bonzini <pbonzini@redhat.com>
To: Mike Day <ncmike@ncultra.org>
Cc: Paul Mckenney <paulmck@linux.vnet.ibm.com>,
	Mathew Desnoyers <mathieu.desnoyers@efficios.com>,
	qemu-devel@nongnu.org, Anthony Liguori <anthony@codemonkey.ws>
Subject: Re: [Qemu-devel] [PATCH] Convert ram_list to RCU DQ V3
Date: Wed, 04 Sep 2013 15:49:54 +0200	[thread overview]
Message-ID: <52273A82.7010006@redhat.com> (raw)
In-Reply-To: <1378301382-18677-1-git-send-email-ncmike@ncultra.org>

Il 04/09/2013 15:29, Mike Day ha scritto:
> @@ -717,19 +731,21 @@ static int ram_save_iterate(QEMUFile *f, void *opaque)
>      qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
>      total_sent += 8;
>      bytes_transferred += total_sent;
> -
>      return total_sent;
>  }
>  
>  static int ram_save_complete(QEMUFile *f, void *opaque)
>  {
> -    qemu_mutex_lock_ramlist();
> +
> +    qemu_mutex_lock_iothread();
>      migration_bitmap_sync();
> +    qemu_mutex_unlock_iothread();

ram_save_complete runs with the iothread taken.  Since the lock is not
recursive, this will cause a deadlock.

To test migration, I suggest you use virt-test.  Install
autotest-framework if you run Fedora, clone
https://github.com/autotest/virt-test.git and do "./run -t qemu
--qemu-bin=/path/to/qemu-system-x86_64".

>      ram_control_before_iterate(f, RAM_CONTROL_FINISH);
>  
>      /* try transferring iterative blocks of memory */
>  
> +    rcu_read_lock();
>      /* flush all remaining blocks regardless of rate limiting */
>      while (true) {
>          int bytes_sent;
> @@ -741,11 +757,10 @@ static int ram_save_complete(QEMUFile *f, void *opaque)
>          }
>          bytes_transferred += bytes_sent;
>      }
> -
> +    rcu_read_unlock();
>      ram_control_after_iterate(f, RAM_CONTROL_FINISH);
>      migration_end();
>  
> -    qemu_mutex_unlock_ramlist();
>      qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
>  
>      return 0;
> diff --git a/exec.c b/exec.c
> index 5eebcc1..52b282a 100644
> --- a/exec.c
> +++ b/exec.c
> @@ -46,7 +46,7 @@
>  #endif
>  #include "exec/cpu-all.h"
>  #include "qemu/tls.h"
> -
> +#include "qemu/rcu_queue.h"
>  #include "exec/cputlb.h"
>  #include "translate-all.h"
>  
> @@ -57,7 +57,10 @@
>  #if !defined(CONFIG_USER_ONLY)
>  static int in_migration;
>  
> -RAMList ram_list = { .blocks = QTAILQ_HEAD_INITIALIZER(ram_list.blocks) };
> +/* ram_list is enabled for RCU access - writes should be protected
> + * by the iothread lock or an equivalent mutex.
> + */

/* ram_list is read under rcu_read_lock()/rcu_read_unlock().  Writes
 * are protected by a lock, currently the iothread lock.
 */

> +RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
>  
>  static MemoryRegion *system_memory;
>  static MemoryRegion *system_io;
> @@ -1123,16 +1138,18 @@ static int memory_try_enable_merging(void *addr, size_t len)
>      return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
>  }
>  
> +/* Called with the iothread lock being held, so we can
> + * foregoe protecting the ram_list. When that changes,

s/foregoe/forgo/ ...

> + * acquire the iothread mutex before writing the list below.

... but I don't think the comment is accurate.  Being a very coarse
lock, the iothread mutex will almost always be taken by the caller of a
function that requires it, for two reasons:

1) a function will almost always have callers that hold the lock and
callers that don't

2) the iothread mutex must always be taken _outside_ other locks to
prevent ABBA deadlock.

So I would just write /* Called with the iothread lock held */ in the
write sides (in fact the current standard is to document stuff that is
called _without_ the lock held! :)).  Reads that do not take the
rcu_read_lock() of course need more explanation, which is what you did
for find_ram_offset() and qemu_ram_set_idstr().

> + */
>  ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
>                                     MemoryRegion *mr)
>  {
> -    RAMBlock *block, *new_block;
> +    RAMBlock *block, *new_block, *last_block = 0;
>  
>      size = TARGET_PAGE_ALIGN(size);
>      new_block = g_malloc0(sizeof(*new_block));
>  
> -    /* This assumes the iothread lock is taken here too.  */
> -    qemu_mutex_lock_ramlist();
>      new_block->mr = mr;
>      new_block->offset = find_ram_offset(size);
>      if (host) {
> @@ -1200,33 +1220,40 @@ ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr)
>      return qemu_ram_alloc_from_ptr(size, NULL, mr);
>  }
>  
> +static void reclaim_ramblock(struct rcu_head *prcu)
> +{
> +    RAMBlock *block = container_of(prcu, struct RAMBlock, rcu);
> +    g_free(block);
> +}
> +
> +
>  void qemu_ram_free_from_ptr(ram_addr_t addr)
>  {
>      RAMBlock *block;
>  
> -    /* This assumes the iothread lock is taken here too.  */
> -    qemu_mutex_lock_ramlist();
> -    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
> +    /* This assumes the iothread lock is taken here too. */
> +    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
>          if (addr == block->offset) {
> -            QTAILQ_REMOVE(&ram_list.blocks, block, next);
> +            QLIST_REMOVE_RCU(block, next);
>              ram_list.mru_block = NULL;
>              ram_list.version++;
> -            g_free(block);
> +            call_rcu1(&block->rcu, reclaim_ramblock);


You can use call_rcu too:

    call_rcu(block, reclaim_ramblock, rcu);

or possibly even this:

    call_rcu(block, g_free, rcu);

This removes the container_of from reclaim_ramblock, which can take a
RAMBlock * directly.

>              break;
>          }
>      }
> -    qemu_mutex_unlock_ramlist();
>  }
>  
>  void qemu_ram_free(ram_addr_t addr)
>  {
>      RAMBlock *block;
>  
> -    /* This assumes the iothread lock is taken here too.  */
> -    qemu_mutex_lock_ramlist();
> -    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
> +    /* This assumes the iothread lock is taken here too.
> +     * if that changes, accesses to ram_list need to be protected
> +     * by a mutex (writes) or an rcu read lock (reads)
> +     */
> +    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
>          if (addr == block->offset) {
> -            QTAILQ_REMOVE(&ram_list.blocks, block, next);
> +            QLIST_REMOVE_RCU(block, next);
>              ram_list.mru_block = NULL;
>              ram_list.version++;
>              if (block->flags & RAM_PREALLOC_MASK) {
> @@ -1249,12 +1276,10 @@ void qemu_ram_free(ram_addr_t addr)
>                      qemu_anon_ram_free(block->host, block->length);
>                  }
>              }
> -            g_free(block);
> +            call_rcu1(&block->rcu, reclaim_ramblock);

Same here.

>              break;
>          }
>      }
> -    qemu_mutex_unlock_ramlist();
> -
>  }
>  
>  #ifndef _WIN32
> diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h
> index e088089..53aa70d 100644
> --- a/include/exec/cpu-all.h
> +++ b/include/exec/cpu-all.h
> @@ -24,6 +24,7 @@
>  #include "qemu/thread.h"
>  #include "qom/cpu.h"
>  #include "qemu/tls.h"
> +#include "qemu/rcu.h"
>  
>  /* some important defines:
>   *
> @@ -448,6 +449,7 @@ extern ram_addr_t ram_size;
>  #define RAM_PREALLOC_MASK   (1 << 0)
>  
>  typedef struct RAMBlock {
> +    struct rcu_head rcu;
>      struct MemoryRegion *mr;
>      uint8_t *host;
>      ram_addr_t offset;
> @@ -457,7 +459,7 @@ typedef struct RAMBlock {
>      /* Reads can take either the iothread or the ramlist lock.
>       * Writes must take both locks.
>       */

Not true anymore---please remove the ramlist lock altogether.

> -    QTAILQ_ENTRY(RAMBlock) next;
> +    QLIST_ENTRY(RAMBlock) next;
>  #if defined(__linux__) && !defined(TARGET_S390X)
>      int fd;
>  #endif
> @@ -469,7 +471,7 @@ typedef struct RAMList {
>      uint8_t *phys_dirty;
>      RAMBlock *mru_block;
>      /* Protected by the ramlist lock.  */

Not true anymore.

Almost there!  I'm confident that v4 will be fine!

Paolo

> -    QTAILQ_HEAD(, RAMBlock) blocks;
> +    QLIST_HEAD(, RAMBlock) blocks;
>      uint32_t version;
>  } RAMList;
>  extern RAMList ram_list;
> diff --git a/include/qemu/rcu_queue.h b/include/qemu/rcu_queue.h
> index e2b8ba5..d159850 100644
> --- a/include/qemu/rcu_queue.h
> +++ b/include/qemu/rcu_queue.h
> @@ -37,6 +37,14 @@
>  extern "C" {
>  #endif
>  
> +
> +/*
> + * List access methods.
> + */
> +#define QLIST_EMPTY_RCU(head) (atomic_rcu_read(&(head)->lh_first) == NULL)
> +#define QLIST_FIRST_RCU(head) (atomic_rcu_read(&(head)->lh_first))
> +#define QLIST_NEXT_RCU(elm, field) (atomic_rcu_read(&(elm)->field.le_next))
> +
>  /*
>   * List functions.
>   */
>

     prev parent reply	other threads:[~2013-09-04 13:50 UTC|newest]

Thread overview: 2+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-09-04 13:29 [Qemu-devel] [PATCH] Convert ram_list to RCU DQ V3 Mike Day
2013-09-04 13:49 ` Paolo Bonzini [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=52273A82.7010006@redhat.com \
    --to=pbonzini@redhat.com \
    --cc=anthony@codemonkey.ws \
    --cc=mathieu.desnoyers@efficios.com \
    --cc=ncmike@ncultra.org \
    --cc=paulmck@linux.vnet.ibm.com \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.