From: Paolo Bonzini <pbonzini@redhat.com>
To: Liu Ping Fan <qemulist@gmail.com>
Cc: Vasilis Liaskovitis <vasilis.liaskovitis@profitbricks.com>,
Stefan Hajnoczi <stefanha@gmail.com>,
qemu-devel@nongnu.org, Anthony Liguori <anthony@codemonkey.ws>,
Avi Kivity <avi@redhat.com>
Subject: Re: [Qemu-devel] [RFC v1 2/3] ramlist: apply fine grain lock for ram_list
Date: Sat, 10 Nov 2012 02:54:54 +0100 [thread overview]
Message-ID: <509DB3EE.4060409@redhat.com> (raw)
In-Reply-To: <1352430870-10129-3-git-send-email-qemulist@gmail.com>
Il 09/11/2012 04:14, Liu Ping Fan ha scritto:
> From: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
>
> Signed-off-by: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
> ---
> cpu-all.h | 1 +
> exec.c | 46 +++++++++++++++++++++++++++++++++++++++-------
> 2 files changed, 40 insertions(+), 7 deletions(-)
The problem here is that the ram_list is a pretty critical bit for TCG.
The migration thread series has patches that split the list in two: a
MRU-accessed list that uses the BQL, and another that uses a separate lock.
address_space_map could use the latter list. In order to improve
performance further, we could sort the list from the biggest to the
smallest region, like KVM does in the kernel.
Paolo
> diff --git a/cpu-all.h b/cpu-all.h
> index 6aa7e58..d3ead99 100644
> --- a/cpu-all.h
> +++ b/cpu-all.h
> @@ -498,6 +498,7 @@ typedef struct RAMBlock {
> } RAMBlock;
>
> typedef struct RAMList {
> + QemuMutex lock;
> uint8_t *phys_dirty;
> QLIST_HEAD(, RAMBlock) blocks;
> } RAMList;
> diff --git a/exec.c b/exec.c
> index fe84718..e5f1c0f 100644
> --- a/exec.c
> +++ b/exec.c
> @@ -2444,6 +2444,7 @@ static ram_addr_t find_ram_offset(ram_addr_t size)
> if (QLIST_EMPTY(&ram_list.blocks))
> return 0;
>
> + qemu_mutex_lock(&ram_list.lock);
> QLIST_FOREACH(block, &ram_list.blocks, next) {
> ram_addr_t end, next = RAM_ADDR_MAX;
>
> @@ -2459,6 +2460,7 @@ static ram_addr_t find_ram_offset(ram_addr_t size)
> mingap = next - end;
> }
> }
> + qemu_mutex_unlock(&ram_list.lock);
>
> if (offset == RAM_ADDR_MAX) {
> fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
> @@ -2474,8 +2476,10 @@ ram_addr_t last_ram_offset(void)
> RAMBlock *block;
> ram_addr_t last = 0;
>
> + qemu_mutex_lock(&ram_list.lock);
> QLIST_FOREACH(block, &ram_list.blocks, next)
> last = MAX(last, block->offset + block->length);
> + qemu_mutex_unlock(&ram_list.lock);
>
> return last;
> }
> @@ -2503,6 +2507,7 @@ void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
> RAMBlock *new_block, *block;
>
> new_block = NULL;
> + qemu_mutex_lock(&ram_list.lock);
> QLIST_FOREACH(block, &ram_list.blocks, next) {
> if (block->offset == addr) {
> new_block = block;
> @@ -2528,6 +2533,7 @@ void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
> abort();
> }
> }
> + qemu_mutex_unlock(&ram_list.lock);
> }
>
> static int memory_try_enable_merging(void *addr, size_t len)
> @@ -2582,12 +2588,6 @@ ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
> }
> new_block->length = size;
>
> - QLIST_INSERT_HEAD(&ram_list.blocks, new_block, next);
> -
> - ram_list.phys_dirty = g_realloc(ram_list.phys_dirty,
> - last_ram_offset() >> TARGET_PAGE_BITS);
> - memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
> - 0, size >> TARGET_PAGE_BITS);
> cpu_physical_memory_set_dirty_range(new_block->offset, size, 0xff);
>
> qemu_ram_setup_dump(new_block->host, size);
> @@ -2596,6 +2596,15 @@ ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
> if (kvm_enabled())
> kvm_setup_guest_memory(new_block->host, size);
>
> + qemu_mutex_lock(&ram_list.lock);
> + QLIST_INSERT_HEAD(&ram_list.blocks, new_block, next);
> +
> + ram_list.phys_dirty = g_realloc(ram_list.phys_dirty,
> + last_ram_offset() >> TARGET_PAGE_BITS);
> + memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
> + 0, size >> TARGET_PAGE_BITS);
> + qemu_mutex_unlock(&ram_list.lock);
> +
> return new_block->offset;
> }
>
> @@ -2608,19 +2617,23 @@ void qemu_ram_free_from_ptr(ram_addr_t addr)
> {
> RAMBlock *block;
>
> + qemu_mutex_lock(&ram_list.lock);
> QLIST_FOREACH(block, &ram_list.blocks, next) {
> if (addr == block->offset) {
> QLIST_REMOVE(block, next);
> g_free(block);
> + qemu_mutex_unlock(&ram_list.lock);
> return;
> }
> }
> + qemu_mutex_unlock(&ram_list.lock);
> }
>
> void qemu_ram_free(ram_addr_t addr)
> {
> RAMBlock *block;
>
> + qemu_mutex_lock(&ram_list.lock);
> QLIST_FOREACH(block, &ram_list.blocks, next) {
> if (addr == block->offset) {
> QLIST_REMOVE(block, next);
> @@ -2649,10 +2662,11 @@ void qemu_ram_free(ram_addr_t addr)
> #endif
> }
> g_free(block);
> + qemu_mutex_unlock(&ram_list.lock);
> return;
> }
> }
> -
> + qemu_mutex_unlock(&ram_list.lock);
> }
>
> #ifndef _WIN32
> @@ -2663,6 +2677,7 @@ void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
> int flags;
> void *area, *vaddr;
>
> + qemu_mutex_lock(&ram_list.lock);
> QLIST_FOREACH(block, &ram_list.blocks, next) {
> offset = addr - block->offset;
> if (offset < block->length) {
> @@ -2711,9 +2726,11 @@ void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
> memory_try_enable_merging(vaddr, length);
> qemu_ram_setup_dump(vaddr, length);
> }
> + qemu_mutex_unlock(&ram_list.lock);
> return;
> }
> }
> + qemu_mutex_unlock(&ram_list.lock);
> }
> #endif /* !_WIN32 */
>
> @@ -2729,6 +2746,7 @@ void *qemu_get_ram_ptr(ram_addr_t addr)
> {
> RAMBlock *block;
>
> + qemu_mutex_lock(&ram_list.lock);
> QLIST_FOREACH(block, &ram_list.blocks, next) {
> if (addr - block->offset < block->length) {
> /* Move this entry to the start of the list. */
> @@ -2742,15 +2760,18 @@ void *qemu_get_ram_ptr(ram_addr_t addr)
> * In that case just map until the end of the page.
> */
> if (block->offset == 0) {
> + qemu_mutex_unlock(&ram_list.lock);
> return xen_map_cache(addr, 0, 0);
> } else if (block->host == NULL) {
> block->host =
> xen_map_cache(block->offset, block->length, 1);
> }
> }
> + qemu_mutex_unlock(&ram_list.lock);
> return block->host + (addr - block->offset);
> }
> }
> + qemu_mutex_unlock(&ram_list.lock);
>
> fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
> abort();
> @@ -2765,6 +2786,7 @@ void *qemu_safe_ram_ptr(ram_addr_t addr)
> {
> RAMBlock *block;
>
> + qemu_mutex_lock(&ram_list.lock);
> QLIST_FOREACH(block, &ram_list.blocks, next) {
> if (addr - block->offset < block->length) {
> if (xen_enabled()) {
> @@ -2773,15 +2795,18 @@ void *qemu_safe_ram_ptr(ram_addr_t addr)
> * In that case just map until the end of the page.
> */
> if (block->offset == 0) {
> + qemu_mutex_unlock(&ram_list.lock);
> return xen_map_cache(addr, 0, 0);
> } else if (block->host == NULL) {
> block->host =
> xen_map_cache(block->offset, block->length, 1);
> }
> }
> + qemu_mutex_unlock(&ram_list.lock);
> return block->host + (addr - block->offset);
> }
> }
> + qemu_mutex_unlock(&ram_list.lock);
>
> fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
> abort();
> @@ -2801,13 +2826,16 @@ void *qemu_ram_ptr_length(ram_addr_t addr, ram_addr_t *size)
> } else {
> RAMBlock *block;
>
> + qemu_mutex_lock(&ram_list.lock);
> QLIST_FOREACH(block, &ram_list.blocks, next) {
> if (addr - block->offset < block->length) {
> if (addr - block->offset + *size > block->length)
> *size = block->length - addr + block->offset;
> + qemu_mutex_unlock(&ram_list.lock);
> return block->host + (addr - block->offset);
> }
> }
> + qemu_mutex_unlock(&ram_list.lock);
>
> fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
> abort();
> @@ -2829,6 +2857,7 @@ int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
> return 0;
> }
>
> + qemu_mutex_lock(&ram_list.lock);
> QLIST_FOREACH(block, &ram_list.blocks, next) {
> /* This case happens when the block is not mapped. */
> if (block->host == NULL) {
> @@ -2836,9 +2865,11 @@ int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
> }
> if (host - block->host < block->length) {
> *ram_addr = block->offset + (host - block->host);
> + qemu_mutex_unlock(&ram_list.lock);
> return 0;
> }
> }
> + qemu_mutex_unlock(&ram_list.lock);
>
> return -1;
> }
> @@ -3318,6 +3349,7 @@ static void memory_map_init(void)
> address_space_io.name = "I/O";
>
> qemu_mutex_init(&bounce.lock);
> + qemu_mutex_init(&ram_list.lock);
>
> memory_listener_register(&core_memory_listener, &address_space_memory);
> memory_listener_register(&io_memory_listener, &address_space_io);
>
next prev parent reply other threads:[~2012-11-10 1:55 UTC|newest]
Thread overview: 12+ messages / expand[flat|nested] mbox.gz Atom feed top
2012-11-09 3:14 [Qemu-devel] [RFC v1 0/3] make address_space_map() safe without biglock's protection Liu Ping Fan
2012-11-09 3:14 ` [Qemu-devel] [RFC v1 1/3] bouce buffer has fine grain lock Liu Ping Fan
2012-11-10 1:49 ` Paolo Bonzini
2012-11-12 6:23 ` liu ping fan
2012-11-12 8:53 ` Paolo Bonzini
2012-11-09 3:14 ` [Qemu-devel] [RFC v1 2/3] ramlist: apply fine grain lock for ram_list Liu Ping Fan
2012-11-10 1:54 ` Paolo Bonzini [this message]
2012-11-12 6:22 ` liu ping fan
2012-11-12 8:48 ` Paolo Bonzini
2012-11-13 6:07 ` liu ping fan
2012-11-09 3:14 ` [Qemu-devel] [RFC v1 3/3] make address_space_map safe Liu Ping Fan
[not found] ` <20130213121214.GC4576@dhcp-192-168-178-175.profitbricks.localdomain>
2013-03-07 1:59 ` liu ping fan
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=509DB3EE.4060409@redhat.com \
--to=pbonzini@redhat.com \
--cc=anthony@codemonkey.ws \
--cc=avi@redhat.com \
--cc=qemu-devel@nongnu.org \
--cc=qemulist@gmail.com \
--cc=stefanha@gmail.com \
--cc=vasilis.liaskovitis@profitbricks.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).