qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: "Dr. David Alan Gilbert (git)" <dgilbert@redhat.com>
To: qemu-devel@nongnu.org, leobras@redhat.com, thuth@redhat.com,
	peter.maydell@linaro.org, vgoyal@redhat.com
Cc: peterx@redhat.com, quintela@redhat.com
Subject: [PULL 2/5] Revert "migration: Simplify unqueue_page()"
Date: Tue,  2 Aug 2022 16:54:44 +0100	[thread overview]
Message-ID: <20220802155447.216018-3-dgilbert@redhat.com> (raw)
In-Reply-To: <20220802155447.216018-1-dgilbert@redhat.com>

From: Thomas Huth <thuth@redhat.com>

This reverts commit cfd66f30fb0f735df06ff4220e5000290a43dad3.

The simplification of unqueue_page() introduced a bug that sometimes
breaks migration on s390x hosts.

The problem is not fully understood yet, but since we are already in
the freeze for QEMU 7.1 and we need something working there, let's
revert this patch for the upcoming release. The optimization can be
redone later again in a proper way if necessary.

Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2099934
Signed-off-by: Thomas Huth <thuth@redhat.com>
Message-Id: <20220802061949.331576-1-thuth@redhat.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
---
 migration/ram.c        | 37 ++++++++++++++++++++++++++-----------
 migration/trace-events |  3 ++-
 2 files changed, 28 insertions(+), 12 deletions(-)

diff --git a/migration/ram.c b/migration/ram.c
index b94669ba5d..dc1de9ddbc 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -1612,7 +1612,6 @@ static RAMBlock *unqueue_page(RAMState *rs, ram_addr_t *offset)
 {
     struct RAMSrcPageRequest *entry;
     RAMBlock *block = NULL;
-    size_t page_size;
 
     if (!postcopy_has_request(rs)) {
         return NULL;
@@ -1629,13 +1628,10 @@ static RAMBlock *unqueue_page(RAMState *rs, ram_addr_t *offset)
     entry = QSIMPLEQ_FIRST(&rs->src_page_requests);
     block = entry->rb;
     *offset = entry->offset;
-    page_size = qemu_ram_pagesize(block);
-    /* Each page request should only be multiple page size of the ramblock */
-    assert((entry->len % page_size) == 0);
 
-    if (entry->len > page_size) {
-        entry->len -= page_size;
-        entry->offset += page_size;
+    if (entry->len > TARGET_PAGE_SIZE) {
+        entry->len -= TARGET_PAGE_SIZE;
+        entry->offset += TARGET_PAGE_SIZE;
     } else {
         memory_region_unref(block->mr);
         QSIMPLEQ_REMOVE_HEAD(&rs->src_page_requests, next_req);
@@ -1643,9 +1639,6 @@ static RAMBlock *unqueue_page(RAMState *rs, ram_addr_t *offset)
         migration_consume_urgent_request();
     }
 
-    trace_unqueue_page(block->idstr, *offset,
-                       test_bit((*offset >> TARGET_PAGE_BITS), block->bmap));
-
     return block;
 }
 
@@ -2069,8 +2062,30 @@ static bool get_queued_page(RAMState *rs, PageSearchStatus *pss)
 {
     RAMBlock  *block;
     ram_addr_t offset;
+    bool dirty;
+
+    do {
+        block = unqueue_page(rs, &offset);
+        /*
+         * We're sending this page, and since it's postcopy nothing else
+         * will dirty it, and we must make sure it doesn't get sent again
+         * even if this queue request was received after the background
+         * search already sent it.
+         */
+        if (block) {
+            unsigned long page;
+
+            page = offset >> TARGET_PAGE_BITS;
+            dirty = test_bit(page, block->bmap);
+            if (!dirty) {
+                trace_get_queued_page_not_dirty(block->idstr, (uint64_t)offset,
+                                                page);
+            } else {
+                trace_get_queued_page(block->idstr, (uint64_t)offset, page);
+            }
+        }
 
-    block = unqueue_page(rs, &offset);
+    } while (block && !dirty);
 
     if (block) {
         /* See comment above postcopy_preempted_contains() */
diff --git a/migration/trace-events b/migration/trace-events
index a34afe7b85..57003edcbd 100644
--- a/migration/trace-events
+++ b/migration/trace-events
@@ -85,6 +85,8 @@ put_qlist_end(const char *field_name, const char *vmsd_name) "%s(%s)"
 qemu_file_fclose(void) ""
 
 # ram.c
+get_queued_page(const char *block_name, uint64_t tmp_offset, unsigned long page_abs) "%s/0x%" PRIx64 " page_abs=0x%lx"
+get_queued_page_not_dirty(const char *block_name, uint64_t tmp_offset, unsigned long page_abs) "%s/0x%" PRIx64 " page_abs=0x%lx"
 migration_bitmap_sync_start(void) ""
 migration_bitmap_sync_end(uint64_t dirty_pages) "dirty_pages %" PRIu64
 migration_bitmap_clear_dirty(char *str, uint64_t start, uint64_t size, unsigned long page) "rb %s start 0x%"PRIx64" size 0x%"PRIx64" page 0x%lx"
@@ -110,7 +112,6 @@ ram_save_iterate_big_wait(uint64_t milliconds, int iterations) "big wait: %" PRI
 ram_load_complete(int ret, uint64_t seq_iter) "exit_code %d seq iteration %" PRIu64
 ram_write_tracking_ramblock_start(const char *block_id, size_t page_size, void *addr, size_t length) "%s: page_size: %zu addr: %p length: %zu"
 ram_write_tracking_ramblock_stop(const char *block_id, size_t page_size, void *addr, size_t length) "%s: page_size: %zu addr: %p length: %zu"
-unqueue_page(char *block, uint64_t offset, bool dirty) "ramblock '%s' offset 0x%"PRIx64" dirty %d"
 postcopy_preempt_triggered(char *str, unsigned long page) "during sending ramblock %s offset 0x%lx"
 postcopy_preempt_restored(char *str, unsigned long page) "ramblock %s offset 0x%lx"
 postcopy_preempt_hit(char *str, uint64_t offset) "ramblock %s offset 0x%"PRIx64
-- 
2.37.1



  parent reply	other threads:[~2022-08-02 15:56 UTC|newest]

Thread overview: 7+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-08-02 15:54 [PULL 0/5] migration queue Dr. David Alan Gilbert (git)
2022-08-02 15:54 ` [PULL 1/5] migration: add remaining params->has_* = true in migration_instance_init() Dr. David Alan Gilbert (git)
2022-08-02 15:54 ` Dr. David Alan Gilbert (git) [this message]
2022-08-02 15:54 ` [PULL 3/5] migration: Assert that migrate_multifd_compression() returns an in-range value Dr. David Alan Gilbert (git)
2022-08-02 15:54 ` [PULL 4/5] migration: Define BLK_MIG_BLOCK_SIZE as unsigned long long Dr. David Alan Gilbert (git)
2022-08-02 15:54 ` [PULL 5/5] virtiofsd: Disable killpriv_v2 by default Dr. David Alan Gilbert (git)
2022-08-02 19:12 ` [PULL 0/5] migration queue Richard Henderson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220802155447.216018-3-dgilbert@redhat.com \
    --to=dgilbert@redhat.com \
    --cc=leobras@redhat.com \
    --cc=peter.maydell@linaro.org \
    --cc=peterx@redhat.com \
    --cc=qemu-devel@nongnu.org \
    --cc=quintela@redhat.com \
    --cc=thuth@redhat.com \
    --cc=vgoyal@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).