All of lore.kernel.org
 help / color / mirror / Atom feed
From: Fabiano Rosas <farosas@suse.de>
To: Peter Xu <peterx@redhat.com>, qemu-devel@nongnu.org
Cc: peterx@redhat.com, Juan Quintela <quintela@redhat.com>
Subject: Re: [PATCH v3 03/10] migration: Refactor error handling in source return path
Date: Thu, 05 Oct 2023 09:57:58 -0300	[thread overview]
Message-ID: <87il7lfcq1.fsf@suse.de> (raw)
In-Reply-To: <20231004220240.167175-4-peterx@redhat.com>

Peter Xu <peterx@redhat.com> writes:

> @@ -1882,48 +1870,46 @@ static void *source_return_path_thread(void *opaque)
>      uint32_t tmp32, sibling_error;
>      ram_addr_t start = 0; /* =0 to silence warning */
>      size_t  len = 0, expected_len;
> +    Error *err = NULL;
>      int res;
>  
>      trace_source_return_path_thread_entry();
>      rcu_register_thread();
>  
> -    while (!ms->rp_state.error && !qemu_file_get_error(rp) &&
> +    while (!migrate_has_error(ms) && !qemu_file_get_error(rp) &&
>             migration_is_setup_or_active(ms->state)) {
>          trace_source_return_path_thread_loop_top();
> +
>          header_type = qemu_get_be16(rp);
>          header_len = qemu_get_be16(rp);
>  
>          if (qemu_file_get_error(rp)) {
> -            mark_source_rp_bad(ms);
>              goto out;
>          }

This error will be lost because outside the loop we only check for err.

>  
>          if (header_type >= MIG_RP_MSG_MAX ||
>              header_type == MIG_RP_MSG_INVALID) {
> -            error_report("RP: Received invalid message 0x%04x length 0x%04x",
> -                         header_type, header_len);
> -            mark_source_rp_bad(ms);
> +            error_setg(&err, "Received invalid message 0x%04x length 0x%04x",
> +                       header_type, header_len);
>              goto out;
>          }
>  
>          if ((rp_cmd_args[header_type].len != -1 &&
>              header_len != rp_cmd_args[header_type].len) ||
>              header_len > sizeof(buf)) {
> -            error_report("RP: Received '%s' message (0x%04x) with"
> -                         "incorrect length %d expecting %zu",
> -                         rp_cmd_args[header_type].name, header_type, header_len,
> -                         (size_t)rp_cmd_args[header_type].len);
> -            mark_source_rp_bad(ms);
> +            error_setg(&err, "Received '%s' message (0x%04x) with"
> +                       "incorrect length %d expecting %zu",
> +                       rp_cmd_args[header_type].name, header_type, header_len,
> +                       (size_t)rp_cmd_args[header_type].len);
>              goto out;
>          }
>  
>          /* We know we've got a valid header by this point */
>          res = qemu_get_buffer(rp, buf, header_len);
>          if (res != header_len) {
> -            error_report("RP: Failed reading data for message 0x%04x"
> -                         " read %d expected %d",
> -                         header_type, res, header_len);
> -            mark_source_rp_bad(ms);
> +            error_setg(&err, "Failed reading data for message 0x%04x"
> +                       " read %d expected %d",
> +                       header_type, res, header_len);
>              goto out;
>          }
>  
> @@ -1933,8 +1919,7 @@ static void *source_return_path_thread(void *opaque)
>              sibling_error = ldl_be_p(buf);
>              trace_source_return_path_thread_shut(sibling_error);
>              if (sibling_error) {
> -                error_report("RP: Sibling indicated error %d", sibling_error);
> -                mark_source_rp_bad(ms);
> +                error_setg(&err, "Sibling indicated error %d", sibling_error);
>              }
>              /*
>               * We'll let the main thread deal with closing the RP
> @@ -1952,7 +1937,10 @@ static void *source_return_path_thread(void *opaque)
>          case MIG_RP_MSG_REQ_PAGES:
>              start = ldq_be_p(buf);
>              len = ldl_be_p(buf + 8);
> -            migrate_handle_rp_req_pages(ms, NULL, start, len);
> +            migrate_handle_rp_req_pages(ms, NULL, start, len, &err);
> +            if (err) {
> +                goto out;
> +            }
>              break;
>  
>          case MIG_RP_MSG_REQ_PAGES_ID:
> @@ -1967,32 +1955,32 @@ static void *source_return_path_thread(void *opaque)
>                  expected_len += tmp32;
>              }
>              if (header_len != expected_len) {
> -                error_report("RP: Req_Page_id with length %d expecting %zd",
> -                             header_len, expected_len);
> -                mark_source_rp_bad(ms);
> +                error_setg(&err, "Req_Page_id with length %d expecting %zd",
> +                           header_len, expected_len);
> +                goto out;
> +            }
> +            migrate_handle_rp_req_pages(ms, (char *)&buf[13], start, len,
> +                                        &err);
> +            if (err) {
>                  goto out;
>              }
> -            migrate_handle_rp_req_pages(ms, (char *)&buf[13], start, len);
>              break;
>  
>          case MIG_RP_MSG_RECV_BITMAP:
>              if (header_len < 1) {
> -                error_report("%s: missing block name", __func__);
> -                mark_source_rp_bad(ms);
> +                error_setg(&err, "MIG_RP_MSG_RECV_BITMAP missing block name");
>                  goto out;
>              }
>              /* Format: len (1B) + idstr (<255B). This ends the idstr. */
>              buf[buf[0] + 1] = '\0';
> -            if (migrate_handle_rp_recv_bitmap(ms, (char *)(buf + 1))) {
> -                mark_source_rp_bad(ms);
> +            if (migrate_handle_rp_recv_bitmap(ms, (char *)(buf + 1), &err)) {
>                  goto out;
>              }
>              break;
>  
>          case MIG_RP_MSG_RESUME_ACK:
>              tmp32 = ldl_be_p(buf);
> -            if (migrate_handle_rp_resume_ack(ms, tmp32)) {
> -                mark_source_rp_bad(ms);
> +            if (migrate_handle_rp_resume_ack(ms, tmp32, &err)) {
>                  goto out;
>              }
>              break;
> @@ -2008,9 +1996,14 @@ static void *source_return_path_thread(void *opaque)
>      }
>  
>  out:
> -    if (qemu_file_get_error(rp)) {
> +    if (err) {

Need to keep both checks here.

> +        /*
> +         * Collect any error in return-path thread and report it to the
> +         * migration state object.
> +         */
> +        migrate_set_error(ms, err);
> +        error_free(err);
>          trace_source_return_path_thread_bad_end();
> -        mark_source_rp_bad(ms);
>      }
>  
>      trace_source_return_path_thread_end();
> @@ -2036,13 +2029,10 @@ static int open_return_path_on_source(MigrationState *ms)
>      return 0;
>  }


  parent reply	other threads:[~2023-10-05 12:58 UTC|newest]

Thread overview: 35+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-10-04 22:02 [PATCH v3 00/10] migration: Better error handling in rp thread, allow failures in recover Peter Xu
2023-10-04 22:02 ` [PATCH v3 01/10] migration: Display error in query-migrate irrelevant of status Peter Xu
2023-10-05  7:28   ` Juan Quintela
2023-10-04 22:02 ` [PATCH v3 02/10] migration: Introduce migrate_has_error() Peter Xu
2023-10-05  7:30   ` Juan Quintela
2023-10-04 22:02 ` [PATCH v3 03/10] migration: Refactor error handling in source return path Peter Xu
2023-10-05  6:11   ` Philippe Mathieu-Daudé
2023-10-05 16:05     ` Peter Xu
2023-10-08 11:39       ` Philippe Mathieu-Daudé
2023-10-05  8:22   ` Juan Quintela
2023-10-05 19:35     ` Peter Xu
2023-10-05 12:57   ` Fabiano Rosas [this message]
2023-10-05 19:35     ` Peter Xu
2023-10-04 22:02 ` [PATCH v3 04/10] migration: Deliver return path file error to migrate state too Peter Xu
2023-10-05  7:32   ` Juan Quintela
2023-10-04 22:02 ` [PATCH v3 05/10] qemufile: Always return a verbose error Peter Xu
2023-10-05  7:42   ` Juan Quintela
2023-10-04 22:02 ` [PATCH v3 06/10] migration: Remember num of ramblocks to sync during recovery Peter Xu
2023-10-05  7:43   ` Juan Quintela
2023-10-04 22:02 ` [PATCH v3 07/10] migration: Add migration_rp_wait|kick() Peter Xu
2023-10-05  7:49   ` Juan Quintela
2023-10-05 20:47     ` Peter Xu
2023-10-04 22:02 ` [PATCH v3 08/10] migration: Allow network to fail even during recovery Peter Xu
2023-10-05 13:25   ` Fabiano Rosas
2023-10-04 22:02 ` [PATCH v3 09/10] migration: Allow RECOVER->PAUSED convertion for dest qemu Peter Xu
2023-10-05  8:24   ` Juan Quintela
2023-10-04 22:02 ` [PATCH v3 10/10] tests/migration-test: Add a test for postcopy hangs during RECOVER Peter Xu
2023-10-05 13:24   ` Fabiano Rosas
2023-10-05 13:37     ` Fabiano Rosas
2023-10-05 20:55       ` Peter Xu
2023-10-05 21:10         ` Fabiano Rosas
2023-10-05 21:44           ` Peter Xu
2023-10-05 22:01             ` Fabiano Rosas
2023-10-09 16:50               ` Fabiano Rosas
2023-10-10 16:00                 ` Peter Xu

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=87il7lfcq1.fsf@suse.de \
    --to=farosas@suse.de \
    --cc=peterx@redhat.com \
    --cc=qemu-devel@nongnu.org \
    --cc=quintela@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.