All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
To: Alexey Perevalov <a.perevalov@samsung.com>
Cc: qemu-devel@nongnu.org, i.maximets@samsung.com
Subject: Re: [Qemu-devel] [PATCH 6/6] migration: detailed traces for postcopy
Date: Mon, 24 Apr 2017 19:03:32 +0100	[thread overview]
Message-ID: <20170424180332.GP2362@work-vm> (raw)
In-Reply-To: <1492175840-5021-7-git-send-email-a.perevalov@samsung.com>

* Alexey Perevalov (a.perevalov@samsung.com) wrote:
> It could help to track down vCPU state during page fault and
> page fault sources.
> 
> This patch showes proc's status/stack/syscall file at the moment of pagefault,
> it's very interesting to know who was page fault initiator.

This is a LOT of debug code, almost none of it is postcopy specific,
so probably a question for generic tracing code; but I'll admit to
not being happy about the idea of putting this much code in for
this type of dumping; when it gets this desperate we just normally do
a special build.

However, some specific comments as well.

> Signed-off-by: Alexey Perevalov <a.perevalov@samsung.com>
> ---
>  migration/postcopy-ram.c | 98 +++++++++++++++++++++++++++++++++++++++++++++++-
>  migration/trace-events   |  6 +++
>  2 files changed, 103 insertions(+), 1 deletion(-)
> 
> diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
> index 42330fd..513633c 100644
> --- a/migration/postcopy-ram.c
> +++ b/migration/postcopy-ram.c
> @@ -412,7 +412,91 @@ static int ram_block_enable_notify(const char *block_name, void *host_addr,
>      return 0;
>  }
>  
> -static int get_mem_fault_cpu_index(uint32_t pid)
> +#define PROC_LEN 1024
> +#define DEBUG_FAULT_PROCESS_STATUS 1
> +
> +#ifdef DEBUG_FAULT_PROCESS_STATUS
> +
> +static FILE *get_proc_file(const gchar *frmt, pid_t thread_id)
> +{
> +    FILE *f = NULL;
> +    gchar *file_path = g_strdup_printf(frmt, thread_id);
> +    if (file_path == NULL) {
> +        error_report("Couldn't allocate path for %u", thread_id);
> +        return NULL;
> +    }

I was going to say that I thought g_strdup_printf couldn't
return NULL; but then I looked at the source - eww it can.

> +    f = fopen(file_path, "r");
> +    if (!f) {
> +        error_report("can't open %s", file_path);
> +    }
> +
> +    trace_get_proc_file(file_path);
> +    g_free(file_path);
> +    return f;
> +}
> +
> +typedef void(*proc_line_handler)(const char *line);
> +
> +static void proc_line_cb(const char *line)
> +{
> +    /* trace_ functions are inline */
> +    trace_proc_line_cb(line);
> +}
> +
> +static void foreach_line_in_file(FILE *f, proc_line_handler cb)
> +{
> +    char *line = NULL;
> +    ssize_t read;
> +    size_t len;
> +
> +    while ((read = getline(&line, &len, f)) != -1) {
> +        /* workaround, trace_ infrastructure already insert \n
> +         * and getline includes it */
> +        ssize_t str_len = strlen(line) - 1;
> +        if (str_len <= 0)
> +            continue;
> +        line[str_len] = '\0';
> +        cb(line);
> +    }
> +    free(line);
> +}
> +
> +static void observe_thread_proc(const gchar *path_frmt, pid_t thread_id)
> +{
> +    FILE *f = get_proc_file(path_frmt, thread_id);
> +    if (!f) {
> +        error_report("can't read thread's proc");
> +        return;
> +    }
> +
> +    foreach_line_in_file(f, proc_line_cb);

> +    fclose(f);
> +}
> +
> +/*
> + * for convinience tracing need to trace
> + * observe_thread_begin
> + * get_proc_file
> + * proc_line_cb
> + * observe_thread_end
> + */
> +static void observe_thread(const char *msg, pid_t thread_id)
> +{
> +    trace_observe_thread_begin(msg);
> +    observe_thread_proc("/proc/%d/status", thread_id);
> +    observe_thread_proc("/proc/%d/syscall", thread_id);
> +    observe_thread_proc("/proc/%d/stack", thread_id);

You could wrap that in something like:
  if (TRACE_PROC_LINE_CB_ENABLED) {

so it doesn't read all of the files and do all the allocation
to get to the point it realised no one cared.

Dave

> +    trace_observe_thread_end(msg);
> +}
> +
> +#else
> +static void observe_thread(const char *msg, pid_t thread_id)
> +{
> +}
> +
> +#endif /* DEBUG_FAULT_PROCESS_STATUS */
> +
> +static int get_mem_fault_cpu_index(pid_t pid)
>  {
>      CPUState *cpu_iter;
>  
> @@ -421,9 +505,20 @@ static int get_mem_fault_cpu_index(uint32_t pid)
>             return cpu_iter->cpu_index;
>      }
>      trace_get_mem_fault_cpu_index(pid);
> +    observe_thread("not a vCPU", pid);
> +
>      return -1;
>  }
>  
> +static void observe_vcpu_state(void)
> +{
> +    CPUState *cpu_iter;
> +    CPU_FOREACH(cpu_iter) {
> +        observe_thread("vCPU", cpu_iter->thread_id);
> +        trace_vcpu_state(cpu_iter->running, cpu_iter->cpu_index);
> +    }
> +}
> +
>  /*
>   * Handle faults detected by the USERFAULT markings
>   */
> @@ -465,6 +560,7 @@ static void *postcopy_ram_fault_thread(void *opaque)
>          }
>  
>          ret = read(mis->userfault_fd, &msg, sizeof(msg));
> +        observe_vcpu_state();
>          if (ret != sizeof(msg)) {
>              if (errno == EAGAIN) {
>                  /*
> diff --git a/migration/trace-events b/migration/trace-events
> index ab2e1e4..3a74f91 100644
> --- a/migration/trace-events
> +++ b/migration/trace-events
> @@ -202,6 +202,12 @@ save_xbzrle_page_overflow(void) ""
>  ram_save_iterate_big_wait(uint64_t milliconds, int iterations) "big wait: %" PRIu64 " milliseconds, %d iterations"
>  ram_load_complete(int ret, uint64_t seq_iter) "exit_code %d seq iteration %" PRIu64
>  get_mem_fault_cpu_index(uint32_t pid) "pid %u is not vCPU"
> +observe_thread_status(int ptid, char *name, char *status) "host_tid %d %s %s"
> +vcpu_state(int cpu_index, int is_running) "cpu %d running %d"
> +proc_line_cb(const char *str) "%s"
> +get_proc_file(const char *str) "opened %s"
> +observe_thread_begin(const char *str) "%s"
> +observe_thread_end(const char *str) "%s"
>  
>  # migration/exec.c
>  migration_exec_outgoing(const char *cmd) "cmd=%s"
> -- 
> 1.8.3.1
> 
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK

  parent reply	other threads:[~2017-04-24 18:03 UTC|newest]

Thread overview: 38+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <CGME20170414131735eucas1p21f1fcadf426789276f567191372f7794@eucas1p2.samsung.com>
2017-04-14 13:17 ` [Qemu-devel] [PATCH 0/6] calculate downtime for postcopy live migration Alexey Perevalov
2017-04-14 13:17   ` [Qemu-devel] [PATCH 1/6] userfault: add pid into uffd_msg & update UFFD_FEATURE_* Alexey Perevalov
2017-04-14 13:17   ` [Qemu-devel] [PATCH 2/6] util: introduce glib-helper.c Alexey Perevalov
2017-04-14 16:05     ` Philippe Mathieu-Daudé
2017-04-17  7:07       ` Alexey
2017-04-21 10:01       ` Dr. David Alan Gilbert
2017-04-21 10:27     ` Peter Maydell
2017-04-21 15:10       ` Alexey
2017-04-21 15:49         ` Peter Maydell
2017-04-25 11:23           ` Dr. David Alan Gilbert
2017-04-14 13:17   ` [Qemu-devel] [PATCH 3/6] migration: add UFFD_FEATURE_THREAD_ID feature support Alexey Perevalov
2017-04-21 10:24     ` Dr. David Alan Gilbert
2017-04-21 15:22       ` Alexey
2017-04-24  8:03         ` Peter Xu
2017-04-24  8:12         ` Peter Xu
2017-04-24  8:38           ` Alexey
2017-04-24 17:10             ` Dr. David Alan Gilbert
2017-04-25  7:55               ` Alexey
2017-04-25 11:14                 ` Dr. David Alan Gilbert
2017-04-25 11:51                   ` Alexey Perevalov
2017-04-14 13:17   ` [Qemu-devel] [PATCH 4/6] migration: calculate downtime on dst side Alexey Perevalov
2017-04-21 12:00     ` Dr. David Alan Gilbert
2017-04-21 18:47       ` Alexey
2017-04-24 17:11         ` Dr. David Alan Gilbert
2017-04-22  9:49       ` [Qemu-devel] [PATCH 4/6] migration: calculate downtime on dst side (CPUMASK) Alexey
2017-04-24 17:13         ` Dr. David Alan Gilbert
2017-04-25  8:24     ` [Qemu-devel] [PATCH 4/6] migration: calculate downtime on dst side Peter Xu
2017-04-25 10:10       ` Alexey Perevalov
2017-04-25 10:25         ` Peter Xu
2017-04-25 10:47           ` Alexey Perevalov
2017-04-14 13:17   ` [Qemu-devel] [PATCH 5/6] migration: send postcopy downtime back to source Alexey Perevalov
2017-04-24 17:26     ` Dr. David Alan Gilbert
2017-04-25  5:51       ` Alexey
2017-04-14 13:17   ` [Qemu-devel] [PATCH 6/6] migration: detailed traces for postcopy Alexey Perevalov
2017-04-17 13:32     ` Philippe Mathieu-Daudé
2017-04-24 18:03     ` Dr. David Alan Gilbert [this message]
2017-04-17  2:32   ` [Qemu-devel] [PATCH 0/6] calculate downtime for postcopy live migration no-reply
2017-04-17  2:36   ` no-reply

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20170424180332.GP2362@work-vm \
    --to=dgilbert@redhat.com \
    --cc=a.perevalov@samsung.com \
    --cc=i.maximets@samsung.com \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.