public inbox for kvm@vger.kernel.org
 help / color / mirror / Atom feed
From: Anthony Liguori <anthony@codemonkey.ws>
To: Anthony Liguori <aliguori@us.ibm.com>
Cc: qemu-devel@nongnu.org, Chris Wright <chrisw@sous-sol.org>,
	kvm@vger.kernel.org
Subject: Re: [PATCH] qemu-kvm: introduce cpu_start/cpu_stop commands
Date: Mon, 22 Nov 2010 17:03:23 -0600	[thread overview]
Message-ID: <4CEAF6BB.3080808@codemonkey.ws> (raw)
In-Reply-To: <1290466818-5230-1-git-send-email-aliguori@us.ibm.com>

[-- Attachment #1: Type: text/plain, Size: 3938 bytes --]

On 11/22/2010 05:00 PM, Anthony Liguori wrote:
> qemu-kvm vcpu threads don't respond to SIGSTOP/SIGCONT.  Instead of teaching
> them to respond to these signals, introduce monitor commands that stop and start
> individual vcpus.
>
> The purpose of these commands is to implement CPU hard limits using an external
> tool that watches the CPU consumption and stops the CPU as appropriate.
>
> The monitor commands provide a more elegant solution than signals because they
> ensure that a stopped vcpu isn't holding the qemu_mutex.
>
> I'll reply to this note with an example tool.
>    

This is super rough but demonstrates the concept.  If you run it with '0 
50 100' it will cap VCPU 0 at 50%.

It's not the prettiest thing in the world but it's minimally invasive 
and seems to work well.

Regards,

Anthony Liguori

> Signed-off-by: Anthony Liguori<aliguori@us.ibm.com>
>
> diff --git a/hmp-commands.hx b/hmp-commands.hx
> index ba6de28..827bd67 100644
> --- a/hmp-commands.hx
> +++ b/hmp-commands.hx
> @@ -279,6 +279,24 @@ Resume emulation.
>   ETEXI
>
>       {
> +        .name       = "cpu_start",
> +        .args_type  = "cpu:i",
> +        .params     = "[cpu]",
> +        .help       = "start cpu emulation",
> +        .user_print = monitor_user_noop,
> +        .mhandler.cmd_new = do_vcpu_start,
> +    },
> +
> +    {
> +        .name       = "cpu_stop",
> +        .args_type  = "cpu:i",
> +        .params     = "[cpu]",
> +        .help       = "stop cpu emulation",
> +        .user_print = monitor_user_noop,
> +        .mhandler.cmd_new = do_vcpu_stop,
> +    },
> +
> +    {
>           .name       = "gdbserver",
>           .args_type  = "device:s?",
>           .params     = "[device]",
> diff --git a/qemu-kvm.c b/qemu-kvm.c
> index 471306b..35121ed 100644
> --- a/qemu-kvm.c
> +++ b/qemu-kvm.c
> @@ -1351,6 +1351,65 @@ static void pause_all_threads(void)
>       }
>   }
>
> +static void vcpu_stop(int cpu)
> +{
> +    CPUState *env = first_cpu;
> +
> +    for (env = first_cpu; env; env = env->next_cpu) {
> +        if (env->cpu_index == cpu) {
> +            break;
> +        }
> +    }
> +
> +    if (env) {
> +        if (env != cpu_single_env) {
> +            env->stop = 1;
> +            pthread_kill(env->kvm_cpu_state.thread, SIG_IPI);
> +        } else {
> +            env->stop = 0;
> +            env->stopped = 1;
> +            cpu_exit(env);
> +        }
> +
> +        while (!env->stopped) {
> +            qemu_cond_wait(&qemu_pause_cond);
> +        }
> +    }
> +}
> +
> +static void vcpu_start(int cpu)
> +{
> +    CPUState *env = first_cpu;
> +
> +    assert(!cpu_single_env);
> +
> +    for (env = first_cpu; env; env = env->next_cpu) {
> +        if (env->cpu_index == cpu) {
> +            break;
> +        }
> +    }
> +
> +    if (env) {
> +        env->stop = 0;
> +        env->stopped = 0;
> +        pthread_kill(env->kvm_cpu_state.thread, SIG_IPI);
> +    }
> +}
> +
> +int do_vcpu_stop(Monitor *mon, const QDict *qdict, QObject **ret_data)
> +{
> +    int vcpu = qdict_get_int(qdict, "cpu");
> +    vcpu_stop(vcpu);
> +    return 0;
> +}
> +
> +int do_vcpu_start(Monitor *mon, const QDict *qdict, QObject **ret_data)
> +{
> +    int vcpu = qdict_get_int(qdict, "cpu");
> +    vcpu_start(vcpu);
> +    return 0;
> +}
> +
>   static void resume_all_threads(void)
>   {
>       CPUState *penv = first_cpu;
> diff --git a/sysemu.h b/sysemu.h
> index 849dc8c..3ef68dd 100644
> --- a/sysemu.h
> +++ b/sysemu.h
> @@ -61,6 +61,9 @@ void qemu_system_reset(void);
>   void qemu_add_exit_notifier(Notifier *notify);
>   void qemu_remove_exit_notifier(Notifier *notify);
>
> +int do_vcpu_stop(Monitor *mon, const QDict *qdict, QObject **ret_data);
> +int do_vcpu_start(Monitor *mon, const QDict *qdict, QObject **ret_data);
> +
>   void do_savevm(Monitor *mon, const QDict *qdict);
>   int load_vmstate(const char *name);
>   void do_delvm(Monitor *mon, const QDict *qdict);
>    


[-- Attachment #2: main.c --]
[-- Type: text/x-csrc, Size: 5658 bytes --]

#define _XOPEN_SOURCE 500
#define _GNU_SOURCE
#include <errno.h>
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <stdlib.h>
#include <string.h>
#include <stdbool.h>
#include <sys/types.h>
#include <signal.h>
#include <sys/time.h>
#include <sys/syscall.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <stdarg.h>

#define USEC_PER_SEC 1000000ULL

/*
 * Extract one numeric field from the text of a /proc/<pid>/stat file.
 *
 * Skips past the first 42 space characters in @buffer and parses the decimal
 * number that follows, i.e. the 43rd space-separated field.
 *
 * NOTE(review): proc(5) appears to list field 43 as guest_time and field 44
 * as cguest_time, and a comm field containing spaces would shift the count --
 * confirm which counter is actually intended here.
 *
 * @buffer: NUL-terminated contents of the stat file.
 *
 * Returns the parsed value, or 0 when @buffer ends before the field is
 * reached (strtol() is then handed an empty string).
 */
static long get_cguest_time(const char *buffer)
{
    enum { SPACES_TO_SKIP = 42 };
    const char *ptr;
    /* Must start at zero: the counter was previously read uninitialized. */
    int space_count = 0;

    for (ptr = buffer; *ptr && space_count != SPACES_TO_SKIP; ptr++) {
        if (*ptr == ' ') {
            space_count++;
        }
    }

    return strtol(ptr, NULL, 10);
}

/*
 * Advance @tv by @usec microseconds, leaving it normalized so that
 * 0 <= tv_usec < USEC_PER_SEC.
 *
 * @tv:   time value updated in place.
 * @usec: number of microseconds to add; a negative value moves @tv backwards.
 */
static void tv_add(struct timeval *tv, suseconds_t usec)
{
    /*
     * USEC_PER_SEC is an unsigned long long constant.  Comparing tv_usec
     * against it directly converts tv_usec to unsigned, so do the arithmetic
     * with a signed copy instead.
     */
    const suseconds_t usec_per_sec = (suseconds_t)USEC_PER_SEC;
    suseconds_t total = tv->tv_usec + usec;

    /* Carry whole seconds; exactly one second's worth must carry too. */
    tv->tv_sec += total / usec_per_sec;
    total %= usec_per_sec;

    /* C division truncates toward zero, so borrow for a negative remainder. */
    if (total < 0) {
        total += usec_per_sec;
        tv->tv_sec -= 1;
    }

    tv->tv_usec = total;
}

/*
 * Three-way comparison of two time values.
 *
 * Seconds are compared first; microseconds only break a tie.
 *
 * Returns -1 when @lhs is earlier than @rhs, 1 when it is later and 0 when
 * both fields are equal.
 */
static int tv_cmp(struct timeval *lhs, struct timeval *rhs)
{
    if (lhs->tv_sec != rhs->tv_sec) {
        return (lhs->tv_sec < rhs->tv_sec) ? -1 : 1;
    }

    if (lhs->tv_usec != rhs->tv_usec) {
        return (lhs->tv_usec < rhs->tv_usec) ? -1 : 1;
    }

    return 0;
}

/*
 * Write the whole of @buffer to @fd, retrying after short writes.
 *
 * @fd:         descriptor to write to.
 * @buffer:     data to send.
 * @buffer_len: number of bytes in @buffer.
 *
 * A write interrupted by a signal (EINTR) is retried.  Any other write error
 * is reported on stderr and the function gives up, because retrying a
 * persistent failure would spin forever.
 */
static void write_all(int fd, const void *buffer, size_t buffer_len)
{
    /* Pointer arithmetic on void * is a compiler extension; use bytes. */
    const char *bytes = buffer;
    size_t offset = 0;

    while (offset < buffer_len) {
        ssize_t len = write(fd, bytes + offset, buffer_len - offset);

        if (len > 0) {
            offset += (size_t)len;
        } else if (len < 0 && errno != EINTR) {
            perror("write");
            return;
        }
    }
}

/*
 * Read one monitor reply from @fd into @buffer.
 *
 * Data is accumulated until it ends with the "\n(qemu) " prompt.  The prompt
 * is then cut off and everything up to and including the first newline is
 * dropped, leaving the remainder as a NUL-terminated string.  If no newline
 * is left, the result is the empty string.
 *
 * @fd:         descriptor to read from.
 * @buffer:     destination; always NUL-terminated on return when
 *              @buffer_len is non-zero.
 * @buffer_len: size of @buffer in bytes, including room for the terminator.
 *
 * If the stream ends, a read error other than EINTR occurs, or the buffer
 * fills before a prompt is seen, the bytes received so far are returned
 * unmodified (but terminated) instead of looping forever.
 */
static void read_reply(int fd, char *buffer, size_t buffer_len)
{
    static const char prompt[] = "\n(qemu) ";
    const size_t prompt_len = sizeof(prompt) - 1;
    size_t offset = 0;

    if (buffer_len == 0) {
        return;
    }

    /* Keep one byte in reserve so the result can always be terminated. */
    while (offset < buffer_len - 1) {
        ssize_t len = read(fd, buffer + offset, buffer_len - 1 - offset);

        if (len < 0) {
            if (errno == EINTR) {
                continue;
            }
            perror("read");
            break;
        }
        if (len == 0) {
            /* Peer closed the connection; no prompt will ever arrive. */
            break;
        }
        offset += (size_t)len;

        if (offset >= prompt_len &&
            memcmp(prompt, buffer + (offset - prompt_len), prompt_len) == 0) {
            char *ptr;

            buffer[offset - prompt_len] = 0;
            ptr = strchr(buffer, '\n');
            if (ptr == NULL) {
                buffer[0] = 0;
            } else {
                /* The moved range includes the terminator written above. */
                memmove(buffer, ptr + 1, offset - (ptr - buffer) - 1);
            }
            return;
        }
    }

    buffer[offset] = 0;
}

/* Socket connected to the monitor; assigned in main(). */
static int monitor_fd;

/*
 * Send a printf-style formatted command line to the monitor and discard the
 * reply.
 *
 * @fmt: printf format string for the command, followed by its arguments.
 *       The trailing newline is added here.
 *
 * A command that cannot be formatted, or that does not fit into the local
 * 256-byte buffer, is not sent: a truncated command would be executed as
 * something else.
 */
static void monitor_command(const char *fmt, ...)
{
    char buffer[256];
    va_list ap;
    int formatted;

    va_start(ap, fmt);
    formatted = vsnprintf(buffer, sizeof(buffer), fmt, ap);
    va_end(ap);

    /*
     * vsnprintf() returns the length the full output would have had, which
     * can exceed the buffer, or a negative value on error.  Neither may be
     * used as a byte count for the write below.
     */
    if (formatted < 0 || (size_t)formatted >= sizeof(buffer)) {
        fprintf(stderr, "monitor command too long or malformed\n");
        return;
    }

    write_all(monitor_fd, buffer, (size_t)formatted);
    write_all(monitor_fd, "\n", 1);
    /* Consume the reply so the next command starts on a clean stream. */
    read_reply(monitor_fd, buffer, sizeof(buffer));
}

/*
 * Send a printf-style formatted command line to the monitor and store the
 * reply in @rsp.
 *
 * @rsp:     buffer that receives the reply as produced by read_reply().
 * @rsp_len: size of @rsp in bytes.
 * @fmt:     printf format string for the command, followed by its arguments.
 *           The trailing newline is added here.
 *
 * A command that cannot be formatted, or that does not fit into the local
 * 256-byte buffer, is not sent; @rsp is set to the empty string in that case.
 */
static void monitor_command_response(char *rsp, size_t rsp_len,
                                     const char *fmt, ...)
{
    char buffer[256];
    va_list ap;
    int formatted;

    va_start(ap, fmt);
    formatted = vsnprintf(buffer, sizeof(buffer), fmt, ap);
    va_end(ap);

    /*
     * vsnprintf() returns the length the full output would have had, which
     * can exceed the buffer, or a negative value on error.  Neither may be
     * used as a byte count for the write below.
     */
    if (formatted < 0 || (size_t)formatted >= sizeof(buffer)) {
        fprintf(stderr, "monitor command too long or malformed\n");
        if (rsp_len > 0) {
            rsp[0] = 0;
        }
        return;
    }

    write_all(monitor_fd, buffer, (size_t)formatted);
    write_all(monitor_fd, "\n", 1);
    read_reply(monitor_fd, rsp, rsp_len);
}

/* Non-zero while the vcpu is believed to be running; starts out as running. */
static int vm_running = 1;

/*
 * Resume @vcpu through the monitor's cpu_start command.
 *
 * The command is only issued when the vcpu is currently recorded as stopped;
 * the recorded state is set to running in either case.
 */
static void guest_start(int vcpu)
{
    if (vm_running == 0) {
        monitor_command("cpu_start %d", vcpu);
    }

    vm_running = 1;
}

/*
 * Halt @vcpu through the monitor's cpu_stop command.
 *
 * The command is only issued when the vcpu is currently recorded as running;
 * the recorded state is set to stopped in either case.
 */
static void guest_stop(int vcpu)
{
    if (vm_running != 0) {
        monitor_command("cpu_stop %d", vcpu);
    }

    vm_running = 0;
}

/*
 * Look up the thread id reported for a vcpu in a multi-line text buffer
 * (main() passes the reply to "info cpus").
 *
 * Skips @vcpu newline-terminated lines, then parses the number following the
 * first "thread_id=" found from that position onwards.
 *
 * @buffer: NUL-terminated text to search; may be NULL.
 * @vcpu:   number of lines to skip before searching.
 *
 * Returns the parsed id, or 0 when the buffer has too few lines or no
 * "thread_id=" follows.
 */
static int find_pid(char *buffer, int vcpu)
{
    static const char key[] = "thread_id=";
    char *line = buffer;
    int skipped = 0;

    /* Step over one line per iteration until the requested one is reached. */
    while (line != NULL && skipped < vcpu) {
        char *eol = strchr(line, '\n');

        line = (eol != NULL) ? eol + 1 : NULL;
        skipped++;
    }

    if (line == NULL) {
        return 0;
    }

    char *match = strstr(line, key);
    if (match == NULL) {
        return 0;
    }

    return atoi(match + (sizeof(key) - 1));
}

/*
 * Cap the guest time consumed by one vcpu thread.
 *
 * Usage: <prog> <vcpu> <entitlement> <period>
 *
 *   vcpu         index of the vcpu, used both for the monitor commands and to
 *                pick the matching line of the "info cpus" reply.
 *   entitlement  guest time allowed per period, in milliseconds (it is
 *                converted to clock ticks via ticks_per_sec / 1000 below).
 *   period       length of the accounting period, in milliseconds (it is
 *                converted to microseconds below).
 *
 * The program connects to the monitor socket at /tmp/monitor.sock, finds the
 * thread id of the vcpu and then samples /proc/<tid>/stat every 10ms.  Once
 * the ticks accumulated within the current period reach the entitlement the
 * vcpu is stopped; it is started again when the period ends.
 *
 * Returns 1 on a usage or setup error, or when the stat file can no longer be
 * read; otherwise it runs until killed.
 */
int main(int argc, char **argv)
{
    int fd, pid, vcpu;
    int rc = 0;
    char buffer[1024];
    long ticks_per_sec;
    long cguest_time_last = 0;
    struct timeval period_end;
    long cguest_ticks;
    long entitlement;
    long period;
    struct sockaddr_un addr = { .sun_family = AF_UNIX };

    if (argc != 4) {
        fprintf(stderr, "Missing arguments\n");
        return 1;
    }

    vcpu = atoi(argv[1]);
    /* FIXME hack, does guest time get scaled with vcpu count? */
    entitlement = (long)atoi(argv[2]) * 2;
    period = atoi(argv[3]);

    monitor_fd = socket(PF_UNIX, SOCK_STREAM, 0);
    if (monitor_fd == -1) {
        perror("socket");
        return 1;
    }
    snprintf(addr.sun_path, sizeof(addr.sun_path), "/tmp/monitor.sock");

    if (connect(monitor_fd, (struct sockaddr *)&addr, sizeof(addr)) == -1) {
        perror("connect");
        close(monitor_fd);
        return 1;
    }

    /* Swallow the greeting up to the first prompt before sending anything. */
    read_reply(monitor_fd, buffer, sizeof(buffer));
    monitor_command_response(buffer, sizeof(buffer), "info cpus");
    pid = find_pid(buffer, vcpu);
    if (pid <= 0) {
        /* find_pid() reports failure as 0; /proc/0/stat is not a vcpu. */
        fprintf(stderr, "Could not find the thread id of vcpu %d\n", vcpu);
        close(monitor_fd);
        return 1;
    }

    ticks_per_sec = sysconf(_SC_CLK_TCK);
    /* Milliseconds -> clock ticks, the unit of the stat counters. */
    entitlement = (entitlement * ticks_per_sec) / 1000;
    /* Milliseconds -> microseconds, the unit tv_add() expects. */
    period *= 1000;

    snprintf(buffer, sizeof(buffer), "/proc/%d/stat", pid);
    fd = open(buffer, O_RDONLY);
    if (fd == -1) {
        perror(buffer);
        close(monitor_fd);
        return 1;
    }

    gettimeofday(&period_end, NULL);
    tv_add(&period_end, period);
    cguest_ticks = 0;

    while (1) {
        long cguest_time_now;
        struct timeval tv_now;
        ssize_t len;

        gettimeofday(&tv_now, NULL);
        len = pread(fd, buffer, sizeof(buffer) - 1, 0);
        if (len <= 0) {
            /*
             * A failed read returns -1, which must not be used as an index
             * into the buffer; without data there is nothing to account.
             */
            if (len < 0) {
                perror("pread");
            }
            rc = 1;
            break;
        }
        buffer[len] = 0;
        cguest_time_now = get_cguest_time(buffer);

        /* The first sample only establishes the baseline. */
        if (cguest_time_last) {
            cguest_ticks += cguest_time_now - cguest_time_last;

            if (tv_cmp(&tv_now, &period_end) < 0) {
                if (cguest_ticks >= entitlement) {
                    guest_stop(vcpu);
                    cguest_ticks = 0;
                }
            } else {
                /* Period over: resume the vcpu and open a new period. */
                guest_start(vcpu);
                cguest_ticks = 0;
                tv_add(&tv_now, period);
                period_end = tv_now;
            }
        }

        cguest_time_last = cguest_time_now;
        usleep(10000); /* 10ms */
    }

    close(fd);
    close(monitor_fd);

    return rc;
}

  reply	other threads:[~2010-11-22 23:03 UTC|newest]

Thread overview: 15+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-11-22 23:00 [PATCH] qemu-kvm: introduce cpu_start/cpu_stop commands Anthony Liguori
2010-11-22 23:03 ` Anthony Liguori [this message]
2010-11-22 23:04 ` Chris Wright
2010-11-22 23:44   ` Anthony Liguori
2010-11-22 23:56     ` Chris Wright
2010-11-23  0:24       ` Anthony Liguori
2010-11-23  6:35   ` [Qemu-devel] " Avi Kivity
2010-11-23  6:41 ` [Qemu-devel] " Avi Kivity
2010-11-23  8:16   ` Dor Laor
2010-11-23 13:57     ` Anthony Liguori
2010-11-23 13:51   ` Anthony Liguori
2010-11-23 14:00     ` Avi Kivity
2010-11-23 14:24       ` Anthony Liguori
2010-11-23 14:35         ` Avi Kivity
2010-11-23  7:29 ` Gleb Natapov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4CEAF6BB.3080808@codemonkey.ws \
    --to=anthony@codemonkey.ws \
    --cc=aliguori@us.ibm.com \
    --cc=chrisw@sous-sol.org \
    --cc=kvm@vger.kernel.org \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox