From: Anthony Liguori <anthony@codemonkey.ws>
To: Anthony Liguori <aliguori@us.ibm.com>
Cc: qemu-devel@nongnu.org, Chris Wright <chrisw@sous-sol.org>,
kvm@vger.kernel.org
Subject: Re: [PATCH] qemu-kvm: introduce cpu_start/cpu_stop commands
Date: Mon, 22 Nov 2010 17:03:23 -0600 [thread overview]
Message-ID: <4CEAF6BB.3080808@codemonkey.ws> (raw)
In-Reply-To: <1290466818-5230-1-git-send-email-aliguori@us.ibm.com>
[-- Attachment #1: Type: text/plain, Size: 3938 bytes --]
On 11/22/2010 05:00 PM, Anthony Liguori wrote:
> qemu-kvm vcpu threads don't response to SIGSTOP/SIGCONT. Instead of teaching
> them to respond to these signals, introduce monitor commands that stop and start
> individual vcpus.
>
> The purpose of these commands are to implement CPU hard limits using an external
> tool that watches the CPU consumption and stops the CPU as appropriate.
>
> The monitor commands provide a more elegant solution that signals because it
> ensures that a stopped vcpu isn't holding the qemu_mutex.
>
> I'll reply to this note with an example tool.
>
This is super rough but demonstrates the concept. If you run it with '0
50 100' it will cap VCPU 0 at 50%.
It's not the prettiest thing in the world but it's minimally invasive
and seems to work well.
Regards,
Anthony Liguori
> Signed-off-by: Anthony Liguori<aliguori@us.ibm.com>
>
> diff --git a/hmp-commands.hx b/hmp-commands.hx
> index ba6de28..827bd67 100644
> --- a/hmp-commands.hx
> +++ b/hmp-commands.hx
> @@ -279,6 +279,24 @@ Resume emulation.
> ETEXI
>
> {
> + .name = "cpu_start",
> + .args_type = "cpu:i",
> + .params = "[cpu]",
> + .help = "start cpu emulation",
> + .user_print = monitor_user_noop,
> + .mhandler.cmd_new = do_vcpu_start,
> + },
> +
> + {
> + .name = "cpu_stop",
> + .args_type = "cpu:i",
> + .params = "[cpu]",
> + .help = "stop cpu emulation",
> + .user_print = monitor_user_noop,
> + .mhandler.cmd_new = do_vcpu_stop,
> + },
> +
> + {
> .name = "gdbserver",
> .args_type = "device:s?",
> .params = "[device]",
> diff --git a/qemu-kvm.c b/qemu-kvm.c
> index 471306b..35121ed 100644
> --- a/qemu-kvm.c
> +++ b/qemu-kvm.c
> @@ -1351,6 +1351,65 @@ static void pause_all_threads(void)
> }
> }
>
> +static void vcpu_stop(int cpu)
> +{
> + CPUState *env = first_cpu;
> +
> + for (env = first_cpu; env; env = env->next_cpu) {
> + if (env->cpu_index == cpu) {
> + break;
> + }
> + }
> +
> + if (env) {
> + if (env != cpu_single_env) {
> + env->stop = 1;
> + pthread_kill(env->kvm_cpu_state.thread, SIG_IPI);
> + } else {
> + env->stop = 0;
> + env->stopped = 1;
> + cpu_exit(env);
> + }
> +
> + while (!env->stopped) {
> + qemu_cond_wait(&qemu_pause_cond);
> + }
> + }
> +}
> +
> +static void vcpu_start(int cpu)
> +{
> + CPUState *env = first_cpu;
> +
> + assert(!cpu_single_env);
> +
> + for (env = first_cpu; env; env = env->next_cpu) {
> + if (env->cpu_index == cpu) {
> + break;
> + }
> + }
> +
> + if (env) {
> + env->stop = 0;
> + env->stopped = 0;
> + pthread_kill(env->kvm_cpu_state.thread, SIG_IPI);
> + }
> +}
> +
> +int do_vcpu_stop(Monitor *mon, const QDict *qdict, QObject **ret_data)
> +{
> + int vcpu = qdict_get_int(qdict, "cpu");
> + vcpu_stop(vcpu);
> + return 0;
> +}
> +
> +int do_vcpu_start(Monitor *mon, const QDict *qdict, QObject **ret_data)
> +{
> + int vcpu = qdict_get_int(qdict, "cpu");
> + vcpu_start(vcpu);
> + return 0;
> +}
> +
> static void resume_all_threads(void)
> {
> CPUState *penv = first_cpu;
> diff --git a/sysemu.h b/sysemu.h
> index 849dc8c..3ef68dd 100644
> --- a/sysemu.h
> +++ b/sysemu.h
> @@ -61,6 +61,9 @@ void qemu_system_reset(void);
> void qemu_add_exit_notifier(Notifier *notify);
> void qemu_remove_exit_notifier(Notifier *notify);
>
> +int do_vcpu_stop(Monitor *mon, const QDict *qdict, QObject **ret_data);
> +int do_vcpu_start(Monitor *mon, const QDict *qdict, QObject **ret_data);
> +
> void do_savevm(Monitor *mon, const QDict *qdict);
> int load_vmstate(const char *name);
> void do_delvm(Monitor *mon, const QDict *qdict);
>
[-- Attachment #2: main.c --]
[-- Type: text/x-csrc, Size: 5658 bytes --]
#define _XOPEN_SOURCE 500
#define _GNU_SOURCE
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <stdlib.h>
#include <stdbool.h>
#include <sys/types.h>
#include <signal.h>
#include <sys/time.h>
#include <sys/syscall.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <stdarg.h>
#define USEC_PER_SEC 1000000ULL
static long get_cguest_time(const char *buffer)
{
const char *ptr;
int space_count;
for (ptr = buffer; *ptr && space_count != 42; ptr++) {
if (*ptr == ' ') {
space_count++;
}
}
return strtol(ptr, NULL, 10);
}
static void tv_add(struct timeval *tv, suseconds_t usec)
{
tv->tv_usec += usec;
while (tv->tv_usec > USEC_PER_SEC) {
tv->tv_sec += 1;
tv->tv_usec -= USEC_PER_SEC;
}
}
static int tv_cmp(struct timeval *lhs, struct timeval *rhs)
{
if (lhs->tv_sec == rhs->tv_sec) {
if (lhs->tv_usec < rhs->tv_usec) {
return -1;
} else if (lhs->tv_usec > rhs->tv_usec) {
return 1;
}
return 0;
} else if (lhs->tv_sec < rhs->tv_sec) {
return -1;
} else if (lhs->tv_sec > rhs->tv_sec) {
return 1;
}
return 0;
}
static void write_all(int fd, const void *buffer, size_t buffer_len)
{
size_t offset = 0;
while (offset < buffer_len) {
ssize_t len;
len = write(fd, buffer + offset, buffer_len - offset);
if (len > 0) {
offset += len;
}
}
}
static void read_reply(int fd, char *buffer, size_t buffer_len)
{
size_t offset = 0;
while (offset < buffer_len) {
ssize_t len;
len = read(fd, buffer + offset, buffer_len - offset);
if (len > 0) {
offset += len;
}
if (offset > 8 &&
memcmp("\n(qemu) ", buffer + (offset - 8), 8) == 0) {
char *ptr;
buffer[offset - 8] = 0;
ptr = strchr(buffer, '\n');
if (ptr == NULL) {
buffer[0] = 0;
} else {
memmove(buffer, ptr + 1, offset - (ptr - buffer) - 1);
}
return;
}
}
}
static int monitor_fd;
static void monitor_command(const char *fmt, ...)
{
char buffer[256];
va_list ap;
size_t len;
va_start(ap, fmt);
len = vsnprintf(buffer, sizeof(buffer), fmt, ap);
va_end(ap);
write_all(monitor_fd, buffer, len);
write_all(monitor_fd, "\n", 1);
read_reply(monitor_fd, buffer, sizeof(buffer));
}
static void monitor_command_response(char *rsp, size_t rsp_len,
const char *fmt, ...)
{
char buffer[256];
va_list ap;
size_t len;
va_start(ap, fmt);
len = vsnprintf(buffer, sizeof(buffer), fmt, ap);
va_end(ap);
write_all(monitor_fd, buffer, len);
write_all(monitor_fd, "\n", 1);
read_reply(monitor_fd, rsp, rsp_len);
}
static int vm_running = 1;
static void guest_start(int vcpu)
{
if (!vm_running) {
monitor_command("cpu_start %d", vcpu);
}
vm_running = 1;
}
static void guest_stop(int vcpu)
{
if (vm_running) {
monitor_command("cpu_stop %d", vcpu);
}
vm_running = 0;
}
static int find_pid(char *buffer, int vcpu)
{
char *ptr = buffer;
int i;
for (i = 0; ptr && i < vcpu; i++) {
ptr = strchr(ptr, '\n');
if (ptr) {
ptr++;
}
}
if (ptr) {
ptr = strstr(ptr, "thread_id=");
if (ptr) {
ptr += 10;
return atoi(ptr);
}
}
return 0;
}
int main(int argc, char **argv)
{
int fd, pid, vcpu;
char buffer[1024];
long ticks_per_sec;
long cguest_time_last = 0;
struct timeval period_end;
long cguest_ticks;
long entitlement;
long period;
struct sockaddr_un addr;
if (argc != 4) {
fprintf(stderr, "Missing arguments\n");
return 1;
}
vcpu = atoi(argv[1]);
/* FIXME hack, does guest time get scaled with vcpu count? */
entitlement = atoi(argv[2]) * 2;
period = atoi(argv[3]);
monitor_fd = socket(PF_UNIX, SOCK_STREAM, 0);
addr.sun_family = AF_UNIX;
snprintf(addr.sun_path, 108, "/tmp/monitor.sock");
if (connect(monitor_fd, (struct sockaddr *)&addr, sizeof(addr)) == -1) {
return 1;
}
read_reply(monitor_fd, buffer, sizeof(buffer));
monitor_command_response(buffer, sizeof(buffer), "info cpus");
pid = find_pid(buffer, vcpu);
ticks_per_sec = sysconf(_SC_CLK_TCK);
entitlement = (entitlement * ticks_per_sec) / 1000;
period *= 1000;
snprintf(buffer, sizeof(buffer), "/proc/%d/stat", pid);
fd = open(buffer, O_RDONLY);
gettimeofday(&period_end, NULL);
tv_add(&period_end, period);
cguest_ticks = 0;
while (1) {
long cguest_time_now;
struct timeval tv_now;
ssize_t len;
gettimeofday(&tv_now, NULL);
len = pread(fd, buffer, sizeof(buffer) - 1, 0);
buffer[len] = 0;
cguest_time_now = get_cguest_time(buffer);
if (cguest_time_last) {
cguest_ticks += cguest_time_now - cguest_time_last;
if (tv_cmp(&tv_now, &period_end) < 0) {
if (cguest_ticks >= entitlement) {
guest_stop(vcpu);
cguest_ticks = 0;
}
} else {
guest_start(vcpu);
cguest_ticks = 0;
tv_add(&tv_now, period);
period_end = tv_now;
}
}
cguest_time_last = cguest_time_now;
usleep(10000); // 10ms
}
close(fd);
return 0;
}
next prev parent reply other threads:[~2010-11-22 23:03 UTC|newest]
Thread overview: 15+ messages / expand[flat|nested] mbox.gz Atom feed top
2010-11-22 23:00 [PATCH] qemu-kvm: introduce cpu_start/cpu_stop commands Anthony Liguori
2010-11-22 23:03 ` Anthony Liguori [this message]
2010-11-22 23:04 ` Chris Wright
2010-11-22 23:44 ` Anthony Liguori
2010-11-22 23:56 ` Chris Wright
2010-11-23 0:24 ` Anthony Liguori
2010-11-23 6:35 ` [Qemu-devel] " Avi Kivity
2010-11-23 6:41 ` [Qemu-devel] " Avi Kivity
2010-11-23 8:16 ` Dor Laor
2010-11-23 13:57 ` Anthony Liguori
2010-11-23 13:51 ` Anthony Liguori
2010-11-23 14:00 ` Avi Kivity
2010-11-23 14:24 ` Anthony Liguori
2010-11-23 14:35 ` Avi Kivity
2010-11-23 7:29 ` Gleb Natapov
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=4CEAF6BB.3080808@codemonkey.ws \
--to=anthony@codemonkey.ws \
--cc=aliguori@us.ibm.com \
--cc=chrisw@sous-sol.org \
--cc=kvm@vger.kernel.org \
--cc=qemu-devel@nongnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox