From: Marcelo Tosatti <mtosatti@redhat.com>
To: kvm@vger.kernel.org, qemu-devel@nongnu.org
Cc: avi@redhat.com, Marcelo Tosatti <mtosatti@redhat.com>,
john cooper <john.cooper@redhat.com>
Subject: [patch uq/master 2/2] Add option to use file backed guest memory
Date: Wed, 24 Feb 2010 18:11:20 -0300 [thread overview]
Message-ID: <20100224211507.913712224@amt.cnet> (raw)
In-Reply-To: 20100224211117.958583246@amt.cnet
[-- Attachment #1: mempath --]
[-- Type: text/plain, Size: 6107 bytes --]
Port qemu-kvm's -mem-path and -mem-prealloc options. These are useful
for backing guest memory with huge pages via hugetlbfs.
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
CC: john cooper <john.cooper@redhat.com>
Index: qemu-kvm/cpu-all.h
===================================================================
--- qemu-kvm.orig/cpu-all.h
+++ qemu-kvm/cpu-all.h
@@ -850,6 +850,9 @@ extern uint8_t *phys_ram_dirty;
extern ram_addr_t ram_size;
extern ram_addr_t last_ram_offset;
+extern const char *mem_path;
+extern int mem_prealloc;
+
/* physical memory access */
/* MMIO pages are identified by a combination of an IO device index and
Index: qemu-kvm/exec.c
===================================================================
--- qemu-kvm.orig/exec.c
+++ qemu-kvm/exec.c
@@ -2513,6 +2513,111 @@ void qemu_flush_coalesced_mmio_buffer(vo
kvm_flush_coalesced_mmio_buffer();
}
+#ifdef __linux__
+
+#include <sys/vfs.h>
+
+#define HUGETLBFS_MAGIC 0x958458f6
+
+static long gethugepagesize(const char *path)
+{
+ struct statfs fs;
+ int ret;
+
+ do {
+ ret = statfs(path, &fs);
+ } while (ret != 0 && errno == EINTR);
+
+ if (ret != 0) {
+ perror("statfs");
+ return 0;
+ }
+
+ if (fs.f_type != HUGETLBFS_MAGIC)
+ fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
+
+ return fs.f_bsize;
+}
+
+static void *file_ram_alloc(ram_addr_t memory, const char *path)
+{
+ char *filename;
+ void *area;
+ int fd;
+#ifdef MAP_POPULATE
+ int flags;
+#endif
+ unsigned long hpagesize;
+
+ if (!path) {
+ return NULL;
+ }
+
+ hpagesize = gethugepagesize(path);
+ if (!hpagesize) {
+ return NULL;
+ }
+
+ if (memory < hpagesize) {
+ return NULL;
+ }
+
+ if (kvm_enabled() && !kvm_has_sync_mmu()) {
+ fprintf(stderr, "kvm: host lacks mmu notifiers, disabling -mem-path\n");
+ return NULL;
+ }
+
+ if (asprintf(&filename, "%s/kvm.XXXXXX", path) == -1) {
+ return NULL;
+ }
+
+ fd = mkstemp(filename);
+ if (fd < 0) {
+ perror("mkstemp");
+ free(filename);
+ return NULL;
+ }
+ unlink(filename);
+ free(filename);
+
+ memory = (memory+hpagesize-1) & ~(hpagesize-1);
+
+ /*
+ * ftruncate is not supported by hugetlbfs in older
+ * hosts, so don't bother checking for errors.
+ * If anything goes wrong with it under other filesystems,
+ * mmap will fail.
+ */
+ if (ftruncate(fd, memory))
+ perror("ftruncate");
+
+#ifdef MAP_POPULATE
+ /* NB: MAP_POPULATE won't exhaustively alloc all phys pages in the case
+ * MAP_PRIVATE is requested. For mem_prealloc we mmap as MAP_SHARED
+ * to sidestep this quirk.
+ */
+ flags = mem_prealloc ? MAP_POPULATE|MAP_SHARED : MAP_PRIVATE;
+ area = mmap(0, memory, PROT_READ|PROT_WRITE, flags, fd, 0);
+#else
+ area = mmap(0, memory, PROT_READ|PROT_WRITE, MAP_PRIVATE, fd, 0);
+#endif
+ if (area == MAP_FAILED) {
+ perror("file_ram_alloc: can't mmap RAM pages");
+ close(fd);
+ return (NULL);
+ }
+ return area;
+}
+
+#else
+
+static void *file_ram_alloc(ram_addr_t memory, const char *path)
+{
+ return NULL;
+}
+
+#endif
+
ram_addr_t qemu_ram_alloc(ram_addr_t size)
{
RAMBlock *new_block;
@@ -2520,16 +2625,20 @@ ram_addr_t qemu_ram_alloc(ram_addr_t siz
size = TARGET_PAGE_ALIGN(size);
new_block = qemu_malloc(sizeof(*new_block));
+ new_block->host = file_ram_alloc(size, mem_path);
+ if (!new_block->host) {
#if defined(TARGET_S390X) && defined(CONFIG_KVM)
- /* XXX S390 KVM requires the topmost vma of the RAM to be < 256GB */
- new_block->host = mmap((void*)0x1000000, size, PROT_EXEC|PROT_READ|PROT_WRITE,
- MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+ /* XXX S390 KVM requires the topmost vma of the RAM to be < 256GB */
+ new_block->host = mmap((void*)0x1000000, size,
+ PROT_EXEC|PROT_READ|PROT_WRITE,
+ MAP_SHARED | MAP_ANONYMOUS, -1, 0);
#else
- new_block->host = qemu_vmalloc(size);
+ new_block->host = qemu_vmalloc(size);
#endif
#ifdef MADV_MERGEABLE
- madvise(new_block->host, size, MADV_MERGEABLE);
+ madvise(new_block->host, size, MADV_MERGEABLE);
#endif
+ }
new_block->offset = last_ram_offset;
new_block->length = size;
Index: qemu-kvm/qemu-options.hx
===================================================================
--- qemu-kvm.orig/qemu-options.hx
+++ qemu-kvm/qemu-options.hx
@@ -314,6 +314,22 @@ a suffix of ``M'' or ``G'' can be used t
gigabytes respectively.
ETEXI
+DEF("mem-path", HAS_ARG, QEMU_OPTION_mempath,
+ "-mem-path FILE provide backing storage for guest RAM\n")
+STEXI
+@item -mem-path @var{path}
+Allocate guest RAM from a temporarily created file in @var{path}.
+ETEXI
+
+#ifdef MAP_POPULATE
+DEF("mem-prealloc", 0, QEMU_OPTION_mem_prealloc,
+ "-mem-prealloc preallocate guest memory (use with -mem-path)\n")
+STEXI
+@item -mem-prealloc
+Preallocate memory when using -mem-path.
+ETEXI
+#endif
+
DEF("k", HAS_ARG, QEMU_OPTION_k,
"-k language use keyboard layout (for example 'fr' for French)\n")
STEXI
Index: qemu-kvm/vl.c
===================================================================
--- qemu-kvm.orig/vl.c
+++ qemu-kvm/vl.c
@@ -186,6 +186,10 @@ static DisplayState *display_state;
DisplayType display_type = DT_DEFAULT;
const char* keyboard_layout = NULL;
ram_addr_t ram_size;
+const char *mem_path = NULL;
+#ifdef MAP_POPULATE
+int mem_prealloc = 1; /* force preallocation of physical target memory */
+#endif
int nb_nics;
NICInfo nd_table[MAX_NICS];
int vm_running;
@@ -5252,6 +5256,13 @@ int main(int argc, char **argv, char **e
ram_size = value;
break;
}
+ case QEMU_OPTION_mempath:
+ mem_path = optarg;
+ break;
+#ifdef MAP_POPULATE
+ case QEMU_OPTION_mem_prealloc:
+ mem_prealloc = !mem_prealloc;
+#endif
case QEMU_OPTION_d:
{
int mask;
WARNING: multiple messages have this Message-ID (diff)
From: Marcelo Tosatti <mtosatti@redhat.com>
To: kvm@vger.kernel.org, qemu-devel@nongnu.org
Cc: john cooper <john.cooper@redhat.com>,
Marcelo Tosatti <mtosatti@redhat.com>,
avi@redhat.com
Subject: [Qemu-devel] [patch uq/master 2/2] Add option to use file backed guest memory
Date: Wed, 24 Feb 2010 18:11:20 -0300 [thread overview]
Message-ID: <20100224211507.913712224@amt.cnet> (raw)
In-Reply-To: 20100224211117.958583246@amt.cnet
[-- Attachment #1: mempath --]
[-- Type: text/plain, Size: 6105 bytes --]
Port qemu-kvm's -mem-path and -mem-prealloc options. These are useful
for backing guest memory with huge pages via hugetlbfs.
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
CC: john cooper <john.cooper@redhat.com>
Index: qemu-kvm/cpu-all.h
===================================================================
--- qemu-kvm.orig/cpu-all.h
+++ qemu-kvm/cpu-all.h
@@ -850,6 +850,9 @@ extern uint8_t *phys_ram_dirty;
extern ram_addr_t ram_size;
extern ram_addr_t last_ram_offset;
+extern const char *mem_path;
+extern int mem_prealloc;
+
/* physical memory access */
/* MMIO pages are identified by a combination of an IO device index and
Index: qemu-kvm/exec.c
===================================================================
--- qemu-kvm.orig/exec.c
+++ qemu-kvm/exec.c
@@ -2513,6 +2513,111 @@ void qemu_flush_coalesced_mmio_buffer(vo
kvm_flush_coalesced_mmio_buffer();
}
+#ifdef __linux__
+
+#include <sys/vfs.h>
+
+#define HUGETLBFS_MAGIC 0x958458f6
+
+static long gethugepagesize(const char *path)
+{
+ struct statfs fs;
+ int ret;
+
+ do {
+ ret = statfs(path, &fs);
+ } while (ret != 0 && errno == EINTR);
+
+ if (ret != 0) {
+ perror("statfs");
+ return 0;
+ }
+
+ if (fs.f_type != HUGETLBFS_MAGIC)
+ fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
+
+ return fs.f_bsize;
+}
+
+static void *file_ram_alloc(ram_addr_t memory, const char *path)
+{
+ char *filename;
+ void *area;
+ int fd;
+#ifdef MAP_POPULATE
+ int flags;
+#endif
+ unsigned long hpagesize;
+
+ if (!path) {
+ return NULL;
+ }
+
+ hpagesize = gethugepagesize(path);
+ if (!hpagesize) {
+ return NULL;
+ }
+
+ if (memory < hpagesize) {
+ return NULL;
+ }
+
+ if (kvm_enabled() && !kvm_has_sync_mmu()) {
+ fprintf(stderr, "kvm: host lacks mmu notifiers, disabling -mem-path\n");
+ return NULL;
+ }
+
+ if (asprintf(&filename, "%s/kvm.XXXXXX", path) == -1) {
+ return NULL;
+ }
+
+ fd = mkstemp(filename);
+ if (fd < 0) {
+ perror("mkstemp");
+ free(filename);
+ return NULL;
+ }
+ unlink(filename);
+ free(filename);
+
+ memory = (memory+hpagesize-1) & ~(hpagesize-1);
+
+ /*
+ * ftruncate is not supported by hugetlbfs in older
+ * hosts, so don't bother checking for errors.
+ * If anything goes wrong with it under other filesystems,
+ * mmap will fail.
+ */
+ if (ftruncate(fd, memory))
+ perror("ftruncate");
+
+#ifdef MAP_POPULATE
+ /* NB: MAP_POPULATE won't exhaustively alloc all phys pages in the case
+ * MAP_PRIVATE is requested. For mem_prealloc we mmap as MAP_SHARED
+ * to sidestep this quirk.
+ */
+ flags = mem_prealloc ? MAP_POPULATE|MAP_SHARED : MAP_PRIVATE;
+ area = mmap(0, memory, PROT_READ|PROT_WRITE, flags, fd, 0);
+#else
+ area = mmap(0, memory, PROT_READ|PROT_WRITE, MAP_PRIVATE, fd, 0);
+#endif
+ if (area == MAP_FAILED) {
+ perror("file_ram_alloc: can't mmap RAM pages");
+ close(fd);
+ return (NULL);
+ }
+ return area;
+}
+
+#else
+
+static void *file_ram_alloc(ram_addr_t memory, const char *path)
+{
+ return NULL;
+}
+
+#endif
+
ram_addr_t qemu_ram_alloc(ram_addr_t size)
{
RAMBlock *new_block;
@@ -2520,16 +2625,20 @@ ram_addr_t qemu_ram_alloc(ram_addr_t siz
size = TARGET_PAGE_ALIGN(size);
new_block = qemu_malloc(sizeof(*new_block));
+ new_block->host = file_ram_alloc(size, mem_path);
+ if (!new_block->host) {
#if defined(TARGET_S390X) && defined(CONFIG_KVM)
- /* XXX S390 KVM requires the topmost vma of the RAM to be < 256GB */
- new_block->host = mmap((void*)0x1000000, size, PROT_EXEC|PROT_READ|PROT_WRITE,
- MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+ /* XXX S390 KVM requires the topmost vma of the RAM to be < 256GB */
+ new_block->host = mmap((void*)0x1000000, size,
+ PROT_EXEC|PROT_READ|PROT_WRITE,
+ MAP_SHARED | MAP_ANONYMOUS, -1, 0);
#else
- new_block->host = qemu_vmalloc(size);
+ new_block->host = qemu_vmalloc(size);
#endif
#ifdef MADV_MERGEABLE
- madvise(new_block->host, size, MADV_MERGEABLE);
+ madvise(new_block->host, size, MADV_MERGEABLE);
#endif
+ }
new_block->offset = last_ram_offset;
new_block->length = size;
Index: qemu-kvm/qemu-options.hx
===================================================================
--- qemu-kvm.orig/qemu-options.hx
+++ qemu-kvm/qemu-options.hx
@@ -314,6 +314,22 @@ a suffix of ``M'' or ``G'' can be used t
gigabytes respectively.
ETEXI
+DEF("mem-path", HAS_ARG, QEMU_OPTION_mempath,
+ "-mem-path FILE provide backing storage for guest RAM\n")
+STEXI
+@item -mem-path @var{path}
+Allocate guest RAM from a temporarily created file in @var{path}.
+ETEXI
+
+#ifdef MAP_POPULATE
+DEF("mem-prealloc", 0, QEMU_OPTION_mem_prealloc,
+ "-mem-prealloc preallocate guest memory (use with -mem-path)\n")
+STEXI
+@item -mem-prealloc
+Preallocate memory when using -mem-path.
+ETEXI
+#endif
+
DEF("k", HAS_ARG, QEMU_OPTION_k,
"-k language use keyboard layout (for example 'fr' for French)\n")
STEXI
Index: qemu-kvm/vl.c
===================================================================
--- qemu-kvm.orig/vl.c
+++ qemu-kvm/vl.c
@@ -186,6 +186,10 @@ static DisplayState *display_state;
DisplayType display_type = DT_DEFAULT;
const char* keyboard_layout = NULL;
ram_addr_t ram_size;
+const char *mem_path = NULL;
+#ifdef MAP_POPULATE
+int mem_prealloc = 1; /* force preallocation of physical target memory */
+#endif
int nb_nics;
NICInfo nd_table[MAX_NICS];
int vm_running;
@@ -5252,6 +5256,13 @@ int main(int argc, char **argv, char **e
ram_size = value;
break;
}
+ case QEMU_OPTION_mempath:
+ mem_path = optarg;
+ break;
+#ifdef MAP_POPULATE
+ case QEMU_OPTION_mem_prealloc:
+ mem_prealloc = !mem_prealloc;
+#endif
case QEMU_OPTION_d:
{
int mask;
next prev parent reply other threads:[~2010-02-24 21:16 UTC|newest]
Thread overview: 16+ messages / expand[flat|nested] mbox.gz Atom feed top
2010-02-24 21:11 [patch uq/master 0/2] port qemu-kvm's -mem-path and -mem-prealloc to qemu Marcelo Tosatti
2010-02-24 21:11 ` [Qemu-devel] " Marcelo Tosatti
2010-02-24 21:11 ` [patch uq/master 1/2] Allocate memory below 4GB as one chunk Marcelo Tosatti
2010-02-24 21:11 ` [Qemu-devel] " Marcelo Tosatti
2010-02-25 13:33 ` Avi Kivity
2010-02-25 13:33 ` [Qemu-devel] " Avi Kivity
2010-02-24 21:11 ` Marcelo Tosatti [this message]
2010-02-24 21:11 ` [Qemu-devel] [patch uq/master 2/2] Add option to use file backed guest memory Marcelo Tosatti
2010-02-28 1:28 ` Paul Brook
2010-02-28 1:28 ` Paul Brook
2010-03-01 23:25 ` Marcelo Tosatti
2010-03-01 23:25 ` Marcelo Tosatti
2010-03-01 23:32 ` Marcelo Tosatti
2010-03-01 23:32 ` Marcelo Tosatti
2010-02-25 13:32 ` [patch uq/master 0/2] port qemu-kvm's -mem-path and -mem-prealloc to qemu Avi Kivity
2010-02-25 13:32 ` [Qemu-devel] " Avi Kivity
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20100224211507.913712224@amt.cnet \
--to=mtosatti@redhat.com \
--cc=avi@redhat.com \
--cc=john.cooper@redhat.com \
--cc=kvm@vger.kernel.org \
--cc=qemu-devel@nongnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.