From: Glauber Costa <glommer@redhat.com>
To: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Cc: aliguori@us.ibm.com, qemu-devel@nongnu.org
Subject: Re: [Qemu-devel] vga optmization
Date: Tue, 4 Nov 2008 18:28:46 -0200 [thread overview]
Message-ID: <20081104202846.GB27481@poweredge.glommer> (raw)
In-Reply-To: <491063C1.4080202@eu.citrix.com>
[-- Attachment #1: Type: text/plain, Size: 1065 bytes --]
On Tue, Nov 04, 2008 at 03:01:21PM +0000, Stefano Stabellini wrote:
> Avi Kivity wrote:
>
> > Stefano Stabellini wrote:
> >> Why don't you make qemu_physical_sync_dirty_bitmap take also and end
> >> address, and you merge the two bitmaps in this address range in this
> >> function, so you don't have to change cpu_physical_memory_get_dirty at
> >> all?
> >>
> >> I am saying to do something like:
> >>
> >> void qemu_physical_sync_dirty_bitmap(ram_addr_t start_addr, ram_addr_t
> >> end_addr)
> >> {
> >> /* sync and merge the two bitmaps between start_addr and end_addr */
> >> }
> >>
> >> then leave cpu_physical_memory_get_dirty untouched.
> >> I would prefer this approch, it also leaves more space to other
> >> optimizations.
> >>
> >
> > That's how current kvm userspace works. It's also more correct, since
> > the kvm bitmap feeds both the vga dirty bits and the live migration
> > dirty bits.
> >
>
>
> That's also how currently qemu-xen works.
> I am glad that we agree :)
I'm attaching a new version. Let me know if it's better this way.
[-- Attachment #2: vga2.patch --]
[-- Type: text/plain, Size: 16233 bytes --]
diff --git a/exec.c b/exec.c
index ef1072b..2ed5e2b 100644
--- a/exec.c
+++ b/exec.c
@@ -1823,6 +1823,17 @@ int cpu_physical_memory_get_dirty_tracking(void)
return in_migration;
}
+void qemu_physical_sync_dirty_bitmap(ram_addr_t start_addr, ram_addr_t end_addr)
+{
+ if (kvm_enabled()) {
+ ram_addr_t addr;
+ kvm_physical_sync_dirty_bitmap(start_addr);
+ for (addr = start_addr; addr < end_addr; addr += TARGET_PAGE_SIZE)
+ phys_ram_dirty[addr >> TARGET_PAGE_BITS] |= kvm_physical_memory_get_dirty(addr);
+
+ }
+}
+
static inline void tlb_update_dirty(CPUTLBEntry *tlb_entry)
{
ram_addr_t ram_addr;
diff --git a/hw/cirrus_vga.c b/hw/cirrus_vga.c
index af9c9e6..bc1a3af 100644
--- a/hw/cirrus_vga.c
+++ b/hw/cirrus_vga.c
@@ -31,6 +31,7 @@
#include "pci.h"
#include "console.h"
#include "vga_int.h"
+#include "kvm.h"
/*
* TODO:
@@ -248,6 +249,9 @@ typedef struct CirrusVGAState {
int cirrus_linear_io_addr;
int cirrus_linear_bitblt_io_addr;
int cirrus_mmio_io_addr;
+ uint32_t cirrus_lfb_addr;
+ uint32_t cirrus_lfb_end;
+ uint32_t cirrus_lfb_mapped;
uint32_t cirrus_addr_mask;
uint32_t linear_mmio_mask;
uint8_t cirrus_shadow_gr0;
@@ -2618,6 +2622,69 @@ static CPUWriteMemoryFunc *cirrus_linear_bitblt_write[3] = {
cirrus_linear_bitblt_writel,
};
+static void set_vram_mapping(target_phys_addr_t begin, target_phys_addr_t end, ram_addr_t target)
+{
+ /* align begin and end address */
+ begin = begin & TARGET_PAGE_MASK;
+ end = begin + VGA_RAM_SIZE;
+ end = (end + TARGET_PAGE_SIZE -1 ) & TARGET_PAGE_MASK;
+
+ cpu_register_physical_memory(begin, end - begin, target);
+
+ if (kvm_enabled()) {
+ /* XXX: per-slot dirty tracking in qemu may get rid of it */
+ kvm_log_start(begin, end - begin);
+ }
+}
+
+static void unset_vram_mapping(target_phys_addr_t begin, target_phys_addr_t end)
+{
+ /* align begin and end address */
+ end = begin + VGA_RAM_SIZE;
+ begin = begin & TARGET_PAGE_MASK;
+ end = (end + TARGET_PAGE_SIZE -1 ) & TARGET_PAGE_MASK;
+
+ if (kvm_enabled()) {
+ /* XXX: per-slot dirty tracking in qemu may get rid of it */
+ kvm_log_stop(begin, end - begin);
+ }
+ cpu_register_physical_memory(begin, end - begin, IO_MEM_UNASSIGNED);
+
+}
+
+static void map_linear_vram(CirrusVGAState *s)
+{
+
+ if (!s->cirrus_lfb_mapped && s->cirrus_lfb_addr && s->cirrus_lfb_end) {
+ set_vram_mapping(s->cirrus_lfb_addr, s->cirrus_lfb_end, s->vram_offset);
+ s->cirrus_lfb_mapped = 1;
+ }
+
+ if(!(s->cirrus_srcptr != s->cirrus_srcptr_end)
+ && !((s->sr[0x07] & 0x01) == 0)
+ && !((s->gr[0x0B] & 0x14) == 0x14)
+ && !(s->gr[0x0B] & 0x02)) {
+ cpu_register_physical_memory(isa_mem_base + 0xa0000, 0x20000, s->vram_offset | IO_MEM_RAM);
+ }
+ else {
+ cpu_register_physical_memory(isa_mem_base + 0xa0000, 0x20000, s->vga_io_memory);
+ }
+}
+
+static void unmap_linear_vram(CirrusVGAState *s)
+{
+
+ if (s->cirrus_lfb_mapped && s->cirrus_lfb_addr && s->cirrus_lfb_end) {
+ unset_vram_mapping(s->cirrus_lfb_addr,
+ s->cirrus_lfb_end);
+ s->cirrus_lfb_mapped = 0;
+ }
+
+ cpu_register_physical_memory(isa_mem_base + 0x000a0000, 0x20000,
+ s->vga_io_memory);
+
+}
+
/* Compute the memory access functions */
static void cirrus_update_memory_access(CirrusVGAState *s)
{
@@ -2636,11 +2703,13 @@ static void cirrus_update_memory_access(CirrusVGAState *s)
mode = s->gr[0x05] & 0x7;
if (mode < 4 || mode > 5 || ((s->gr[0x0B] & 0x4) == 0)) {
+ map_linear_vram(s);
s->cirrus_linear_write[0] = cirrus_linear_mem_writeb;
s->cirrus_linear_write[1] = cirrus_linear_mem_writew;
s->cirrus_linear_write[2] = cirrus_linear_mem_writel;
} else {
generic_io:
+ unmap_linear_vram(s);
s->cirrus_linear_write[0] = cirrus_linear_writeb;
s->cirrus_linear_write[1] = cirrus_linear_writew;
s->cirrus_linear_write[2] = cirrus_linear_writel;
@@ -3117,7 +3186,7 @@ static int cirrus_vga_load(QEMUFile *f, void *opaque, int version_id)
static void cirrus_init_common(CirrusVGAState * s, int device_id, int is_pci)
{
- int vga_io_memory, i;
+ int i;
static int inited;
if (!inited) {
@@ -3156,10 +3225,10 @@ static void cirrus_init_common(CirrusVGAState * s, int device_id, int is_pci)
register_ioport_read(0x3ba, 1, 1, vga_ioport_read, s);
register_ioport_read(0x3da, 1, 1, vga_ioport_read, s);
- vga_io_memory = cpu_register_io_memory(0, cirrus_vga_mem_read,
+ s->vga_io_memory = cpu_register_io_memory(0, cirrus_vga_mem_read,
cirrus_vga_mem_write, s);
cpu_register_physical_memory(isa_mem_base + 0x000a0000, 0x20000,
- vga_io_memory);
+ s->vga_io_memory);
s->sr[0x06] = 0x0f;
if (device_id == CIRRUS_ID_CLGD5446) {
@@ -3258,9 +3327,13 @@ static void cirrus_pci_lfb_map(PCIDevice *d, int region_num,
/* XXX: add byte swapping apertures */
cpu_register_physical_memory(addr, s->vram_size,
- s->cirrus_linear_io_addr);
+ s->cirrus_linear_io_addr | s->vram_offset);
cpu_register_physical_memory(addr + 0x1000000, 0x400000,
s->cirrus_linear_bitblt_io_addr);
+
+ s->cirrus_lfb_mapped = 0;
+ s->cirrus_lfb_addr = addr;
+ s->cirrus_lfb_end = addr + VGA_RAM_SIZE;
}
static void cirrus_pci_mmio_map(PCIDevice *d, int region_num,
diff --git a/hw/vga.c b/hw/vga.c
index 9540db0..1568be4 100644
--- a/hw/vga.c
+++ b/hw/vga.c
@@ -28,6 +28,7 @@
#include "vga_int.h"
#include "pixel_ops.h"
#include "qemu-timer.h"
+#include "kvm.h"
//#define DEBUG_VGA
//#define DEBUG_VGA_MEM
@@ -1568,6 +1569,8 @@ static void vga_draw_graphic(VGAState *s, int full_update)
uint32_t v, addr1, addr;
vga_draw_line_func *vga_draw_line;
+ qemu_physical_sync_dirty_bitmap(s->vram_offset, s->vram_offset + VGA_RAM_SIZE);
+
full_update |= update_basic_params(s);
s->get_resolution(s, &width, &height);
@@ -2102,6 +2105,9 @@ static void vga_map(PCIDevice *pci_dev, int region_num,
} else {
cpu_register_physical_memory(addr, s->vram_size, s->vram_offset);
}
+
+ if (kvm_enabled())
+ kvm_log_start(addr, VGA_RAM_SIZE);
}
void vga_common_init(VGAState *s, DisplayState *ds, uint8_t *vga_ram_base,
diff --git a/hw/vga_int.h b/hw/vga_int.h
index 82a755e..91a8d77 100644
--- a/hw/vga_int.h
+++ b/hw/vga_int.h
@@ -100,7 +100,7 @@ typedef void (* vga_update_retrace_info_fn)(struct VGAState *s);
#define VGA_STATE_COMMON \
uint8_t *vram_ptr; \
- unsigned long vram_offset; \
+ ram_addr_t vram_offset; \
unsigned int vram_size; \
unsigned long bios_offset; \
unsigned int bios_size; \
@@ -129,6 +129,7 @@ typedef void (* vga_update_retrace_info_fn)(struct VGAState *s);
int dac_8bit; \
uint8_t palette[768]; \
int32_t bank_offset; \
+ int vga_io_memory; \
int (*get_bpp)(struct VGAState *s); \
void (*get_offsets)(struct VGAState *s, \
uint32_t *pline_offset, \
diff --git a/kvm-all.c b/kvm-all.c
index 4379071..4916865 100644
--- a/kvm-all.c
+++ b/kvm-all.c
@@ -31,7 +31,18 @@
do { } while (0)
#endif
-typedef struct kvm_userspace_memory_region KVMSlot;
+#define warning(fmt, ...) \
+ do { fprintf(stderr, "%s:%d" fmt, __func__, __LINE__, ## __VA_ARGS__); } while (0)
+
+typedef struct KVMSlot {
+ struct kvm_userspace_memory_region region;
+ int logging_count;
+ uint64_t *dirty_bitmap;
+} KVMSlot;
+
+#define kvm_uaddr(addr) ((addr) + (ram_addr_t)phys_ram_base)
+
+typedef struct kvm_dirty_log KVMDirty;
int kvm_allowed = 0;
@@ -49,7 +60,7 @@ static KVMSlot *kvm_alloc_slot(KVMState *s)
int i;
for (i = 0; i < ARRAY_SIZE(s->slots); i++) {
- if (s->slots[i].memory_size == 0)
+ if (s->slots[i].region.memory_size == 0)
return &s->slots[i];
}
@@ -63,8 +74,26 @@ static KVMSlot *kvm_lookup_slot(KVMState *s, target_phys_addr_t start_addr)
for (i = 0; i < ARRAY_SIZE(s->slots); i++) {
KVMSlot *mem = &s->slots[i];
- if (start_addr >= mem->guest_phys_addr &&
- start_addr < (mem->guest_phys_addr + mem->memory_size))
+ if (start_addr >= mem->region.guest_phys_addr &&
+ start_addr < (mem->region.guest_phys_addr + mem->region.memory_size))
+ return mem;
+ }
+
+ return NULL;
+}
+
+/* find the slot correspondence using userspace_addr as a key */
+static KVMSlot *kvm_lookup_slot_uaddr(KVMState *s, ram_addr_t addr)
+{
+ int i;
+
+ uint64_t uaddr = (uint64_t)kvm_uaddr(addr);
+
+ for (i = 0; i < ARRAY_SIZE(s->slots); i++) {
+ KVMSlot *mem = &s->slots[i];
+
+ if (uaddr >= mem->region.userspace_addr &&
+ uaddr < (mem->region.userspace_addr + mem->region.memory_size))
return mem;
}
@@ -109,6 +138,133 @@ err:
return ret;
}
+/*
+ * * dirty pages logging control
+ * */
+static int kvm_dirty_pages_log_change(KVMSlot *mem,
+ unsigned flags,
+ unsigned mask)
+{
+ int r = -1;
+ KVMState *s = kvm_state;
+
+ flags = (mem->region.flags & ~mask) | flags;
+ if (flags == mem->region.flags)
+ return 0;
+
+ mem->region.flags = flags;
+
+ r = kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &mem->region);
+ if (r == -1)
+ fprintf(stderr, "%s: %m\n", __FUNCTION__);
+
+ return r;
+}
+
+#define BMAP_SIZE(mem) ((mem->region.memory_size >> TARGET_PAGE_BITS)/ sizeof(*mem->dirty_bitmap))
+
+int kvm_log_start(target_phys_addr_t phys_addr, target_phys_addr_t len)
+{
+ KVMState *s = kvm_state;
+ KVMSlot *mem = kvm_lookup_slot(s, phys_addr);
+
+ dprintf("try log start %llx len %llx\n", phys_addr, len);
+
+ if (mem == NULL) {
+ fprintf(stderr, "BUG: %s: invalid parameters\n", __func__);
+ return -EINVAL;
+ }
+
+ if (mem->logging_count++)
+ return 0;
+
+ mem->dirty_bitmap = malloc(BMAP_SIZE(mem));
+
+ if (mem->dirty_bitmap == NULL)
+ return -ENOMEM;
+
+ dprintf("slot %d: enable logging (phys %llx, uaddr %llx)\n",
+ mem->region.slot, mem->region.guest_phys_addr, mem->region.userspace_addr);
+
+ return kvm_dirty_pages_log_change(mem,
+ KVM_MEM_LOG_DIRTY_PAGES,
+ KVM_MEM_LOG_DIRTY_PAGES);
+}
+
+int kvm_log_stop(target_phys_addr_t phys_addr, target_phys_addr_t len)
+{
+
+ KVMState *s = kvm_state;
+ KVMSlot *mem = kvm_lookup_slot(s, phys_addr);
+
+ if (mem == NULL) {
+ fprintf(stderr, "BUG: %s: invalid parameters\n", __func__);
+ return -EINVAL;
+ }
+ dprintf("slot %d: disabling logging\n", mem->region.slot);
+
+ if (mem->logging_count--)
+ return 0;
+
+ return kvm_dirty_pages_log_change(mem,
+ 0,
+ KVM_MEM_LOG_DIRTY_PAGES);
+}
+
+#define BITMAP_WORD(mem) (sizeof(*(mem)->dirty_bitmap) * 8)
+static inline int lookup_bitmap_phys(KVMSlot *mem, ram_addr_t addr)
+{
+ unsigned nr = (uint32_t)((uint64_t)kvm_uaddr(addr) - mem->region.userspace_addr) >> TARGET_PAGE_BITS;
+ unsigned word = nr / BITMAP_WORD(mem);
+ unsigned bit = nr % BITMAP_WORD(mem);
+ int ret;
+
+ ret = (mem->dirty_bitmap[word] >> bit) & 1;
+ return ret;
+}
+
+int kvm_physical_memory_get_dirty(ram_addr_t addr)
+{
+ KVMState *s = kvm_state;
+ KVMSlot *mem = kvm_lookup_slot_uaddr(s, addr);
+
+ if (mem == NULL) {
+ fprintf(stderr, "BUG: %s: invalid parameters\n", __func__);
+ return -EINVAL;
+ }
+
+ if (mem->dirty_bitmap == NULL) {
+ return 0;
+ }
+ return lookup_bitmap_phys(mem, addr);
+}
+
+void kvm_physical_sync_dirty_bitmap(ram_addr_t start_addr)
+{
+ KVMState *s = kvm_state;
+ KVMSlot *mem = kvm_lookup_slot_uaddr(s, start_addr);
+
+ KVMDirty d;
+
+ dprintf("sync addr: %lx %llx %llx\n", start_addr, mem->region.guest_phys_addr, kvm_uaddr(start_addr));
+ if (mem == NULL) {
+ fprintf(stderr, "BUG: %s: invalid parameters\n", __func__);
+ return;
+ }
+
+ if (mem->dirty_bitmap == NULL)
+ warning("Asked to sync dirty bitmap for region with logging disabled\n");
+
+ d.slot = mem->region.slot;
+ d.dirty_bitmap = mem->dirty_bitmap;
+ dprintf("slot %d, phys_addr %llx, uaddr: %llx\n",
+ d.slot, mem->region.guest_phys_addr, mem->region.userspace_addr);
+
+ if (kvm_vm_ioctl(s, KVM_GET_DIRTY_LOG, &d) == -1)
+ warning("ioctl failed %d\n", errno);
+
+}
+
int kvm_init(int smp_cpus)
{
KVMState *s;
@@ -123,7 +279,7 @@ int kvm_init(int smp_cpus)
return -ENOMEM;
for (i = 0; i < ARRAY_SIZE(s->slots); i++)
- s->slots[i].slot = i;
+ s->slots[i].region.slot = i;
s->vmfd = -1;
s->fd = open("/dev/kvm", O_RDWR);
@@ -317,15 +473,18 @@ void kvm_set_phys_mem(target_phys_addr_t start_addr,
mem = kvm_lookup_slot(s, start_addr);
if (mem) {
- if (flags == IO_MEM_UNASSIGNED) {
- mem->memory_size = 0;
- mem->guest_phys_addr = start_addr;
- mem->userspace_addr = 0;
- mem->flags = 0;
-
- kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, mem);
- } else if (start_addr >= mem->guest_phys_addr &&
- (start_addr + size) <= (mem->guest_phys_addr + mem->memory_size))
+ if ((flags == IO_MEM_UNASSIGNED) || (flags >= TLB_MMIO)) {
+ if (mem->region.flags == KVM_MEM_LOG_DIRTY_PAGES)
+ return;
+ dprintf("deleting memory %llx with flags %d\n", mem->region.guest_phys_addr, mem->region.flags);
+ mem->region.memory_size = 0;
+ mem->region.guest_phys_addr = start_addr;
+ mem->region.userspace_addr = 0;
+ mem->region.flags = 0;
+
+ kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &mem->region);
+ } else if (start_addr >= mem->region.guest_phys_addr &&
+ (start_addr + size) <= (mem->region.guest_phys_addr + mem->region.memory_size))
return;
}
@@ -334,12 +493,12 @@ void kvm_set_phys_mem(target_phys_addr_t start_addr,
return;
mem = kvm_alloc_slot(s);
- mem->memory_size = size;
- mem->guest_phys_addr = start_addr;
- mem->userspace_addr = (unsigned long)(phys_ram_base + phys_offset);
- mem->flags = 0;
+ mem->region.memory_size = size;
+ mem->region.guest_phys_addr = start_addr;
+ mem->region.userspace_addr = (unsigned long)(phys_ram_base + phys_offset);
+ mem->region.flags = 0;
- kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, mem);
+ kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &mem->region);
/* FIXME deal with errors */
}
diff --git a/kvm.h b/kvm.h
index 37102b4..6e2d9d2 100644
--- a/kvm.h
+++ b/kvm.h
@@ -38,6 +38,9 @@ void kvm_set_phys_mem(target_phys_addr_t start_addr,
ram_addr_t size,
ram_addr_t phys_offset);
+int kvm_physical_memory_get_dirty(ram_addr_t addr);
+void kvm_physical_sync_dirty_bitmap(ram_addr_t start_addr);
+
/* internal API */
struct KVMState;
next prev parent reply other threads:[~2008-11-04 20:27 UTC|newest]
Thread overview: 26+ messages / expand[flat|nested] mbox.gz Atom feed top
2008-11-03 17:31 [Qemu-devel] vga optmization Glauber Costa
2008-11-03 17:43 ` Stefano Stabellini
2008-11-03 17:52 ` Glauber Costa
2008-11-03 18:06 ` Stefano Stabellini
2008-11-03 18:03 ` Blue Swirl
2008-11-03 18:14 ` Glauber Costa
2008-11-03 18:41 ` Blue Swirl
2008-11-03 18:47 ` Glauber Costa
2008-11-03 18:13 ` Fabrice Bellard
2008-11-03 18:18 ` Glauber Costa
2008-11-04 7:23 ` Avi Kivity
2008-11-04 9:31 ` andrzej zaborowski
2008-11-04 11:40 ` Stefano Stabellini
2008-11-04 13:43 ` Glauber Costa
2008-11-04 14:51 ` Avi Kivity
2008-11-04 14:52 ` Anthony Liguori
2008-11-04 14:55 ` Glauber Costa
2008-11-04 15:13 ` Stefano Stabellini
2008-11-04 20:42 ` Avi Kivity
2008-11-04 20:51 ` Anthony Liguori
2008-11-04 15:01 ` Stefano Stabellini
2008-11-04 20:28 ` Glauber Costa [this message]
2008-11-04 20:40 ` Anthony Liguori
2008-11-05 14:42 ` Stefano Stabellini
2008-11-07 11:15 ` Glauber Costa
2008-11-07 11:33 ` Stefano Stabellini
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20081104202846.GB27481@poweredge.glommer \
--to=glommer@redhat.com \
--cc=aliguori@us.ibm.com \
--cc=qemu-devel@nongnu.org \
--cc=stefano.stabellini@eu.citrix.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).