Message-ID: <4E2DBF24.20803@codemonkey.ws>
Date: Mon, 25 Jul 2011 14:08:20 -0500
From: Anthony Liguori
Subject: Re: [Qemu-devel] [PATCH 11/23] memory: add ioeventfd support
References: <1311602584-23409-1-git-send-email-avi@redhat.com> <1311602584-23409-12-git-send-email-avi@redhat.com>
In-Reply-To: <1311602584-23409-12-git-send-email-avi@redhat.com>
To: Avi Kivity <avi@redhat.com>
Cc: qemu-devel@nongnu.org, kvm@vger.kernel.org

On 07/25/2011 09:02 AM, Avi Kivity wrote:
> As with the rest of the memory API, the caller associates an eventfd
> with an address, and the memory API takes care of registering or
> unregistering when the address is made visible or invisible to the
> guest.
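
Just to spell out the usage model for anyone following along (hypothetical
device code, not from the patch; it only uses the memory_region_add_eventfd()
/ memory_region_del_eventfd() calls declared in memory.h further down, and the
mydev_* names are made up):

    /* Hypothetical doorbell at offset 0x10 of a device's region: fire 'fd'
     * only when the 32-bit value 'queue_idx' is written there. */
    static void mydev_start_notify(MemoryRegion *mr, uint32_t queue_idx, int fd)
    {
        memory_region_add_eventfd(mr, 0x10, 4, true, queue_idx, fd);
    }

    static void mydev_stop_notify(MemoryRegion *mr, uint32_t queue_idx, int fd)
    {
        memory_region_del_eventfd(mr, 0x10, 4, true, queue_idx, fd);
    }
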
>
> Signed-off-by: Avi Kivity <avi@redhat.com>
> ---
>  memory.c |  230 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  memory.h |   20 ++++++
>  2 files changed, 250 insertions(+), 0 deletions(-)
>
> diff --git a/memory.c b/memory.c
> index e4446a0..a5cde0c 100644
> --- a/memory.c
> +++ b/memory.c
> @@ -15,6 +15,7 @@
>  #include "exec-memory.h"
>  #include "ioport.h"
>  #include "bitops.h"
> +#include "kvm.h"
>  #include <assert.h>
>
>  typedef struct AddrRange AddrRange;
> @@ -64,6 +65,50 @@ struct CoalescedMemoryRange {
>      QTAILQ_ENTRY(CoalescedMemoryRange) link;
>  };
>
> +struct MemoryRegionIoeventfd {
> +    AddrRange addr;
> +    bool match_data;
> +    uint64_t data;
> +    int fd;
> +};
> +
> +static bool memory_region_ioeventfd_before(MemoryRegionIoeventfd a,
> +                                           MemoryRegionIoeventfd b)
> +{
> +    if (a.addr.start < b.addr.start) {
> +        return true;
> +    } else if (a.addr.start > b.addr.start) {
> +        return false;
> +    } else if (a.addr.size < b.addr.size) {
> +        return true;
> +    } else if (a.addr.size > b.addr.size) {
> +        return false;
> +    } else if (a.match_data < b.match_data) {
> +        return true;
> +    } else if (a.match_data > b.match_data) {
> +        return false;
> +    } else if (a.match_data) {
> +        if (a.data < b.data) {
> +            return true;
> +        } else if (a.data > b.data) {
> +            return false;
> +        }
> +    }
> +    if (a.fd < b.fd) {
> +        return true;
> +    } else if (a.fd > b.fd) {
> +        return false;
> +    }
> +    return false;
> +}
> +
> +static bool memory_region_ioeventfd_equal(MemoryRegionIoeventfd a,
> +                                          MemoryRegionIoeventfd b)
> +{
> +    return !memory_region_ioeventfd_before(a, b)
> +        && !memory_region_ioeventfd_before(b, a);
> +}
> +
>  typedef struct FlatRange FlatRange;
>  typedef struct FlatView FlatView;
>
> @@ -92,6 +137,8 @@ struct AddressSpace {
>      const AddressSpaceOps *ops;
>      MemoryRegion *root;
>      FlatView current_map;
> +    int ioeventfd_nb;
> +    MemoryRegionIoeventfd *ioeventfds;
>  };
>
>  struct AddressSpaceOps {
> @@ -99,6 +146,8 @@ struct AddressSpaceOps {
>      void (*range_del)(AddressSpace *as, FlatRange *fr);
>      void (*log_start)(AddressSpace *as, FlatRange *fr);
>      void (*log_stop)(AddressSpace *as, FlatRange *fr);
> +    void (*ioeventfd_add)(AddressSpace *as, MemoryRegionIoeventfd *fd);
> +    void (*ioeventfd_del)(AddressSpace *as, MemoryRegionIoeventfd *fd);
>  };
>
>  #define FOR_EACH_FLAT_RANGE(var, view) \
> @@ -201,11 +250,37 @@ static void as_memory_log_stop(AddressSpace *as, FlatRange *fr)
>      cpu_physical_log_stop(fr->addr.start, fr->addr.size);
>  }
>
> +static void as_memory_ioeventfd_add(AddressSpace *as, MemoryRegionIoeventfd *fd)
> +{
> +    int r;
> +
> +    if (!fd->match_data || fd->addr.size != 4) {
> +        abort();
> +    }
> +
> +    r = kvm_set_ioeventfd_mmio_long(fd->fd, fd->addr.start, fd->data, true);
> +    if (r < 0) {
> +        abort();
> +    }

asserts would be friendlier.
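
Something along these lines (just a sketch of the same checks written with
assert(), not tested; it uses the same kvm_set_ioeventfd_mmio_long() call the
patch already makes):

    assert(fd->match_data && fd->addr.size == 4);

    r = kvm_set_ioeventfd_mmio_long(fd->fd, fd->addr.start, fd->data, true);
    assert(r >= 0);
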
I really dislike baking ioeventfd into this API.  There is only one user of
ioeventfd in the tree.

I worry that by having things like ioeventfd in the API, we're making it too
difficult to side-step the API, which prevents future optimizations.

I'd prefer virtio-pci to carry the ugliness of circumventing the layering
rather than have such a device-specific thing in generic code.

Regards,

Anthony Liguori

> +}
> +
> +static void as_memory_ioeventfd_del(AddressSpace *as, MemoryRegionIoeventfd *fd)
> +{
> +    int r;
> +
> +    r = kvm_set_ioeventfd_mmio_long(fd->fd, fd->addr.start, fd->data, false);
> +    if (r < 0) {
> +        abort();
> +    }
> +}
> +
>  static const AddressSpaceOps address_space_ops_memory = {
>      .range_add = as_memory_range_add,
>      .range_del = as_memory_range_del,
>      .log_start = as_memory_log_start,
>      .log_stop = as_memory_log_stop,
> +    .ioeventfd_add = as_memory_ioeventfd_add,
> +    .ioeventfd_del = as_memory_ioeventfd_del,
>  };
>
>  static AddressSpace address_space_memory = {
> @@ -281,9 +356,35 @@ static void as_io_range_del(AddressSpace *as, FlatRange *fr)
>      isa_unassign_ioport(fr->addr.start, fr->addr.size);
>  }
>
> +static void as_io_ioeventfd_add(AddressSpace *as, MemoryRegionIoeventfd *fd)
> +{
> +    int r;
> +
> +    if (!fd->match_data || fd->addr.size != 2) {
> +        abort();
> +    }
> +
> +    r = kvm_set_ioeventfd_pio_word(fd->fd, fd->addr.start, fd->data, true);
> +    if (r < 0) {
> +        abort();
> +    }
> +}
> +
> +static void as_io_ioeventfd_del(AddressSpace *as, MemoryRegionIoeventfd *fd)
> +{
> +    int r;
> +
> +    r = kvm_set_ioeventfd_pio_word(fd->fd, fd->addr.start, fd->data, false);
> +    if (r < 0) {
> +        abort();
> +    }
> +}
> +
>  static const AddressSpaceOps address_space_ops_io = {
>      .range_add = as_io_range_add,
>      .range_del = as_io_range_del,
> +    .ioeventfd_add = as_io_ioeventfd_add,
> +    .ioeventfd_del = as_io_ioeventfd_del,
>  };
>
>  static AddressSpace address_space_io = {
> @@ -382,6 +483,69 @@ static FlatView generate_memory_topology(MemoryRegion *mr)
>      return view;
>  }
>
> +static void address_space_add_del_ioeventfds(AddressSpace *as,
> +                                             MemoryRegionIoeventfd *fds_new,
> +                                             unsigned fds_new_nb,
> +                                             MemoryRegionIoeventfd *fds_old,
> +                                             unsigned fds_old_nb)
> +{
> +    unsigned iold, inew;
> +
> +    /* Generate a symmetric difference of the old and new fd sets, adding
> +     * and deleting as necessary.
> +     */
> +
> +    iold = inew = 0;
> +    while (iold < fds_old_nb || inew < fds_new_nb) {
> +        if (iold < fds_old_nb
> +            && (inew == fds_new_nb
> +                || memory_region_ioeventfd_before(fds_old[iold],
> +                                                  fds_new[inew]))) {
> +            as->ops->ioeventfd_del(as, &fds_old[iold]);
> +            ++iold;
> +        } else if (inew < fds_new_nb
> +                   && (iold == fds_old_nb
> +                       || memory_region_ioeventfd_before(fds_new[inew],
> +                                                         fds_old[iold]))) {
> +            as->ops->ioeventfd_add(as, &fds_new[inew]);
> +            ++inew;
> +        } else {
> +            ++iold;
> +            ++inew;
> +        }
> +    }
> +}
> +
> +static void address_space_update_ioeventfds(AddressSpace *as)
> +{
> +    FlatRange *fr;
> +    unsigned ioeventfd_nb = 0;
> +    MemoryRegionIoeventfd *ioeventfds = NULL;
> +    AddrRange tmp;
> +    unsigned i;
> +
> +    FOR_EACH_FLAT_RANGE(fr, &as->current_map) {
> +        for (i = 0; i < fr->mr->ioeventfd_nb; ++i) {
> +            tmp = addrrange_shift(fr->mr->ioeventfds[i].addr,
> +                                  fr->addr.start - fr->offset_in_region);
> +            if (addrrange_intersects(fr->addr, tmp)) {
> +                ++ioeventfd_nb;
> +                ioeventfds = qemu_realloc(ioeventfds,
> +                                          ioeventfd_nb * sizeof(*ioeventfds));
> +                ioeventfds[ioeventfd_nb-1] = fr->mr->ioeventfds[i];
> +                ioeventfds[ioeventfd_nb-1].addr = tmp;
> +            }
> +        }
> +    }
> +
> +    address_space_add_del_ioeventfds(as, ioeventfds, ioeventfd_nb,
> +                                     as->ioeventfds, as->ioeventfd_nb);
> +
> +    qemu_free(as->ioeventfds);
> +    as->ioeventfds = ioeventfds;
> +    as->ioeventfd_nb = ioeventfd_nb;
> +}
> +
>  static void address_space_update_topology(AddressSpace *as)
>  {
>      FlatView old_view = as->current_map;
> @@ -434,6 +598,7 @@ static void address_space_update_topology(AddressSpace *as)
>      }
>      as->current_map = new_view;
>      flatview_destroy(&old_view);
> +    address_space_update_ioeventfds(as);
>  }
>
>  static void memory_region_update_topology(void)
> @@ -464,6 +629,8 @@ void memory_region_init(MemoryRegion *mr,
>      QTAILQ_INIT(&mr->coalesced);
>      mr->name = qemu_strdup(name);
>      mr->dirty_log_mask = 0;
> +    mr->ioeventfd_nb = 0;
> +    mr->ioeventfds = NULL;
>  }
>
>  static bool memory_region_access_valid(MemoryRegion *mr,
> @@ -675,6 +842,7 @@ void memory_region_destroy(MemoryRegion *mr)
>      assert(QTAILQ_EMPTY(&mr->subregions));
>      memory_region_clear_coalescing(mr);
>      qemu_free((char *)mr->name);
> +    qemu_free(mr->ioeventfds);
>  }
>
>  target_phys_addr_t memory_region_size(MemoryRegion *mr)
> @@ -798,6 +966,68 @@ void memory_region_clear_coalescing(MemoryRegion *mr)
>      memory_region_update_coalesced_range(mr);
>  }
>
> +void memory_region_add_eventfd(MemoryRegion *mr,
> +                               target_phys_addr_t addr,
> +                               unsigned size,
> +                               bool match_data,
> +                               uint64_t data,
> +                               int fd)
> +{
> +    MemoryRegionIoeventfd mrfd = {
> +        .addr.start = addr,
> +        .addr.size = size,
> +        .match_data = match_data,
> +        .data = data,
> +        .fd = fd,
> +    };
> +    unsigned i;
> +
> +    for (i = 0; i < mr->ioeventfd_nb; ++i) {
> +        if (memory_region_ioeventfd_before(mrfd, mr->ioeventfds[i])) {
> +            break;
> +        }
> +    }
> +    ++mr->ioeventfd_nb;
> +    mr->ioeventfds = qemu_realloc(mr->ioeventfds,
> +                                  sizeof(*mr->ioeventfds) * mr->ioeventfd_nb);
> +    memmove(&mr->ioeventfds[i+1], &mr->ioeventfds[i],
> +            sizeof(*mr->ioeventfds) * (mr->ioeventfd_nb-1 - i));
> +    mr->ioeventfds[i] = mrfd;
> +    memory_region_update_topology();
> +}
> +
> +void memory_region_del_eventfd(MemoryRegion *mr,
> +                               target_phys_addr_t addr,
> +                               unsigned size,
> +                               bool match_data,
> +                               uint64_t data,
> +                               int fd)
> +{
> +    MemoryRegionIoeventfd mrfd = {
> +        .addr.start = addr,
> +        .addr.size = size,
> +        .match_data = match_data,
> +        .data = data,
> +        .fd = fd,
> +    };
> +    unsigned i;
> +
> +    for (i = 0; i < mr->ioeventfd_nb; ++i) {
> +        if (memory_region_ioeventfd_equal(mrfd, mr->ioeventfds[i])) {
> +            break;
> +        }
> +    }
> +    if (i == mr->ioeventfd_nb) {
> +        abort();
> +    }
> +    memmove(&mr->ioeventfds[i], &mr->ioeventfds[i+1],
> +            sizeof(*mr->ioeventfds) * (mr->ioeventfd_nb - (i+1)));
> +    --mr->ioeventfd_nb;
> +    mr->ioeventfds = qemu_realloc(mr->ioeventfds,
> +                                  sizeof(*mr->ioeventfds)*mr->ioeventfd_nb + 1);
> +    memory_region_update_topology();
> +}
> +
>  static void memory_region_add_subregion_common(MemoryRegion *mr,
>                                                 target_phys_addr_t offset,
>                                                 MemoryRegion *subregion)
> diff --git a/memory.h b/memory.h
> index 4624946..e4c0ad1 100644
> --- a/memory.h
> +++ b/memory.h
> @@ -85,6 +85,7 @@ struct MemoryRegionOps {
>  };
>
>  typedef struct CoalescedMemoryRange CoalescedMemoryRange;
> +typedef struct MemoryRegionIoeventfd MemoryRegionIoeventfd;
>
>  struct MemoryRegion {
>      /* All fields are private - violators will be prosecuted */
> @@ -107,6 +108,8 @@ struct MemoryRegion {
>      QTAILQ_HEAD(coalesced_ranges, CoalescedMemoryRange) coalesced;
>      const char *name;
>      uint8_t dirty_log_mask;
> +    unsigned ioeventfd_nb;
> +    MemoryRegionIoeventfd *ioeventfds;
>  };
>
>  struct MemoryRegionPortio {
> @@ -208,6 +211,23 @@ void memory_region_add_coalescing(MemoryRegion *mr,
>  /* Disable MMIO coalescing for the region. */
>  void memory_region_clear_coalescing(MemoryRegion *mr);
>
> +
> +/* Request an eventfd to be triggered when a word is written to a location */
> +void memory_region_add_eventfd(MemoryRegion *mr,
> +                               target_phys_addr_t addr,
> +                               unsigned size,
> +                               bool match_data,
> +                               uint64_t data,
> +                               int fd);
> +
> +/* Cancel an existing eventfd */
> +void memory_region_del_eventfd(MemoryRegion *mr,
> +                               target_phys_addr_t addr,
> +                               unsigned size,
> +                               bool match_data,
> +                               uint64_t data,
> +                               int fd);
> +
>  /* Add a sub-region at @offset.  The sub-region may not overlap with other
>   * subregions (except for those explicitly marked as overlapping)
>   */
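
P.S.  To be concrete about "circumventing the layering": since virtio-pci is
the only ioeventfd user today, it could keep making the kvm call itself
instead of the memory core doing it.  Roughly along the lines of the helper
below (hypothetical, names made up; it only relies on the
kvm_set_ioeventfd_pio_word() call the patch already uses):

    /* Hypothetical helper living in virtio-pci, not in memory.c: wire a
     * guest write of 'queue_idx' to the notify port directly to an eventfd
     * via KVM, bypassing the generic memory API. */
    static int my_virtio_pci_notify_eventfd(uint16_t notify_port,
                                            uint16_t queue_idx,
                                            int fd, bool assign)
    {
        return kvm_set_ioeventfd_pio_word(fd, notify_port, queue_idx, assign);
    }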