From: Anthony Liguori <anthony@codemonkey.ws>
To: Cam Macdonell <cam@cs.ualberta.ca>
Cc: qemu-devel@nongnu.org
Subject: Re: [Qemu-devel] [PATCH] VirtIO Support for Memory Regions
Date: Tue, 23 Feb 2010 15:05:48 -0600
Message-ID: <4B84432C.6090104@codemonkey.ws>
In-Reply-To: <20100223205225.14860.50990.stgit@st-brides.cs.ualberta.ca>
Hi Cam,

On 02/23/2010 02:52 PM, Cam Macdonell wrote:
> Support for passing memory regions via VirtIO to remove the need for
> PCI support in the guest.
>
> Adds new vectors to VirtIO config space to pass memory regions similar
> to how virtqueues are passed.
>
> A kernel patch is forthcoming that adds device_ops to access the memory regions.
>
> I have used this mechanism for my host shared memory implementation
> and have modified Alex's Virtio FB to use it as well.
>

Virtio is really a DMA engine.  One of the nice things about its design
is that you can do things like transparent bounce buffering if needed.
Adding a mechanism like this breaks that abstraction and turns virtio
into something that's more than I think it should be.

For guest shared memory, what makes sense to me is to make use of
uio_pci within a guest to map BARs in userspace.  The device can have
the first BAR map a small MMIO region that is tied to an ioeventfd with
KVM.  With just qemu, it can be tied to a normal eventfd.  You can use
irqfd with KVM to tie an eventfd to the PCI irq.  Likewise, you can use
a normal eventfd to emulate that.  I'd suggest using a /dev/shm file to
act as the shared memory segment.  You'll need a little magic within
qemu_ram_alloc() to use this (maybe qemu_ram_alloc() should take a
context parameter so this can be set up).
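
To make that concrete, here is a minimal guest-side sketch, assuming
the device is bound to uio_pci_generic and shows up as /dev/uio0; the
PCI address and BAR numbering are made up for illustration.  With
uio_pci_generic the BARs are mapped through the sysfs resourceN files,
while /dev/uioN is only used for interrupt delivery:

/* Guest-side sketch: mmap BAR2 through sysfs, take interrupts by
 * blocking reads on the uio device.  Error handling omitted. */
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
    /* BAR2 holds the shared memory; the device address below is
     * hypothetical. */
    int bar_fd = open("/sys/bus/pci/devices/0000:00:04.0/resource2",
                      O_RDWR);
    size_t bar_size = 1 << 20;  /* must match the real BAR size */
    volatile uint8_t *shm = mmap(NULL, bar_size,
                                 PROT_READ | PROT_WRITE,
                                 MAP_SHARED, bar_fd, 0);

    /* A blocking read completes when the device raises an interrupt
     * and returns the cumulative event count. */
    int uio_fd = open("/dev/uio0", O_RDONLY);
    uint32_t events;
    while (read(uio_fd, &events, sizeof(events)) == sizeof(events))
        printf("irq %u, shm[0] = %u\n", events, (unsigned)shm[0]);

    return 0;
}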

You'll need to set up the backend with the name of the shared memory
segment and the two eventfds.  Also, a user needs to specify a PCI
vendor/device ID.  It's probably worth having some kind of identifier
tag in the PCI device's first MMIO region.
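
In rough terms, the backend pieces boil down to something like the
sketch below; the segment name is a placeholder and none of this is an
existing qemu interface:

/* Host-side sketch: a POSIX shm segment plus two eventfds. */
#include <fcntl.h>
#include <stdio.h>
#include <sys/eventfd.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
    /* The shared memory lives in /dev/shm; qemu_ram_alloc() would
     * need to learn to back a RAM region with this fd. */
    int shm_fd = shm_open("/my_shm_segment", O_CREAT | O_RDWR, 0600);
    size_t size = 1 << 20;
    ftruncate(shm_fd, size);
    void *ptr = mmap(NULL, size, PROT_READ | PROT_WRITE,
                     MAP_SHARED, shm_fd, 0);

    /* One eventfd for guest->host kicks (tied to the MMIO BAR with
     * ioeventfd under KVM), one for host->guest interrupts (tied to
     * the PCI irq with irqfd under KVM, or emulated without it). */
    int kick_fd = eventfd(0, 0);
    int irq_fd  = eventfd(0, 0);

    printf("shm %p, kick %d, irq %d\n", ptr, kick_fd, irq_fd);
    return 0;
}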

I think there are a couple of important characteristics to an
implementation like this.  By using uio_pci, you can implement shared
memory in userspace, but you can also implement a proper kernel driver
if you so desire for a specific device.

Regards,
Anthony Liguori
> Comments are welcome,
>
> Thanks,
> Cam
> ---
> hw/virtio-pci.c | 61 +++++++++++++++++++++++++++++++++++++++++++++++++++----
> hw/virtio.c | 38 ++++++++++++++++++++++++++++++++++
> hw/virtio.h | 22 ++++++++++++++++++++
> 3 files changed, 117 insertions(+), 4 deletions(-)
>
> diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c
> index f3373ae..e71bf64 100644
> --- a/hw/virtio-pci.c
> +++ b/hw/virtio-pci.c
> @@ -55,13 +55,13 @@
>
> /* MSI-X registers: only enabled if MSI-X is enabled. */
> /* A 16-bit vector for configuration changes. */
> -#define VIRTIO_MSI_CONFIG_VECTOR 20
> +#define VIRTIO_MSI_CONFIG_VECTOR 32
> /* A 16-bit vector for selected queue notifications. */
> -#define VIRTIO_MSI_QUEUE_VECTOR 22
> +#define VIRTIO_MSI_QUEUE_VECTOR 34
>
> /* Config space size */
> -#define VIRTIO_PCI_CONFIG_NOMSI 20
> -#define VIRTIO_PCI_CONFIG_MSI 24
> +#define VIRTIO_PCI_CONFIG_NOMSI 32
> +#define VIRTIO_PCI_CONFIG_MSI 36
> #define VIRTIO_PCI_REGION_SIZE(dev) (msix_present(dev) ? \
> VIRTIO_PCI_CONFIG_MSI : \
> VIRTIO_PCI_CONFIG_NOMSI)
> @@ -72,6 +72,12 @@
> VIRTIO_PCI_CONFIG_MSI : \
> VIRTIO_PCI_CONFIG_NOMSI)
>
> +#define VIRTIO_PCI_MEM_SEL 20
> +
> +#define VIRTIO_PCI_MEM_SIZE 24
> +
> +#define VIRTIO_PCI_MEM_PFN 28
> +
> /* Virtio ABI version, if we increment this, we break the guest driver. */
> #define VIRTIO_PCI_ABI_VERSION 0
>
> @@ -227,6 +233,11 @@ static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val)
> val = VIRTIO_NO_VECTOR;
> virtio_queue_set_vector(vdev, vdev->queue_sel, val);
> break;
> + case VIRTIO_PCI_MEM_SEL:
> + fprintf(stderr, "%s: VIRTIO_PCI_MEM_SEL %d\n", __func__, val);
> + if (val < VIRTIO_PCI_MEM_MAX)
> + vdev->mem_sel = val;
> + break;
> default:
> fprintf(stderr, "%s: unexpected address 0x%x value 0x%x\n",
> __func__, addr, val);
> @@ -271,6 +282,14 @@ static uint32_t virtio_ioport_read(VirtIOPCIProxy *proxy, uint32_t addr)
> case VIRTIO_MSI_QUEUE_VECTOR:
> ret = virtio_queue_vector(vdev, vdev->queue_sel);
> break;
> + case VIRTIO_PCI_MEM_SIZE:
> + ret = virtio_mem_get_size(vdev, vdev->mem_sel);
> + break;
> + case VIRTIO_PCI_MEM_PFN:
> + ret = virtio_mem_get_addr(vdev, vdev->mem_sel)
> +       >> VIRTIO_PCI_QUEUE_ADDR_SHIFT;
> + break;
> +
> default:
> break;
> }
> @@ -370,6 +389,30 @@ static void virtio_map(PCIDevice *pci_dev, int region_num,
> vdev->get_config(vdev, vdev->config);
> }
>
> +
> +static void virtio_setup_mem(PCIDevice *d, int region_num,
> + pcibus_t addr, pcibus_t size, int type)
> +{
> + VirtIOPCIProxy *proxy = DO_UPCAST(VirtIOPCIProxy, pci_dev, d);
> + VirtIODevice *vdev;
> +
> + printf("XXX setmem to %#x - %#x\n", (uint32_t)addr, (uint32_t)(addr + size));
> + vdev = proxy->vdev;
> +
> + cpu_register_physical_memory(addr, size, IO_MEM_UNASSIGNED);
> + cpu_register_physical_memory(addr, size, vdev->vmem[region_num-2].ram_addr | IO_MEM_RAM);
> +
> + /* store the address of the region and get a pointer for it
> + * that can access the mem region */
> + vdev->vmem[region_num-2].guest_phys_addr = (target_phys_addr_t)addr;
> +
> + if (vdev->vmem[region_num-2].on_map) {
> + vdev->vmem[region_num-2].on_map(vdev, addr, size);
> + }
> + return;
> +
> +}
> +
> static void virtio_write_config(PCIDevice *pci_dev, uint32_t address,
> uint32_t val, int len)
> {
> @@ -406,6 +449,7 @@ static void virtio_init_pci(VirtIOPCIProxy *proxy, VirtIODevice *vdev,
> {
> uint8_t *config;
> uint32_t size;
> + int i;
>
> proxy->vdev = vdev;
>
> @@ -426,6 +470,15 @@ static void virtio_init_pci(VirtIOPCIProxy *proxy, VirtIODevice *vdev,
>
> config[0x3d] = 1;
>
> + // only BARs that were added with virtio_add_memory will be allocated here
> + for (i = 0; i < VIRTIO_PCI_MEM_MAX; i++) {
> + if (vdev->vmem[i].size > 0) {
> + pci_register_bar(&proxy->pci_dev, 2 + i, vdev->vmem[i].size,
> + PCI_BASE_ADDRESS_SPACE_MEMORY, virtio_setup_mem);
> + }
> + }
> +
> +
> if (vdev->nvectors && !msix_init(&proxy->pci_dev, vdev->nvectors, 1, 0)) {
> pci_register_bar(&proxy->pci_dev, 1,
> msix_bar_size(&proxy->pci_dev),
> diff --git a/hw/virtio.c b/hw/virtio.c
> index 7c020a3..1f7924e 100644
> --- a/hw/virtio.c
> +++ b/hw/virtio.c
> @@ -549,6 +549,16 @@ target_phys_addr_t virtio_queue_get_addr(VirtIODevice *vdev, int n)
> return vdev->vq[n].pa;
> }
>
> +target_phys_addr_t virtio_mem_get_addr(VirtIODevice *vdev, int n)
> +{
> + return vdev->vmem[n].guest_phys_addr;
> +}
> +
> +int virtio_mem_get_size(VirtIODevice *vdev, int n)
> +{
> + return vdev->vmem[n].size;
> +}
> +
> int virtio_queue_get_num(VirtIODevice *vdev, int n)
> {
> return vdev->vq[n].vring.num;
> @@ -592,6 +602,33 @@ VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
> return &vdev->vq[i];
> }
>
> +VirtMem *virtio_add_memory(VirtIODevice *vdev, int mem_size,
> + void (*handle_access)(VirtIODevice *,
> + target_phys_addr_t, uint32_t))
> +{
> + int i;
> +
> + /* trace what virtio_fb_setmem does */
> +
> + for (i = 0; i < VIRTIO_PCI_MEM_MAX; i++) {
> + if (vdev->vmem[i].size == 0) {
> + /* XXX: check that memory is a power of 2 */
> + vdev->vmem[i].size = mem_size * 1024 * 1024; /* presume size is in MBs*/
> + vdev->vmem[i].ram_addr = qemu_ram_alloc(vdev->vmem[i].size);
> + vdev->vmem[i].host_ptr = qemu_get_ram_ptr(vdev->vmem[i].ram_addr);
> + vdev->vmem[i].on_map = handle_access;
> + break;
> + }
> + }
> +
> + return &vdev->vmem[i];
> +}
> +
> +void * virtio_get_memory_ptr(VirtMem * vmem)
> +{
> + return vmem->host_ptr;
> +}
> +
> void virtio_notify(VirtIODevice *vdev, VirtQueue *vq)
> {
> /* Always notify when queue is empty (when feature acknowledge) */
> @@ -714,6 +751,7 @@ VirtIODevice *virtio_common_init(const char *name, uint16_t device_id,
> vdev->queue_sel = 0;
> vdev->config_vector = VIRTIO_NO_VECTOR;
> vdev->vq = qemu_mallocz(sizeof(VirtQueue) * VIRTIO_PCI_QUEUE_MAX);
> + vdev->vmem = qemu_mallocz(sizeof(VirtMem) * VIRTIO_PCI_MEM_MAX);
> for(i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++)
> vdev->vq[i].vector = VIRTIO_NO_VECTOR;
>
> diff --git a/hw/virtio.h b/hw/virtio.h
> index 3baa2a3..02beb27 100644
> --- a/hw/virtio.h
> +++ b/hw/virtio.h
> @@ -82,6 +82,15 @@ typedef struct VirtQueueElement
> struct iovec out_sg[VIRTQUEUE_MAX_SIZE];
> } VirtQueueElement;
>
> +typedef struct VirtMem
> +{
> + target_phys_addr_t guest_phys_addr;
> + ram_addr_t ram_addr;
> + int size;
> + uint8_t * host_ptr;
> + void (*on_map)(VirtIODevice *vdev, target_phys_addr_t, uint32_t);
> +} VirtMem;
> +
> typedef struct {
> void (*notify)(void * opaque, uint16_t vector);
> void (*save_config)(void * opaque, QEMUFile *f);
> @@ -93,6 +102,8 @@ typedef struct {
>
> #define VIRTIO_PCI_QUEUE_MAX 64
>
> +#define VIRTIO_PCI_MEM_MAX 16
> +
> #define VIRTIO_NO_VECTOR 0xffff
>
> struct VirtIODevice
> @@ -101,6 +112,7 @@ struct VirtIODevice
> uint8_t status;
> uint8_t isr;
> uint16_t queue_sel;
> + uint16_t mem_sel;
> uint32_t guest_features;
> size_t config_len;
> void *config;
> @@ -113,6 +125,7 @@ struct VirtIODevice
> void (*set_config)(VirtIODevice *vdev, const uint8_t *config);
> void (*reset)(VirtIODevice *vdev);
> VirtQueue *vq;
> + VirtMem * vmem;
> const VirtIOBindings *binding;
> void *binding_opaque;
> uint16_t device_id;
> @@ -122,6 +135,12 @@ VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
> void (*handle_output)(VirtIODevice *,
> VirtQueue *));
>
> +void * virtio_get_memory_ptr(VirtMem * vmem);
> +VirtMem *virtio_add_memory(VirtIODevice *vdev, int mem_size,
> + void (*handle_access)(VirtIODevice *,
> + target_phys_addr_t,
> + uint32_t));
> +
> void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem,
> unsigned int len);
> void virtqueue_flush(VirtQueue *vq, unsigned int count);
> @@ -169,6 +188,9 @@ void virtio_update_irq(VirtIODevice *vdev);
> void virtio_bind_device(VirtIODevice *vdev, const VirtIOBindings *binding,
> void *opaque);
>
> +target_phys_addr_t virtio_mem_get_addr(VirtIODevice *vdev, int n);
> +int virtio_mem_get_size(VirtIODevice *vdev, int n);
> +
> /* Base devices. */
> VirtIODevice *virtio_blk_init(DeviceState *dev, BlockConf *conf);
> VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf);
>
>
>