public inbox for kvm@vger.kernel.org
 help / color / mirror / Atom feed
From: Avi Kivity <avi@redhat.com>
To: Sheng Yang <sheng@linux.intel.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>,
	"Michael S. Tsirkin" <mst@redhat.com>,
	kvm@vger.kernel.org, Alex Williamson <alex.williamson@redhat.com>
Subject: Re: [PATCH 6/6] KVM: assigned dev: MSI-X mask support
Date: Wed, 17 Nov 2010 15:58:00 +0200	[thread overview]
Message-ID: <4CE3DF68.7090309@redhat.com> (raw)
In-Reply-To: <1289812532-3227-7-git-send-email-sheng@linux.intel.com>

On 11/15/2010 11:15 AM, Sheng Yang wrote:
> This patch enable per-vector mask for assigned devices using MSI-X.
>
> This patch provided two new APIs: one is for guest to specific device's MSI-X
> table address in MMIO, the other is for userspace to get information about mask
> bit.
>
> All the mask bit operation are kept in kernel, in order to accelerate.
> Userspace shouldn't access the device MMIO directly for the information,
> instead it should uses provided API to do so.
>
> Signed-off-by: Sheng Yang<sheng@linux.intel.com>
> ---
>   arch/x86/kvm/x86.c       |    1 +
>   include/linux/kvm.h      |   32 +++++
>   include/linux/kvm_host.h |    5 +
>   virt/kvm/assigned-dev.c  |  318 +++++++++++++++++++++++++++++++++++++++++++++-
>   4 files changed, 355 insertions(+), 1 deletions(-)
>

Documentation?

> +static int msix_mmio_read(struct kvm_io_device *this, gpa_t addr, int len,
> +			  void *val)
> +{
> +	struct kvm_assigned_dev_kernel *adev =
> +			container_of(this, struct kvm_assigned_dev_kernel,
> +				     msix_mmio_dev);
> +	int idx, r = 0;
> +	u32 entry[4];
> +	struct kvm_kernel_irq_routing_entry e;
> +
> +	/* TODO: Get big-endian machine work */
> +	mutex_lock(&adev->kvm->lock);
> +	if (!msix_mmio_in_range(adev, addr, len)) {
> +		r = -EOPNOTSUPP;
> +		goto out;
> +	}
> +	if ((addr&  0x3) || len != 4)
> +		goto out;
> +
> +	idx = msix_get_enabled_idx(adev, addr, len);
> +	if (idx<  0) {
> +		idx = (addr - adev->msix_mmio_base) / PCI_MSIX_ENTRY_SIZE;
> +		if ((addr % PCI_MSIX_ENTRY_SIZE) ==
> +				PCI_MSIX_ENTRY_VECTOR_CTRL)
> +			*(unsigned long *)val =
> +				test_bit(idx, adev->msix_mask_bitmap) ?
> +				PCI_MSIX_ENTRY_CTRL_MASKBIT : 0;
> +		else
> +			r = -EOPNOTSUPP;
> +		goto out;
> +	}
> +
> +	r = kvm_get_irq_routing_entry(adev->kvm,
> +			adev->guest_msix_entries[idx].vector,&e);
> +	if (r || e.type != KVM_IRQ_ROUTING_MSI) {
> +		r = -EOPNOTSUPP;
> +		goto out;
> +	}
> +	entry[0] = e.msi.address_lo;
> +	entry[1] = e.msi.address_hi;
> +	entry[2] = e.msi.data;
> +	entry[3] = test_bit(adev->guest_msix_entries[idx].entry,
> +			adev->msix_mask_bitmap);
> +	memcpy(val,&entry[addr % PCI_MSIX_ENTRY_SIZE / sizeof *entry], len);
> +
> +out:
> +	mutex_unlock(&adev->kvm->lock);
> +	return r;
> +}
> +
> +static int msix_mmio_write(struct kvm_io_device *this, gpa_t addr, int len,
> +			   const void *val)
> +{
> +	struct kvm_assigned_dev_kernel *adev =
> +			container_of(this, struct kvm_assigned_dev_kernel,
> +				     msix_mmio_dev);
> +	int idx, r = 0;
> +	unsigned long new_val = *(unsigned long *)val;

What if it's a 64-bit write on a 32-bit host?

Are we sure the trailing bytes of val are zero?

> +
> +	/* TODO: Get big-endian machine work */

BUILD_BUG_ON(something)

> +	mutex_lock(&adev->kvm->lock);
> +	if (!msix_mmio_in_range(adev, addr, len)) {
> +		r = -EOPNOTSUPP;
> +		goto out;
> +	}

Why is this needed?  Didn't the iodev check already do this?

> +	if ((addr&  0x3) || len != 4)
> +		goto out;

What if len == 8?  I think mst said it was legal.

> +
> +	idx = msix_get_enabled_idx(adev, addr, len);
> +	if (idx<  0) {
> +		idx = (addr - adev->msix_mmio_base) / PCI_MSIX_ENTRY_SIZE;
> +		if (((addr % PCI_MSIX_ENTRY_SIZE) ==
> +				PCI_MSIX_ENTRY_VECTOR_CTRL)) {
> +			if (new_val&  ~PCI_MSIX_ENTRY_CTRL_MASKBIT)
> +				goto out;
> +			if (new_val&  PCI_MSIX_ENTRY_CTRL_MASKBIT)
> +				set_bit(idx, adev->msix_mask_bitmap);
> +			else
> +				clear_bit(idx, adev->msix_mask_bitmap);
> +			/* It's possible that we need re-enable MSI-X, so go
> +			 * back to userspace */
> +		}
> +		/* Userspace would handle other MMIO writing */
> +		r = -EOPNOTSUPP;

That's not very good.  We should do the entire thing in the kernel or in 
userspace.  We can have a new EXIT_REASON to let userspace know an msix 
entry changed, and it should read it from the kernel.

> +		goto out;
> +	}
> +	if (addr % PCI_MSIX_ENTRY_SIZE != PCI_MSIX_ENTRY_VECTOR_CTRL) {
> +		r = -EOPNOTSUPP;
> +		goto out;
> +	}
> +	if (new_val&  ~PCI_MSIX_ENTRY_CTRL_MASKBIT)
> +		goto out;
> +	update_msix_mask(adev, idx, !!(new_val&  PCI_MSIX_ENTRY_CTRL_MASKBIT));
> +out:
> +	mutex_unlock(&adev->kvm->lock);
> +
> +	return r;
> +}
> +
> +static int kvm_vm_ioctl_update_msix_mmio(struct kvm *kvm,
> +				struct kvm_msix_mmio *msix_mmio)
> +{
> +	int r = 0;
> +	struct kvm_assigned_dev_kernel *adev;
> +
> +	if (msix_mmio->type != KVM_MSIX_TYPE_ASSIGNED_DEV)
> +		return -EINVAL;
> +
> +	if (!msix_mmio->flags)
> +		return -EINVAL;

Need to check flags for undefined bits too.

> +
> +	mutex_lock(&kvm->lock);
> +	adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
> +				      msix_mmio->id);
> +	if (!adev) {
> +		r = -EINVAL;
> +		goto out;
> +	}
> +	if (msix_mmio->base_addr == 0) {
> +		r = -EINVAL;
> +		goto out;
> +	}
> +	if (msix_mmio->max_entries_nr == 0 ||
> +			msix_mmio->max_entries_nr>  KVM_MAX_MSIX_PER_DEV) {
> +		r = -EINVAL;
> +		goto out;
> +	}
> +
> +	if ((msix_mmio->flags&  KVM_MSIX_MMIO_FLAG_REGISTER)&&
> +			(msix_mmio->flags&  KVM_MSIX_MMIO_FLAG_UNREGISTER)) {
> +		r = -EINVAL;
> +		goto out;
> +	}
> +
> +	if (msix_mmio->flags&  KVM_MSIX_MMIO_FLAG_REGISTER) {
> +		if (!(adev->flags&  KVM_ASSIGNED_ENABLED_MSIX_MMIO)) {
> +			mutex_lock(&kvm->slots_lock);
> +			kvm_iodevice_init(&adev->msix_mmio_dev,
> +					&msix_mmio_ops);
> +			r = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS,
> +					&adev->msix_mmio_dev);
> +			if (!r)
> +				adev->flags |= KVM_ASSIGNED_ENABLED_MSIX_MMIO;
> +			mutex_unlock(&kvm->slots_lock);
> +		}
> +		if (!r) {
> +			adev->msix_mmio_base = msix_mmio->base_addr;
> +			adev->msix_max_entries_nr = msix_mmio->max_entries_nr;
> +		}
> +	} else if (msix_mmio->flags&  KVM_MSIX_MMIO_FLAG_UNREGISTER)
> +		unregister_msix_mmio(kvm, adev);
> +out:
> +	mutex_unlock(&kvm->lock);
> +
> +	return r;
> +}
>   #endif

Question: how do we do this with vfio?  One idea is to have

    ioctl(vmfd, KVM_IO_PIPE, { KVM_IO_MMIO /* or KVM_IO_PIO */, range, 
pipefd1, pipefd2 })

and have kvm convert mmio or pio in that range to commands and responses 
sent over that pipe.  We could have qemu threads, other userspace 
processes, or kernel components like vfio implement the other side of 
the channel.

-- 
error compiling committee.c: too many arguments to function


  parent reply	other threads:[~2010-11-17 13:58 UTC|newest]

Thread overview: 26+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-11-15  9:15 [PATCH 0/6 v5] MSI-X mask support for assigned device Sheng Yang
2010-11-15  9:15 ` [PATCH 1/6] PCI: MSI: Move MSI-X entry definition to pci_regs.h Sheng Yang
2010-11-15  9:15 ` [PATCH 2/6] PCI: Add mask bit definition for MSI-X table Sheng Yang
2010-11-15  9:15 ` [PATCH 3/6] KVM: Move struct kvm_io_device to kvm_host.h Sheng Yang
2010-11-15  9:15 ` [PATCH 4/6] KVM: Add kvm_get_irq_routing_entry() func Sheng Yang
2010-11-17 14:01   ` Avi Kivity
2010-11-18  2:22     ` Sheng Yang
2010-11-18  9:30       ` Avi Kivity
2010-11-18  9:41         ` Michael S. Tsirkin
2010-11-18 11:59           ` Sheng Yang
2010-11-18 12:33             ` Michael S. Tsirkin
2010-11-18 12:40               ` Sheng Yang
2010-11-15  9:15 ` [PATCH 5/6] KVM: assigned dev: Clean up assigned_device's flag Sheng Yang
2010-11-15  9:15 ` [PATCH 6/6] KVM: assigned dev: MSI-X mask support Sheng Yang
2010-11-15  9:27   ` [PATCH 6/6 v5 updated] " Sheng Yang
2010-11-16 19:45   ` [PATCH 6/6] " Marcelo Tosatti
2010-11-17  1:29     ` Sheng Yang
2010-11-17 13:35       ` Marcelo Tosatti
2010-11-18  9:43       ` Michael S. Tsirkin
2010-11-17 13:58   ` Avi Kivity [this message]
2010-11-18  1:58     ` Sheng Yang
2010-11-18  6:21       ` Michael S. Tsirkin
2010-11-18  6:39         ` Sheng Yang
2010-11-18  9:28       ` Avi Kivity
2010-11-18  9:37         ` Michael S. Tsirkin
2010-11-18 12:08         ` Sheng Yang

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4CE3DF68.7090309@redhat.com \
    --to=avi@redhat.com \
    --cc=alex.williamson@redhat.com \
    --cc=kvm@vger.kernel.org \
    --cc=mst@redhat.com \
    --cc=mtosatti@redhat.com \
    --cc=sheng@linux.intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox