From: Marcelo Tosatti <mtosatti@redhat.com>
To: Sheng Yang <sheng@linux.intel.com>
Cc: Avi Kivity <avi@redhat.com>,
"Michael S. Tsirkin" <mst@redhat.com>,
kvm@vger.kernel.org
Subject: Re: [PATCH 6/6] KVM: assigned dev: MSI-X mask support
Date: Tue, 16 Nov 2010 17:45:22 -0200 [thread overview]
Message-ID: <20101116194522.GA22758@amt.cnet> (raw)
In-Reply-To: <1289812532-3227-7-git-send-email-sheng@linux.intel.com>
On Mon, Nov 15, 2010 at 05:15:32PM +0800, Sheng Yang wrote:
> This patch enable per-vector mask for assigned devices using MSI-X.
>
> This patch provided two new APIs: one is for guest to specific device's MSI-X
> table address in MMIO, the other is for userspace to get information about mask
> bit.
>
> All the mask bit operation are kept in kernel, in order to accelerate.
> Userspace shouldn't access the device MMIO directly for the information,
> instead it should uses provided API to do so.
>
> Signed-off-by: Sheng Yang <sheng@linux.intel.com>
> ---
> arch/x86/kvm/x86.c | 1 +
> include/linux/kvm.h | 32 +++++
> include/linux/kvm_host.h | 5 +
> virt/kvm/assigned-dev.c | 318 +++++++++++++++++++++++++++++++++++++++++++++-
> 4 files changed, 355 insertions(+), 1 deletions(-)
>
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index fc29223..37602e2 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -1966,6 +1966,7 @@ int kvm_dev_ioctl_check_extension(long ext)
> case KVM_CAP_X86_ROBUST_SINGLESTEP:
> case KVM_CAP_XSAVE:
> case KVM_CAP_ASYNC_PF:
> + case KVM_CAP_MSIX_MASK:
> r = 1;
> break;
> case KVM_CAP_COALESCED_MMIO:
> diff --git a/include/linux/kvm.h b/include/linux/kvm.h
> index ea2dc1a..b3e5ffe 100644
> --- a/include/linux/kvm.h
> +++ b/include/linux/kvm.h
> @@ -541,6 +541,9 @@ struct kvm_ppc_pvinfo {
> #define KVM_CAP_PPC_GET_PVINFO 57
> #define KVM_CAP_PPC_IRQ_LEVEL 58
> #define KVM_CAP_ASYNC_PF 59
> +#ifdef __KVM_HAVE_MSIX
> +#define KVM_CAP_MSIX_MASK 60
> +#endif
>
> #ifdef KVM_CAP_IRQ_ROUTING
>
> @@ -672,6 +675,9 @@ struct kvm_clock_data {
> #define KVM_XEN_HVM_CONFIG _IOW(KVMIO, 0x7a, struct kvm_xen_hvm_config)
> #define KVM_SET_CLOCK _IOW(KVMIO, 0x7b, struct kvm_clock_data)
> #define KVM_GET_CLOCK _IOR(KVMIO, 0x7c, struct kvm_clock_data)
> +/* Available with KVM_CAP_MSIX_MASK */
> +#define KVM_GET_MSIX_ENTRY _IOWR(KVMIO, 0x7d, struct kvm_msix_entry)
> +#define KVM_UPDATE_MSIX_MMIO _IOW(KVMIO, 0x7e, struct kvm_msix_mmio)
> /* Available with KVM_CAP_PIT_STATE2 */
> #define KVM_GET_PIT2 _IOR(KVMIO, 0x9f, struct kvm_pit_state2)
> #define KVM_SET_PIT2 _IOW(KVMIO, 0xa0, struct kvm_pit_state2)
> @@ -795,4 +801,30 @@ struct kvm_assigned_msix_entry {
> __u16 padding[3];
> };
>
> +#define KVM_MSIX_TYPE_ASSIGNED_DEV 1
> +
> +#define KVM_MSIX_FLAG_MASKBIT (1 << 0)
> +#define KVM_MSIX_FLAG_QUERY_MASKBIT (1 << 0)
> +
> +struct kvm_msix_entry {
> + __u32 id;
> + __u32 type;
Is type really necessary? Will it ever differ from
KVM_MSIX_TYPE_ASSIGNED_DEV?
> + __u32 entry; /* The index of entry in the MSI-X table */
> + __u32 flags;
> + __u32 query_flags;
> + __u32 reserved[5];
> +};
> +
> +#define KVM_MSIX_MMIO_FLAG_REGISTER (1 << 0)
> +#define KVM_MSIX_MMIO_FLAG_UNREGISTER (1 << 1)
> +
> +struct kvm_msix_mmio {
> + __u32 id;
> + __u32 type;
> + __u64 base_addr;
> + __u32 max_entries_nr;
> + __u32 flags;
> + __u32 reserved[6];
> +};
> +
> #endif /* __LINUX_KVM_H */
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index f09db87..57a437a 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -501,6 +501,7 @@ struct kvm_guest_msix_entry {
> };
>
> #define KVM_ASSIGNED_ENABLED_IOMMU (1 << 0)
> +#define KVM_ASSIGNED_ENABLED_MSIX_MMIO (1 << 1)
> struct kvm_assigned_dev_kernel {
> struct kvm_irq_ack_notifier ack_notifier;
> struct work_struct interrupt_work;
> @@ -521,6 +522,10 @@ struct kvm_assigned_dev_kernel {
> struct pci_dev *dev;
> struct kvm *kvm;
> spinlock_t assigned_dev_lock;
> + DECLARE_BITMAP(msix_mask_bitmap, KVM_MAX_MSIX_PER_DEV);
> + gpa_t msix_mmio_base;
> + struct kvm_io_device msix_mmio_dev;
> + int msix_max_entries_nr;
> };
>
> struct kvm_irq_mask_notifier {
> diff --git a/virt/kvm/assigned-dev.c b/virt/kvm/assigned-dev.c
> index 5c6b96d..76a1f12 100644
> --- a/virt/kvm/assigned-dev.c
> +++ b/virt/kvm/assigned-dev.c
> @@ -226,12 +226,27 @@ static void kvm_free_assigned_irq(struct kvm *kvm,
> kvm_deassign_irq(kvm, assigned_dev, assigned_dev->irq_requested_type);
> }
>
> +static void unregister_msix_mmio(struct kvm *kvm,
> + struct kvm_assigned_dev_kernel *adev)
> +{
> + if (adev->flags & KVM_ASSIGNED_ENABLED_MSIX_MMIO) {
> + mutex_lock(&kvm->slots_lock);
> + kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS,
> + &adev->msix_mmio_dev);
> + mutex_unlock(&kvm->slots_lock);
> + adev->flags &= ~KVM_ASSIGNED_ENABLED_MSIX_MMIO;
> + }
> +}
> +
> static void kvm_free_assigned_device(struct kvm *kvm,
> struct kvm_assigned_dev_kernel
> *assigned_dev)
> {
> kvm_free_assigned_irq(kvm, assigned_dev);
>
> +#ifdef __KVM_HAVE_MSIX
> + unregister_msix_mmio(kvm, assigned_dev);
> +#endif
> pci_reset_function(assigned_dev->dev);
>
> pci_release_regions(assigned_dev->dev);
> @@ -504,7 +519,7 @@ out:
> static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
> struct kvm_assigned_pci_dev *assigned_dev)
> {
> - int r = 0, idx;
> + int r = 0, idx, i;
> struct kvm_assigned_dev_kernel *match;
> struct pci_dev *dev;
>
> @@ -564,6 +579,10 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
>
> list_add(&match->list, &kvm->arch.assigned_dev_head);
>
> + /* The state after reset of MSI-X table is all masked */
> + for (i = 0; i < KVM_MAX_MSIX_PER_DEV; i++)
> + set_bit(i, match->msix_mask_bitmap);
> +
> if (assigned_dev->flags & KVM_ASSIGNED_ENABLED_IOMMU) {
> if (!kvm->arch.iommu_domain) {
> r = kvm_iommu_map_guest(kvm);
> @@ -667,6 +686,43 @@ msix_nr_out:
> return r;
> }
>
> +static void update_msix_mask(struct kvm_assigned_dev_kernel *adev,
> + int idx, bool new_mask_flag)
> +{
> + int irq;
> + bool old_mask_flag, need_flush = false;
> +
> + spin_lock_irq(&adev->assigned_dev_lock);
> +
> + if (!adev->dev->msix_enabled ||
> + !(adev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX))
> + goto out;
> +
> + old_mask_flag = test_bit(adev->guest_msix_entries[idx].entry,
> + adev->msix_mask_bitmap);
> + if (old_mask_flag == new_mask_flag)
> + goto out;
> +
> + irq = adev->host_msix_entries[idx].vector;
> + BUG_ON(irq == 0);
> +
> + if (new_mask_flag) {
> + set_bit(adev->guest_msix_entries[idx].entry,
> + adev->msix_mask_bitmap);
> + disable_irq_nosync(irq);
> + need_flush = true;
> + } else {
> + clear_bit(adev->guest_msix_entries[idx].entry,
> + adev->msix_mask_bitmap);
> + enable_irq(irq);
> + }
> +out:
> + spin_unlock_irq(&adev->assigned_dev_lock);
> +
> + if (need_flush)
> + flush_work(&adev->interrupt_work);
> +}
> +
> static int kvm_vm_ioctl_set_msix_entry(struct kvm *kvm,
> struct kvm_assigned_msix_entry *entry)
> {
> @@ -701,6 +757,235 @@ msix_entry_out:
>
> return r;
> }
> +
> +static int kvm_vm_ioctl_get_msix_entry(struct kvm *kvm,
> + struct kvm_msix_entry *entry)
> +{
> + int r = 0;
> + struct kvm_assigned_dev_kernel *adev;
> +
> + if (entry->type != KVM_MSIX_TYPE_ASSIGNED_DEV)
> + return -EINVAL;
> +
> + if (!entry->query_flags)
> + return -EINVAL;
> +
> + mutex_lock(&kvm->lock);
> +
> + adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
> + entry->id);
> +
> + if (!adev) {
> + r = -EINVAL;
> + goto out;
> + }
> +
> + if (entry->entry >= adev->msix_max_entries_nr) {
> + r = -ENOSPC;
> + goto out;
> + }
> +
> + if (entry->query_flags & KVM_MSIX_FLAG_QUERY_MASKBIT) {
> + if (test_bit(entry->entry, adev->msix_mask_bitmap))
> + entry->flags |= KVM_MSIX_FLAG_MASKBIT;
> + else
> + entry->flags &= ~KVM_MSIX_FLAG_MASKBIT;
> + }
> +
> +out:
> + mutex_unlock(&kvm->lock);
> +
> + return r;
> +}
> +
> +static bool msix_mmio_in_range(struct kvm_assigned_dev_kernel *adev,
> + gpa_t addr, int len)
> +{
> + gpa_t start, end;
> +
> + BUG_ON(!(adev->flags & KVM_ASSIGNED_ENABLED_MSIX_MMIO));
> + start = adev->msix_mmio_base;
> + end = adev->msix_mmio_base + PCI_MSIX_ENTRY_SIZE *
> + adev->msix_max_entries_nr;
> + if (addr >= start && addr + len <= end)
> + return true;
> +
> + return false;
> +}
> +
> +static int msix_get_enabled_idx(struct kvm_assigned_dev_kernel *adev,
> + gpa_t addr, int len)
> +{
> + int i, index = (addr - adev->msix_mmio_base) / PCI_MSIX_ENTRY_SIZE;
> +
> + for (i = 0; i < adev->entries_nr; i++)
> + if (adev->guest_msix_entries[i].entry == index)
> + return i;
> +
> + return -EINVAL;
> +}
> +
> +static int msix_mmio_read(struct kvm_io_device *this, gpa_t addr, int len,
> + void *val)
> +{
> + struct kvm_assigned_dev_kernel *adev =
> + container_of(this, struct kvm_assigned_dev_kernel,
> + msix_mmio_dev);
> + int idx, r = 0;
> + u32 entry[4];
> + struct kvm_kernel_irq_routing_entry e;
> +
> + /* TODO: Get big-endian machine work */
> + mutex_lock(&adev->kvm->lock);
> + if (!msix_mmio_in_range(adev, addr, len)) {
> + r = -EOPNOTSUPP;
> + goto out;
> + }
The unregister path does:
mutex_lock(kvm->lock)
kvm_io_bus_unregister_dev()
synchronize_srcu()
If an instance of msix_mmio_read/msix_mmio_write is waiting on
kvm->lock, synchronize_srcu will never complete.
You should use a separate lock for the in range check (and have it mind
that reads/writes can trigger after kvm_io_bus_register_dev, so all
state accessible in the r/w handlers should be complete by that time).
> + if ((addr & 0x3) || len != 4)
> + goto out;
> +
> + idx = msix_get_enabled_idx(adev, addr, len);
> + if (idx < 0) {
> + idx = (addr - adev->msix_mmio_base) / PCI_MSIX_ENTRY_SIZE;
> + if ((addr % PCI_MSIX_ENTRY_SIZE) ==
> + PCI_MSIX_ENTRY_VECTOR_CTRL)
> + *(unsigned long *)val =
> + test_bit(idx, adev->msix_mask_bitmap) ?
> + PCI_MSIX_ENTRY_CTRL_MASKBIT : 0;
> + else
> + r = -EOPNOTSUPP;
> + goto out;
> + }
> +
> + r = kvm_get_irq_routing_entry(adev->kvm,
> + adev->guest_msix_entries[idx].vector, &e);
> + if (r || e.type != KVM_IRQ_ROUTING_MSI) {
> + r = -EOPNOTSUPP;
> + goto out;
> + }
> + entry[0] = e.msi.address_lo;
> + entry[1] = e.msi.address_hi;
> + entry[2] = e.msi.data;
> + entry[3] = test_bit(adev->guest_msix_entries[idx].entry,
> + adev->msix_mask_bitmap);
> + memcpy(val, &entry[addr % PCI_MSIX_ENTRY_SIZE / sizeof *entry], len);
Division by zero?
next prev parent reply other threads:[~2010-11-16 19:46 UTC|newest]
Thread overview: 26+ messages / expand[flat|nested] mbox.gz Atom feed top
2010-11-15 9:15 [PATCH 0/6 v5] MSI-X mask support for assigned device Sheng Yang
2010-11-15 9:15 ` [PATCH 1/6] PCI: MSI: Move MSI-X entry definition to pci_regs.h Sheng Yang
2010-11-15 9:15 ` [PATCH 2/6] PCI: Add mask bit definition for MSI-X table Sheng Yang
2010-11-15 9:15 ` [PATCH 3/6] KVM: Move struct kvm_io_device to kvm_host.h Sheng Yang
2010-11-15 9:15 ` [PATCH 4/6] KVM: Add kvm_get_irq_routing_entry() func Sheng Yang
2010-11-17 14:01 ` Avi Kivity
2010-11-18 2:22 ` Sheng Yang
2010-11-18 9:30 ` Avi Kivity
2010-11-18 9:41 ` Michael S. Tsirkin
2010-11-18 11:59 ` Sheng Yang
2010-11-18 12:33 ` Michael S. Tsirkin
2010-11-18 12:40 ` Sheng Yang
2010-11-15 9:15 ` [PATCH 5/6] KVM: assigned dev: Clean up assigned_device's flag Sheng Yang
2010-11-15 9:15 ` [PATCH 6/6] KVM: assigned dev: MSI-X mask support Sheng Yang
2010-11-15 9:27 ` [PATCH 6/6 v5 updated] " Sheng Yang
2010-11-16 19:45 ` Marcelo Tosatti [this message]
2010-11-17 1:29 ` [PATCH 6/6] " Sheng Yang
2010-11-17 13:35 ` Marcelo Tosatti
2010-11-18 9:43 ` Michael S. Tsirkin
2010-11-17 13:58 ` Avi Kivity
2010-11-18 1:58 ` Sheng Yang
2010-11-18 6:21 ` Michael S. Tsirkin
2010-11-18 6:39 ` Sheng Yang
2010-11-18 9:28 ` Avi Kivity
2010-11-18 9:37 ` Michael S. Tsirkin
2010-11-18 12:08 ` Sheng Yang
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20101116194522.GA22758@amt.cnet \
--to=mtosatti@redhat.com \
--cc=avi@redhat.com \
--cc=kvm@vger.kernel.org \
--cc=mst@redhat.com \
--cc=sheng@linux.intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.