From: Marcelo Tosatti <mtosatti@redhat.com>
To: Sheng Yang <sheng@linux.intel.com>
Cc: Avi Kivity <avi@redhat.com>,
"Michael S. Tsirkin" <mst@redhat.com>,
kvm@vger.kernel.org
Subject: Re: [PATCH 6/6] KVM: assigned dev: MSI-X mask support
Date: Tue, 16 Nov 2010 17:45:22 -0200 [thread overview]
Message-ID: <20101116194522.GA22758@amt.cnet> (raw)
In-Reply-To: <1289812532-3227-7-git-send-email-sheng@linux.intel.com>
On Mon, Nov 15, 2010 at 05:15:32PM +0800, Sheng Yang wrote:
> This patch enable per-vector mask for assigned devices using MSI-X.
>
> This patch provided two new APIs: one is for guest to specific device's MSI-X
> table address in MMIO, the other is for userspace to get information about mask
> bit.
>
> All the mask bit operation are kept in kernel, in order to accelerate.
> Userspace shouldn't access the device MMIO directly for the information,
> instead it should uses provided API to do so.
>
> Signed-off-by: Sheng Yang <sheng@linux.intel.com>
> ---
> arch/x86/kvm/x86.c | 1 +
> include/linux/kvm.h | 32 +++++
> include/linux/kvm_host.h | 5 +
> virt/kvm/assigned-dev.c | 318 +++++++++++++++++++++++++++++++++++++++++++++-
> 4 files changed, 355 insertions(+), 1 deletions(-)
>
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index fc29223..37602e2 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -1966,6 +1966,7 @@ int kvm_dev_ioctl_check_extension(long ext)
> case KVM_CAP_X86_ROBUST_SINGLESTEP:
> case KVM_CAP_XSAVE:
> case KVM_CAP_ASYNC_PF:
> + case KVM_CAP_MSIX_MASK:
> r = 1;
> break;
> case KVM_CAP_COALESCED_MMIO:
> diff --git a/include/linux/kvm.h b/include/linux/kvm.h
> index ea2dc1a..b3e5ffe 100644
> --- a/include/linux/kvm.h
> +++ b/include/linux/kvm.h
> @@ -541,6 +541,9 @@ struct kvm_ppc_pvinfo {
> #define KVM_CAP_PPC_GET_PVINFO 57
> #define KVM_CAP_PPC_IRQ_LEVEL 58
> #define KVM_CAP_ASYNC_PF 59
> +#ifdef __KVM_HAVE_MSIX
> +#define KVM_CAP_MSIX_MASK 60
> +#endif
>
> #ifdef KVM_CAP_IRQ_ROUTING
>
> @@ -672,6 +675,9 @@ struct kvm_clock_data {
> #define KVM_XEN_HVM_CONFIG _IOW(KVMIO, 0x7a, struct kvm_xen_hvm_config)
> #define KVM_SET_CLOCK _IOW(KVMIO, 0x7b, struct kvm_clock_data)
> #define KVM_GET_CLOCK _IOR(KVMIO, 0x7c, struct kvm_clock_data)
> +/* Available with KVM_CAP_MSIX_MASK */
> +#define KVM_GET_MSIX_ENTRY _IOWR(KVMIO, 0x7d, struct kvm_msix_entry)
> +#define KVM_UPDATE_MSIX_MMIO _IOW(KVMIO, 0x7e, struct kvm_msix_mmio)
> /* Available with KVM_CAP_PIT_STATE2 */
> #define KVM_GET_PIT2 _IOR(KVMIO, 0x9f, struct kvm_pit_state2)
> #define KVM_SET_PIT2 _IOW(KVMIO, 0xa0, struct kvm_pit_state2)
> @@ -795,4 +801,30 @@ struct kvm_assigned_msix_entry {
> __u16 padding[3];
> };
>
> +#define KVM_MSIX_TYPE_ASSIGNED_DEV 1
> +
> +#define KVM_MSIX_FLAG_MASKBIT (1 << 0)
> +#define KVM_MSIX_FLAG_QUERY_MASKBIT (1 << 0)
> +
> +struct kvm_msix_entry {
> + __u32 id;
> + __u32 type;
Is type really necessary? Will it ever differ from
KVM_MSIX_TYPE_ASSIGNED_DEV?
> + __u32 entry; /* The index of entry in the MSI-X table */
> + __u32 flags;
> + __u32 query_flags;
> + __u32 reserved[5];
> +};
> +
> +#define KVM_MSIX_MMIO_FLAG_REGISTER (1 << 0)
> +#define KVM_MSIX_MMIO_FLAG_UNREGISTER (1 << 1)
> +
> +struct kvm_msix_mmio {
> + __u32 id;
> + __u32 type;
> + __u64 base_addr;
> + __u32 max_entries_nr;
> + __u32 flags;
> + __u32 reserved[6];
> +};
> +
> #endif /* __LINUX_KVM_H */
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index f09db87..57a437a 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -501,6 +501,7 @@ struct kvm_guest_msix_entry {
> };
>
> #define KVM_ASSIGNED_ENABLED_IOMMU (1 << 0)
> +#define KVM_ASSIGNED_ENABLED_MSIX_MMIO (1 << 1)
> struct kvm_assigned_dev_kernel {
> struct kvm_irq_ack_notifier ack_notifier;
> struct work_struct interrupt_work;
> @@ -521,6 +522,10 @@ struct kvm_assigned_dev_kernel {
> struct pci_dev *dev;
> struct kvm *kvm;
> spinlock_t assigned_dev_lock;
> + DECLARE_BITMAP(msix_mask_bitmap, KVM_MAX_MSIX_PER_DEV);
> + gpa_t msix_mmio_base;
> + struct kvm_io_device msix_mmio_dev;
> + int msix_max_entries_nr;
> };
>
> struct kvm_irq_mask_notifier {
> diff --git a/virt/kvm/assigned-dev.c b/virt/kvm/assigned-dev.c
> index 5c6b96d..76a1f12 100644
> --- a/virt/kvm/assigned-dev.c
> +++ b/virt/kvm/assigned-dev.c
> @@ -226,12 +226,27 @@ static void kvm_free_assigned_irq(struct kvm *kvm,
> kvm_deassign_irq(kvm, assigned_dev, assigned_dev->irq_requested_type);
> }
>
> +static void unregister_msix_mmio(struct kvm *kvm,
> + struct kvm_assigned_dev_kernel *adev)
> +{
> + if (adev->flags & KVM_ASSIGNED_ENABLED_MSIX_MMIO) {
> + mutex_lock(&kvm->slots_lock);
> + kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS,
> + &adev->msix_mmio_dev);
> + mutex_unlock(&kvm->slots_lock);
> + adev->flags &= ~KVM_ASSIGNED_ENABLED_MSIX_MMIO;
> + }
> +}
> +
> static void kvm_free_assigned_device(struct kvm *kvm,
> struct kvm_assigned_dev_kernel
> *assigned_dev)
> {
> kvm_free_assigned_irq(kvm, assigned_dev);
>
> +#ifdef __KVM_HAVE_MSIX
> + unregister_msix_mmio(kvm, assigned_dev);
> +#endif
> pci_reset_function(assigned_dev->dev);
>
> pci_release_regions(assigned_dev->dev);
> @@ -504,7 +519,7 @@ out:
> static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
> struct kvm_assigned_pci_dev *assigned_dev)
> {
> - int r = 0, idx;
> + int r = 0, idx, i;
> struct kvm_assigned_dev_kernel *match;
> struct pci_dev *dev;
>
> @@ -564,6 +579,10 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
>
> list_add(&match->list, &kvm->arch.assigned_dev_head);
>
> + /* The state after reset of MSI-X table is all masked */
> + for (i = 0; i < KVM_MAX_MSIX_PER_DEV; i++)
> + set_bit(i, match->msix_mask_bitmap);
> +
> if (assigned_dev->flags & KVM_ASSIGNED_ENABLED_IOMMU) {
> if (!kvm->arch.iommu_domain) {
> r = kvm_iommu_map_guest(kvm);
> @@ -667,6 +686,43 @@ msix_nr_out:
> return r;
> }
>
> +static void update_msix_mask(struct kvm_assigned_dev_kernel *adev,
> + int idx, bool new_mask_flag)
> +{
> + int irq;
> + bool old_mask_flag, need_flush = false;
> +
> + spin_lock_irq(&adev->assigned_dev_lock);
> +
> + if (!adev->dev->msix_enabled ||
> + !(adev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX))
> + goto out;
> +
> + old_mask_flag = test_bit(adev->guest_msix_entries[idx].entry,
> + adev->msix_mask_bitmap);
> + if (old_mask_flag == new_mask_flag)
> + goto out;
> +
> + irq = adev->host_msix_entries[idx].vector;
> + BUG_ON(irq == 0);
> +
> + if (new_mask_flag) {
> + set_bit(adev->guest_msix_entries[idx].entry,
> + adev->msix_mask_bitmap);
> + disable_irq_nosync(irq);
> + need_flush = true;
> + } else {
> + clear_bit(adev->guest_msix_entries[idx].entry,
> + adev->msix_mask_bitmap);
> + enable_irq(irq);
> + }
> +out:
> + spin_unlock_irq(&adev->assigned_dev_lock);
> +
> + if (need_flush)
> + flush_work(&adev->interrupt_work);
> +}
> +
> static int kvm_vm_ioctl_set_msix_entry(struct kvm *kvm,
> struct kvm_assigned_msix_entry *entry)
> {
> @@ -701,6 +757,235 @@ msix_entry_out:
>
> return r;
> }
> +
> +static int kvm_vm_ioctl_get_msix_entry(struct kvm *kvm,
> + struct kvm_msix_entry *entry)
> +{
> + int r = 0;
> + struct kvm_assigned_dev_kernel *adev;
> +
> + if (entry->type != KVM_MSIX_TYPE_ASSIGNED_DEV)
> + return -EINVAL;
> +
> + if (!entry->query_flags)
> + return -EINVAL;
> +
> + mutex_lock(&kvm->lock);
> +
> + adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
> + entry->id);
> +
> + if (!adev) {
> + r = -EINVAL;
> + goto out;
> + }
> +
> + if (entry->entry >= adev->msix_max_entries_nr) {
> + r = -ENOSPC;
> + goto out;
> + }
> +
> + if (entry->query_flags & KVM_MSIX_FLAG_QUERY_MASKBIT) {
> + if (test_bit(entry->entry, adev->msix_mask_bitmap))
> + entry->flags |= KVM_MSIX_FLAG_MASKBIT;
> + else
> + entry->flags &= ~KVM_MSIX_FLAG_MASKBIT;
> + }
> +
> +out:
> + mutex_unlock(&kvm->lock);
> +
> + return r;
> +}
> +
> +static bool msix_mmio_in_range(struct kvm_assigned_dev_kernel *adev,
> + gpa_t addr, int len)
> +{
> + gpa_t start, end;
> +
> + BUG_ON(!(adev->flags & KVM_ASSIGNED_ENABLED_MSIX_MMIO));
> + start = adev->msix_mmio_base;
> + end = adev->msix_mmio_base + PCI_MSIX_ENTRY_SIZE *
> + adev->msix_max_entries_nr;
> + if (addr >= start && addr + len <= end)
> + return true;
> +
> + return false;
> +}
> +
> +static int msix_get_enabled_idx(struct kvm_assigned_dev_kernel *adev,
> + gpa_t addr, int len)
> +{
> + int i, index = (addr - adev->msix_mmio_base) / PCI_MSIX_ENTRY_SIZE;
> +
> + for (i = 0; i < adev->entries_nr; i++)
> + if (adev->guest_msix_entries[i].entry == index)
> + return i;
> +
> + return -EINVAL;
> +}
> +
> +static int msix_mmio_read(struct kvm_io_device *this, gpa_t addr, int len,
> + void *val)
> +{
> + struct kvm_assigned_dev_kernel *adev =
> + container_of(this, struct kvm_assigned_dev_kernel,
> + msix_mmio_dev);
> + int idx, r = 0;
> + u32 entry[4];
> + struct kvm_kernel_irq_routing_entry e;
> +
> + /* TODO: Get big-endian machine work */
> + mutex_lock(&adev->kvm->lock);
> + if (!msix_mmio_in_range(adev, addr, len)) {
> + r = -EOPNOTSUPP;
> + goto out;
> + }
The unregister path does:

    mutex_lock(kvm->lock)
    kvm_io_bus_unregister_dev()
        synchronize_srcu()

If an instance of msix_mmio_read/msix_mmio_write is waiting on
kvm->lock, synchronize_srcu will never complete.
You should use a separate lock for the in-range check (and keep in mind
that reads/writes can trigger after kvm_io_bus_register_dev, so all
state accessible in the r/w handlers should be complete by that time).
> + if ((addr & 0x3) || len != 4)
> + goto out;
> +
> + idx = msix_get_enabled_idx(adev, addr, len);
> + if (idx < 0) {
> + idx = (addr - adev->msix_mmio_base) / PCI_MSIX_ENTRY_SIZE;
> + if ((addr % PCI_MSIX_ENTRY_SIZE) ==
> + PCI_MSIX_ENTRY_VECTOR_CTRL)
> + *(unsigned long *)val =
> + test_bit(idx, adev->msix_mask_bitmap) ?
> + PCI_MSIX_ENTRY_CTRL_MASKBIT : 0;
> + else
> + r = -EOPNOTSUPP;
> + goto out;
> + }
> +
> + r = kvm_get_irq_routing_entry(adev->kvm,
> + adev->guest_msix_entries[idx].vector, &e);
> + if (r || e.type != KVM_IRQ_ROUTING_MSI) {
> + r = -EOPNOTSUPP;
> + goto out;
> + }
> + entry[0] = e.msi.address_lo;
> + entry[1] = e.msi.address_hi;
> + entry[2] = e.msi.data;
> + entry[3] = test_bit(adev->guest_msix_entries[idx].entry,
> + adev->msix_mask_bitmap);
> + memcpy(val, &entry[addr % PCI_MSIX_ENTRY_SIZE / sizeof *entry], len);
Division by zero?
next prev parent reply other threads:[~2010-11-16 19:46 UTC|newest]
Thread overview: 26+ messages / expand[flat|nested] mbox.gz Atom feed top
2010-11-15 9:15 [PATCH 0/6 v5] MSI-X mask support for assigned device Sheng Yang
2010-11-15 9:15 ` [PATCH 1/6] PCI: MSI: Move MSI-X entry definition to pci_regs.h Sheng Yang
2010-11-15 9:15 ` [PATCH 2/6] PCI: Add mask bit definition for MSI-X table Sheng Yang
2010-11-15 9:15 ` [PATCH 3/6] KVM: Move struct kvm_io_device to kvm_host.h Sheng Yang
2010-11-15 9:15 ` [PATCH 4/6] KVM: Add kvm_get_irq_routing_entry() func Sheng Yang
2010-11-17 14:01 ` Avi Kivity
2010-11-18 2:22 ` Sheng Yang
2010-11-18 9:30 ` Avi Kivity
2010-11-18 9:41 ` Michael S. Tsirkin
2010-11-18 11:59 ` Sheng Yang
2010-11-18 12:33 ` Michael S. Tsirkin
2010-11-18 12:40 ` Sheng Yang
2010-11-15 9:15 ` [PATCH 5/6] KVM: assigned dev: Clean up assigned_device's flag Sheng Yang
2010-11-15 9:15 ` [PATCH 6/6] KVM: assigned dev: MSI-X mask support Sheng Yang
2010-11-15 9:27 ` [PATCH 6/6 v5 updated] " Sheng Yang
2010-11-16 19:45 ` Marcelo Tosatti [this message]
2010-11-17 1:29 ` [PATCH 6/6] " Sheng Yang
2010-11-17 13:35 ` Marcelo Tosatti
2010-11-18 9:43 ` Michael S. Tsirkin
2010-11-17 13:58 ` Avi Kivity
2010-11-18 1:58 ` Sheng Yang
2010-11-18 6:21 ` Michael S. Tsirkin
2010-11-18 6:39 ` Sheng Yang
2010-11-18 9:28 ` Avi Kivity
2010-11-18 9:37 ` Michael S. Tsirkin
2010-11-18 12:08 ` Sheng Yang
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20101116194522.GA22758@amt.cnet \
--to=mtosatti@redhat.com \
--cc=avi@redhat.com \
--cc=kvm@vger.kernel.org \
--cc=mst@redhat.com \
--cc=sheng@linux.intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox