From mboxrd@z Thu Jan 1 00:00:00 1970 From: Avi Kivity Subject: Re: [PATCH 7/8] KVM: assigned dev: Introduce io_device for MSI-X MMIO accessing Date: Wed, 20 Oct 2010 11:46:47 +0200 Message-ID: <4CBEBA87.3040107@redhat.com> References: <1287563192-29685-1-git-send-email-sheng@linux.intel.com> <1287563192-29685-8-git-send-email-sheng@linux.intel.com> Mime-Version: 1.0 Content-Type: text/plain; charset=ISO-8859-1; format=flowed Content-Transfer-Encoding: 7bit Cc: Marcelo Tosatti , kvm@vger.kernel.org, "Michael S. Tsirkin" To: Sheng Yang Return-path: Received: from mx1.redhat.com ([209.132.183.28]:14399 "EHLO mx1.redhat.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751009Ab0JTJqx (ORCPT ); Wed, 20 Oct 2010 05:46:53 -0400 In-Reply-To: <1287563192-29685-8-git-send-email-sheng@linux.intel.com> Sender: kvm-owner@vger.kernel.org List-ID: On 10/20/2010 10:26 AM, Sheng Yang wrote: > It would be work with KVM_CAP_DEVICE_MSIX_MASK, which we would enable in the > last patch. > > > +struct kvm_assigned_msix_mmio { > + __u32 assigned_dev_id; > + __u64 base_addr; Different alignment and size on 32 and 64 bits. Is base_addr a guest physical address? Do we need a size or is it fixed? > + __u32 flags; > + __u32 reserved[2]; > +}; > + > > @@ -465,6 +465,8 @@ struct kvm_assigned_dev_kernel { > struct pci_dev *dev; > struct kvm *kvm; > spinlock_t assigned_dev_lock; > + u64 msix_mmio_base; gpa_t. > + struct kvm_io_device msix_mmio_dev; > }; > > struct kvm_irq_mask_notifier { > diff --git a/virt/kvm/assigned-dev.c b/virt/kvm/assigned-dev.c > index bf96ea7..5d2adc4 100644 > --- a/virt/kvm/assigned-dev.c > +++ b/virt/kvm/assigned-dev.c > @@ -739,6 +739,137 @@ msix_entry_out: > > return r; > } > + > +static bool msix_mmio_in_range(struct kvm_assigned_dev_kernel *adev, > + gpa_t addr, int len, int *idx) > +{ > + int i; > + > + if (!(adev->irq_requested_type& KVM_DEV_IRQ_HOST_MSIX)) > + return false; Just don't install the io_device in that case. 
> + BUG_ON(adev->msix_mmio_base == 0); > + for (i = 0; i< adev->entries_nr; i++) { > + u64 start, end; > + start = adev->msix_mmio_base + > + adev->guest_msix_entries[i].entry * PCI_MSIX_ENTRY_SIZE; > + end = start + PCI_MSIX_ENTRY_SIZE; > + if (addr>= start&& addr + len<= end) { > + *idx = i; > + return true; > + } What if it's a partial hit? write part of an entry and part of another entry? > + } > + return false; > +} > + > +static int msix_mmio_read(struct kvm_io_device *this, gpa_t addr, int len, > + void *val) > +{ > + struct kvm_assigned_dev_kernel *adev = > + container_of(this, struct kvm_assigned_dev_kernel, > + msix_mmio_dev); > + int idx, r = 0; > + u32 entry[4]; > + struct kvm_kernel_irq_routing_entry *e; > + > + mutex_lock(&adev->kvm->lock); > + if (!msix_mmio_in_range(adev, addr, len,&idx)) { > + r = -EOPNOTSUPP; > + goto out; > + } > + if ((addr& 0x3) || len != 4) { > + printk(KERN_WARNING > + "KVM: Unaligned reading for device MSI-X MMIO! " > + "addr 0x%llx, len %d\n", addr, len); Guest exploitable printk() > + r = -EOPNOTSUPP; If the guest assigned the device to another guest, it allows the nested guest to kill the non-nested guest. Need to exit in a graceful fashion. > + goto out; > + } > + > + e = kvm_get_irq_routing_entry(adev->kvm, > + adev->guest_msix_entries[idx].vector); > + if (!e || e->type != KVM_IRQ_ROUTING_MSI) { > + printk(KERN_WARNING "KVM: Wrong MSI-X routing entry! 
" > + "addr 0x%llx, len %d\n", addr, len); > + r = -EOPNOTSUPP; > + goto out; > + } > + entry[0] = e->msi.address_lo; > + entry[1] = e->msi.address_hi; > + entry[2] = e->msi.data; > + entry[3] = !!(adev->guest_msix_entries[idx].flags& > + KVM_ASSIGNED_MSIX_MASK); > + memcpy(val,&entry[addr % PCI_MSIX_ENTRY_SIZE / 4], len); > + > +out: > + mutex_unlock(&adev->kvm->lock); > + return r; > +} > + > +static int msix_mmio_write(struct kvm_io_device *this, gpa_t addr, int len, > + const void *val) > +{ > + struct kvm_assigned_dev_kernel *adev = > + container_of(this, struct kvm_assigned_dev_kernel, > + msix_mmio_dev); > + int idx, r = 0; > + unsigned long new_val = *(unsigned long *)val; > + bool entry_masked; > + > + mutex_lock(&adev->kvm->lock); > + if (!msix_mmio_in_range(adev, addr, len,&idx)) { > + r = -EOPNOTSUPP; > + goto out; > + } > + if ((addr& 0x3) || len != 4) { > + printk(KERN_WARNING > + "KVM: Unaligned writing for device MSI-X MMIO! " > + "addr 0x%llx, len %d, val 0x%lx\n", > + addr, len, new_val); > + r = -EOPNOTSUPP; > + goto out; > + } > + entry_masked = adev->guest_msix_entries[idx].flags& > + KVM_ASSIGNED_MSIX_MASK; > + if (addr % PCI_MSIX_ENTRY_SIZE != PCI_MSIX_ENTRY_VECTOR_CTRL) { > + /* Only allow entry modification when entry was masked */ > + if (!entry_masked) { > + printk(KERN_WARNING > + "KVM: guest try to write unmasked MSI-X entry. " > + "addr 0x%llx, len %d, val 0x%lx\n", > + addr, len, new_val); > + r = 0; What does the spec say about this situation? > + } else > + /* Leave it to QEmu */ s/qemu/userspace/ > + r = -EOPNOTSUPP; What would userspace do in this situation? I hope you documented precisely what the kernel handles and what it doesn't? I prefer more kernel code in the kernel to having an interface which is hard to use correctly. > + goto out; > + } > + if (new_val& ~1ul) { Is there a #define for this bit? > + printk(KERN_WARNING > + "KVM: Bad writing for device MSI-X MMIO! 
" > + "addr 0x%llx, len %d, val 0x%lx\n", > + addr, len, new_val); > + r = -EOPNOTSUPP; > + goto out; > + } > + if (new_val == 1&& !entry_masked) { > + adev->guest_msix_entries[idx].flags |= > + KVM_ASSIGNED_MSIX_MASK; > + update_msix_mask(adev, idx); > + } else if (new_val == 0&& entry_masked) { > + adev->guest_msix_entries[idx].flags&= > + ~KVM_ASSIGNED_MSIX_MASK; > + update_msix_mask(adev, idx); > + } Ah, I see you do reuse update_msix_mask(). > +out: > + mutex_unlock(&adev->kvm->lock); > + > + return r; > +} > + > +static const struct kvm_io_device_ops msix_mmio_ops = { > + .read = msix_mmio_read, > + .write = msix_mmio_write, > +}; > + > #endif > > long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl, -- error compiling committee.c: too many arguments to function