Re: [PATCH 7/8] KVM: assigned dev: Introduce io_device for MSI-X MMIO accessing

public inbox for kvm@vger.kernel.org
 help / color / mirror / Atom feed

From: Avi Kivity <avi@redhat.com>
To: Sheng Yang <sheng@linux.intel.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>,
	kvm@vger.kernel.org, "Michael S. Tsirkin" <mst@redhat.com>
Subject: Re: [PATCH 7/8] KVM: assigned dev: Introduce io_device for MSI-X MMIO accessing
Date: Wed, 20 Oct 2010 11:46:47 +0200	[thread overview]
Message-ID: <4CBEBA87.3040107@redhat.com> (raw)
In-Reply-To: <1287563192-29685-8-git-send-email-sheng@linux.intel.com>

  On 10/20/2010 10:26 AM, Sheng Yang wrote:
> It would be work with KVM_CAP_DEVICE_MSIX_MASK, which we would enable in the
> last patch.
>
>
> +struct kvm_assigned_msix_mmio {
> +	__u32 assigned_dev_id;
> +	__u64 base_addr;

Different alignment and size on 32 and 64 bits.

Is base_addr a guest physical address?  Do we need a size or it it fixed?

> +	__u32 flags;
> +	__u32 reserved[2];
> +};
> +
>
> @@ -465,6 +465,8 @@ struct kvm_assigned_dev_kernel {
>   	struct pci_dev *dev;
>   	struct kvm *kvm;
>   	spinlock_t assigned_dev_lock;
> +	u64 msix_mmio_base;

gpa_t.

> +	struct kvm_io_device msix_mmio_dev;
>   };
>
>   struct kvm_irq_mask_notifier {
> diff --git a/virt/kvm/assigned-dev.c b/virt/kvm/assigned-dev.c
> index bf96ea7..5d2adc4 100644
> --- a/virt/kvm/assigned-dev.c
> +++ b/virt/kvm/assigned-dev.c
> @@ -739,6 +739,137 @@ msix_entry_out:
>
>   	return r;
>   }
> +
> +static bool msix_mmio_in_range(struct kvm_assigned_dev_kernel *adev,
> +			      gpa_t addr, int len, int *idx)
> +{
> +	int i;
> +
> +	if (!(adev->irq_requested_type&  KVM_DEV_IRQ_HOST_MSIX))
> +		return false;

Just don't install the io_device in that case.

> +	BUG_ON(adev->msix_mmio_base == 0);
> +	for (i = 0; i<  adev->entries_nr; i++) {
> +		u64 start, end;
> +		start = adev->msix_mmio_base +
> +			adev->guest_msix_entries[i].entry * PCI_MSIX_ENTRY_SIZE;
> +		end = start + PCI_MSIX_ENTRY_SIZE;
> +		if (addr>= start&&  addr + len<= end) {
> +			*idx = i;
> +			return true;
> +		}

What if it's a partial hit?  write part of an entry and part of another 
entry?

> +	}
> +	return false;
> +}
> +
> +static int msix_mmio_read(struct kvm_io_device *this, gpa_t addr, int len,
> +			  void *val)
> +{
> +	struct kvm_assigned_dev_kernel *adev =
> +			container_of(this, struct kvm_assigned_dev_kernel,
> +				     msix_mmio_dev);
> +	int idx, r = 0;
> +	u32 entry[4];
> +	struct kvm_kernel_irq_routing_entry *e;
> +
> +	mutex_lock(&adev->kvm->lock);
> +	if (!msix_mmio_in_range(adev, addr, len,&idx)) {
> +		r = -EOPNOTSUPP;
> +		goto out;
> +	}
> +	if ((addr&  0x3) || len != 4) {
> +		printk(KERN_WARNING
> +			"KVM: Unaligned reading for device MSI-X MMIO! "
> +			"addr 0x%llx, len %d\n", addr, len);

Guest exploitable printk()

> +		r = -EOPNOTSUPP;

If the guest assigned the device to another guest, it allows the nested 
guest to kill the non-nested guest.  Need to exit in a graceful fashion.

> +		goto out;
> +	}
> +
> +	e = kvm_get_irq_routing_entry(adev->kvm,
> +			adev->guest_msix_entries[idx].vector);
> +	if (!e || e->type != KVM_IRQ_ROUTING_MSI) {
> +		printk(KERN_WARNING "KVM: Wrong MSI-X routing entry! "
> +			"addr 0x%llx, len %d\n", addr, len);
> +		r = -EOPNOTSUPP;
> +		goto out;
> +	}
> +	entry[0] = e->msi.address_lo;
> +	entry[1] = e->msi.address_hi;
> +	entry[2] = e->msi.data;
> +	entry[3] = !!(adev->guest_msix_entries[idx].flags&
> +			KVM_ASSIGNED_MSIX_MASK);
> +	memcpy(val,&entry[addr % PCI_MSIX_ENTRY_SIZE / 4], len);
> +
> +out:
> +	mutex_unlock(&adev->kvm->lock);
> +	return r;
> +}
> +
> +static int msix_mmio_write(struct kvm_io_device *this, gpa_t addr, int len,
> +			   const void *val)
> +{
> +	struct kvm_assigned_dev_kernel *adev =
> +			container_of(this, struct kvm_assigned_dev_kernel,
> +				     msix_mmio_dev);
> +	int idx, r = 0;
> +	unsigned long new_val = *(unsigned long *)val;
> +	bool entry_masked;
> +
> +	mutex_lock(&adev->kvm->lock);
> +	if (!msix_mmio_in_range(adev, addr, len,&idx)) {
> +		r = -EOPNOTSUPP;
> +		goto out;
> +	}
> +	if ((addr&  0x3) || len != 4) {
> +		printk(KERN_WARNING
> +			"KVM: Unaligned writing for device MSI-X MMIO! "
> +			"addr 0x%llx, len %d, val 0x%lx\n",
> +			addr, len, new_val);
> +		r = -EOPNOTSUPP;
> +		goto out;
> +	}
> +	entry_masked = adev->guest_msix_entries[idx].flags&
> +			KVM_ASSIGNED_MSIX_MASK;
> +	if (addr % PCI_MSIX_ENTRY_SIZE != PCI_MSIX_ENTRY_VECTOR_CTRL) {
> +		/* Only allow entry modification when entry was masked */
> +		if (!entry_masked) {
> +			printk(KERN_WARNING
> +				"KVM: guest try to write unmasked MSI-X entry. "
> +				"addr 0x%llx, len %d, val 0x%lx\n",
> +				addr, len, new_val);
> +			r = 0;

What does the spec says about this situation?

> +		} else
> +			/* Leave it to QEmu */

s/qemu/userspace/

> +			r = -EOPNOTSUPP;

What would userspace do in this situation?  I hope you documented 
precisely what the kernel handles and what it doesn't?

I prefer more kernel code in the kernel to having an interface which is 
hard to use correctly.

> +		goto out;
> +	}
> +	if (new_val&  ~1ul) {

Is there a #define for this bit?

> +		printk(KERN_WARNING
> +			"KVM: Bad writing for device MSI-X MMIO! "
> +			"addr 0x%llx, len %d, val 0x%lx\n",
> +			addr, len, new_val);
> +		r = -EOPNOTSUPP;
> +		goto out;
> +	}
> +	if (new_val == 1&&  !entry_masked) {
> +		adev->guest_msix_entries[idx].flags |=
> +			KVM_ASSIGNED_MSIX_MASK;
> +		update_msix_mask(adev, idx);
> +	} else if (new_val == 0&&  entry_masked) {
> +		adev->guest_msix_entries[idx].flags&=
> +			~KVM_ASSIGNED_MSIX_MASK;
> +		update_msix_mask(adev, idx);
> +	}

Ah, I see you do reuse update_msix_mask().

> +out:
> +	mutex_unlock(&adev->kvm->lock);
> +
> +	return r;
> +}
> +
> +static const struct kvm_io_device_ops msix_mmio_ops = {
> +	.read     = msix_mmio_read,
> +	.write    = msix_mmio_write,
> +};
> +
>   #endif
>
>   long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,


-- 
error compiling committee.c: too many arguments to function

next prev parent reply	other threads:[~2010-10-20  9:46 UTC|newest]

Thread overview: 66+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-10-20  8:26 [PATCH 0/8][v2] MSI-X mask emulation support for assigned device Sheng Yang
2010-10-20  8:26 ` [PATCH 1/8] PCI: MSI: Move MSI-X entry definition to pci_regs.h Sheng Yang
2010-10-20 11:07   ` Matthew Wilcox
2010-10-20  8:26 ` [PATCH 2/8] irq: Export irq_to_desc() to modules Sheng Yang
2010-10-20  8:26 ` [PATCH 3/8] KVM: x86: Enable ENABLE_CAP capability for x86 Sheng Yang
2010-10-20  8:26 ` [PATCH 4/8] KVM: Move struct kvm_io_device to kvm_host.h Sheng Yang
2010-10-20  8:26 ` [PATCH 5/8] KVM: Add kvm_get_irq_routing_entry() func Sheng Yang
2010-10-20  8:53   ` Avi Kivity
2010-10-20  8:58     ` Sheng Yang
2010-10-20  9:13     ` Sheng Yang
2010-10-20  9:17       ` Sheng Yang
2010-10-20  9:32         ` Avi Kivity
2010-10-20  8:26 ` [PATCH 6/8] KVM: assigned dev: Preparation for mask support in userspace Sheng Yang
2010-10-20  9:30   ` Avi Kivity
2010-10-22 14:53   ` Marcelo Tosatti
2010-10-24 12:19     ` Sheng Yang
2010-10-24 12:23       ` Michael S. Tsirkin
2010-10-28  8:21         ` Sheng Yang
2010-10-20  8:26 ` [PATCH 7/8] KVM: assigned dev: Introduce io_device for MSI-X MMIO accessing Sheng Yang
2010-10-20  9:46   ` Avi Kivity [this message]
2010-10-20 10:33     ` Michael S. Tsirkin
2010-10-21  6:46     ` Sheng Yang
2010-10-21  9:27       ` Avi Kivity
2010-10-21  9:24         ` Michael S. Tsirkin
2010-10-21  9:47           ` Avi Kivity
2010-10-21 10:51             ` Michael S. Tsirkin
2010-10-20 22:35   ` Michael S. Tsirkin
2010-10-21  7:44     ` Sheng Yang
2010-10-20  8:26 ` [PATCH 8/8] KVM: Emulation MSI-X mask bits for assigned devices Sheng Yang
2010-10-20  9:49   ` Avi Kivity
2010-10-20 22:24   ` Michael S. Tsirkin
2010-10-21  8:30   ` Sheng Yang
2010-10-21  8:39     ` Michael S. Tsirkin
2010-10-22  4:42       ` Sheng Yang
2010-10-22 10:17         ` Michael S. Tsirkin
2010-10-22 13:30           ` Sheng Yang
2010-10-22 14:32             ` Michael S. Tsirkin
2010-10-20  9:51 ` [PATCH 0/8][v2] MSI-X mask emulation support for assigned device Avi Kivity
2010-10-20 10:44   ` Michael S. Tsirkin
2010-10-20 10:59     ` Avi Kivity
2010-10-20 13:43       ` Michael S. Tsirkin
2010-10-20 14:58         ` Alex Williamson
2010-10-20 14:58           ` Michael S. Tsirkin
2010-10-20 15:12             ` Alex Williamson
2010-10-20 15:17         ` Avi Kivity
2010-10-20 15:22           ` Alex Williamson
2010-10-20 15:26             ` Avi Kivity
2010-10-20 15:38               ` Alex Williamson
2010-10-20 14:47     ` Alex Williamson
2010-10-20 14:46       ` Michael S. Tsirkin
2010-10-20 15:07         ` Alex Williamson
2010-10-20 15:13           ` Michael S. Tsirkin
2010-10-20 20:13             ` Alex Williamson
2010-10-20 22:06               ` Michael S. Tsirkin
2010-10-20 15:23       ` Avi Kivity
2010-10-20 15:38         ` Alex Williamson
2010-10-20 15:54           ` Avi Kivity
2010-10-20 15:59             ` Michael S. Tsirkin
2010-10-20 16:13               ` Avi Kivity
2010-10-20 17:11                 ` Michael S. Tsirkin
2010-10-20 18:31               ` Alex Williamson
2010-10-21  7:41   ` Sheng Yang
2010-10-20 19:02 ` Marcelo Tosatti
2010-10-21  7:10   ` Sheng Yang
2010-10-21  8:21     ` Michael S. Tsirkin
2010-10-20 22:20 ` Michael S. Tsirkin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4CBEBA87.3040107@redhat.com \
    --to=avi@redhat.com \
    --cc=kvm@vger.kernel.org \
    --cc=mst@redhat.com \
    --cc=mtosatti@redhat.com \
    --cc=sheng@linux.intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox