From mboxrd@z Thu Jan 1 00:00:00 1970 From: Avi Kivity Subject: Re: [PATCH 2/2][RFC] KVM: Emulate MSI-X table and PBA in kernel Date: Tue, 28 Dec 2010 14:26:13 +0200 Message-ID: <4D19D765.7080507@redhat.com> References: <1293007495-32325-1-git-send-email-sheng@linux.intel.com> <1293007495-32325-3-git-send-email-sheng@linux.intel.com> Mime-Version: 1.0 Content-Type: text/plain; charset=ISO-8859-1; format=flowed Content-Transfer-Encoding: 7bit Cc: Marcelo Tosatti , "Michael S. Tsirkin" , kvm@vger.kernel.org, Alex Williamson To: Sheng Yang Return-path: Received: from mx1.redhat.com ([209.132.183.28]:53508 "EHLO mx1.redhat.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1753394Ab0L1M1d (ORCPT ); Tue, 28 Dec 2010 07:27:33 -0500 In-Reply-To: <1293007495-32325-3-git-send-email-sheng@linux.intel.com> Sender: kvm-owner@vger.kernel.org List-ID: On 12/22/2010 10:44 AM, Sheng Yang wrote: > Then we can support mask bit operation of assigned devices now. > > > @@ -3817,14 +3819,16 @@ static int emulator_write_emulated_onepage(unsigned long addr, > > mmio: > trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, bytes, gpa, *(u64 *)val); > + r = vcpu_mmio_write(vcpu, gpa, bytes, val); > /* > * Is this MMIO handled locally? > */ > - if (!vcpu_mmio_write(vcpu, gpa, bytes, val)) > + if (!r) > return X86EMUL_CONTINUE; > > vcpu->mmio_needed = 1; > - vcpu->run->exit_reason = KVM_EXIT_MMIO; > + vcpu->run->exit_reason = (r == -ENOTSYNC) ? > + KVM_EXIT_MSIX_ROUTING_UPDATE : KVM_EXIT_MMIO; This isn't very pretty, exit_reason should be written in vcpu_mmio_write(). I guess we can refactor it later. > > +#define KVM_MSIX_MMIO_TYPE_ASSIGNED_DEV (1 << 0) > + > +#define KVM_MSIX_MMIO_TYPE_BASE_TABLE (1 << 8) > +#define KVM_MSIX_MMIO_TYPE_BASE_PBA (1 << 9) > + > +#define KVM_MSIX_MMIO_TYPE_DEV_MASK 0x00ff > +#define KVM_MSIX_MMIO_TYPE_BASE_MASK 0xff00 Any explanation of these? 
> +struct kvm_msix_mmio_user { > + __u32 dev_id; > + __u16 type; > + __u16 max_entries_nr; > + __u64 base_addr; > + __u64 base_va; > + __u64 flags; > + __u64 reserved[4]; > +}; > + > > > +int kvm_assigned_device_update_msix_mask_bit(struct kvm *kvm, > + int assigned_dev_id, int entry, u32 flag) > +{ Need a better name for 'flag' (and make it a bool). > + int r = -EFAULT; > + struct kvm_assigned_dev_kernel *adev; > + int i; > + > + if (!irqchip_in_kernel(kvm)) > + return r; > + > + mutex_lock(&kvm->lock); > + adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, > + assigned_dev_id); > + if (!adev) > + goto out; > + > + for (i = 0; i < adev->entries_nr; i++) > + if (adev->host_msix_entries[i].entry == entry) { > + if (flag) > + disable_irq_nosync( > + adev->host_msix_entries[i].vector); > + else > + enable_irq(adev->host_msix_entries[i].vector); > + r = 0; > + break; > + } > +out: > + mutex_unlock(&kvm->lock); > + return r; > +} > > @@ -1988,6 +2008,12 @@ static int kvm_dev_ioctl_create_vm(void) > return r; > } > #endif > + r = kvm_register_msix_mmio_dev(kvm); > + if (r < 0) { > + kvm_put_kvm(kvm); > + return r; > + } Shouldn't this be part of individual KVM_REGISTER_MSIX_MMIO calls? > +static int msix_table_mmio_read(struct kvm_io_device *this, gpa_t addr, int len, > + void *val) > +{ > + struct kvm_msix_mmio_dev *mmio_dev = > + container_of(this, struct kvm_msix_mmio_dev, table_dev); > + struct kvm_msix_mmio *mmio; > + int idx, ret = 0, entry, offset, r; > + > + mutex_lock(&mmio_dev->lock); > + idx = get_mmio_table_index(mmio_dev, addr, len); > + if (idx < 0) { > + ret = -EOPNOTSUPP; > + goto out; > + } > + if ((addr & 0x3) || (len != 4 && len != 8)) > + goto out; What about (addr & 4) && (len == 8)? Is it supported? It may cross entry boundaries. 
> + mmio = &mmio_dev->mmio[idx]; > + > + entry = (addr - mmio->table_base_addr) / PCI_MSIX_ENTRY_SIZE; > + offset = addr & 0xf; > + r = copy_from_user(val, (void *)(mmio->table_base_va + > + entry * PCI_MSIX_ENTRY_SIZE + offset), len); > + if (r) > + goto out; > +out: > + mutex_unlock(&mmio_dev->lock); > + return ret; > +} > + > +static int msix_table_mmio_write(struct kvm_io_device *this, gpa_t addr, > + int len, const void *val) > +{ > + struct kvm_msix_mmio_dev *mmio_dev = > + container_of(this, struct kvm_msix_mmio_dev, table_dev); > + struct kvm_msix_mmio *mmio; > + int idx, entry, offset, ret = 0, r = 0; > + gpa_t entry_base; > + u32 old_ctrl, new_ctrl; > + > + mutex_lock(&mmio_dev->lock); > + idx = get_mmio_table_index(mmio_dev, addr, len); > + if (idx < 0) { > + ret = -EOPNOTSUPP; > + goto out; > + } > + if ((addr & 0x3) || (len != 4 && len != 8)) > + goto out; > + mmio = &mmio_dev->mmio[idx]; > + entry = (addr - mmio->table_base_addr) / PCI_MSIX_ENTRY_SIZE; > + entry_base = mmio->table_base_va + entry * PCI_MSIX_ENTRY_SIZE; > + offset = addr & 0xF; > + > + if (copy_from_user(&old_ctrl, > + entry_base + PCI_MSIX_ENTRY_VECTOR_CTRL, > + sizeof old_ctrl)) > + goto out; get_user() is easier. 
> + > + /* No allow writing to other fields when entry is unmasked */ > + if (!(old_ctrl & PCI_MSIX_ENTRY_CTRL_MASKBIT) && > + offset != PCI_MSIX_ENTRY_VECTOR_CTRL) > + goto out; > + > + if (copy_to_user(entry_base + offset, val, len)) > + goto out; > + > + if (copy_from_user(&new_ctrl, > + entry_base + PCI_MSIX_ENTRY_VECTOR_CTRL, > + sizeof new_ctrl)) > + goto out; put_user() > + > + if ((offset < PCI_MSIX_ENTRY_VECTOR_CTRL && len == 4) || > + (offset < PCI_MSIX_ENTRY_DATA && len == 8)) > + ret = -ENOTSYNC; > + if (old_ctrl == new_ctrl) > + goto out; > + if (!(old_ctrl & PCI_MSIX_ENTRY_CTRL_MASKBIT) && > + (new_ctrl & PCI_MSIX_ENTRY_CTRL_MASKBIT)) > + r = update_msix_mask_bit(mmio_dev->kvm, mmio, entry, 1); > + else if ((old_ctrl & PCI_MSIX_ENTRY_CTRL_MASKBIT) && > + !(new_ctrl & PCI_MSIX_ENTRY_CTRL_MASKBIT)) > + r = update_msix_mask_bit(mmio_dev->kvm, mmio, entry, 0); > + if (r || ret) > + ret = -ENOTSYNC; > +out: > + mutex_unlock(&mmio_dev->lock); > + return ret; > +} blank line... > +static const struct kvm_io_device_ops msix_mmio_table_ops = { > + .read = msix_table_mmio_read, > + .write = msix_table_mmio_write, > +}; > + > ++ > +int kvm_vm_ioctl_register_msix_mmio(struct kvm *kvm, > + struct kvm_msix_mmio_user *mmio_user) > +{ > + struct kvm_msix_mmio_dev *mmio_dev = &kvm->msix_mmio_dev; > + struct kvm_msix_mmio *mmio = NULL; > + int r = 0, i; > + > + mutex_lock(&mmio_dev->lock); > + for (i = 0; i < mmio_dev->mmio_nr; i++) { > + if (mmio_dev->mmio[i].dev_id == mmio_user->dev_id && > + (mmio_dev->mmio[i].type & KVM_MSIX_MMIO_TYPE_DEV_MASK) == > + (mmio_user->type & KVM_MSIX_MMIO_TYPE_DEV_MASK)) { > + mmio = &mmio_dev->mmio[i]; > + if (mmio->max_entries_nr != mmio_user->max_entries_nr) { > + r = -EINVAL; > + goto out; > + } > + break; > + } > + } > + if (!mmio) { > + if (mmio_dev->mmio_nr == KVM_MSIX_MMIO_MAX) { > + r = -ENOSPC; > + goto out; > + } > + mmio = &mmio_dev->mmio[mmio_dev->mmio_nr]; > + mmio_dev->mmio_nr++; > + } > + mmio->max_entries_nr = mmio_user->max_entries_nr; 
Sanity check to avoid overflow. > + mmio->dev_id = mmio_user->dev_id; > + mmio->flags = mmio_user->flags; Check for unsupported bits (all of them at present?) > + if ((mmio_user->type & KVM_MSIX_MMIO_TYPE_DEV_MASK) == > + KVM_MSIX_MMIO_TYPE_ASSIGNED_DEV) > + mmio->type = KVM_MSIX_MMIO_TYPE_ASSIGNED_DEV; > + > + if ((mmio_user->type & KVM_MSIX_MMIO_TYPE_BASE_MASK) == > + KVM_MSIX_MMIO_TYPE_BASE_TABLE) { > + mmio->type |= KVM_MSIX_MMIO_TYPE_BASE_TABLE; > + mmio->table_base_addr = mmio_user->base_addr; > + mmio->table_base_va = mmio_user->base_va; Check for va in kernel space. > + } else if ((mmio_user->type & KVM_MSIX_MMIO_TYPE_BASE_MASK) == > + KVM_MSIX_MMIO_TYPE_BASE_PBA) { > + mmio->type |= KVM_MSIX_MMIO_TYPE_BASE_PBA; > + mmio->pba_base_addr = mmio_user->base_addr; > + mmio->pba_base_va = mmio_user->base_va; > + } > +out: > + mutex_unlock(&mmio_dev->lock); > + return r; > +} > + > + In all, looks reasonable. I'd like to see documentation for it, and review from the pci people. Alex, mst? -- error compiling committee.c: too many arguments to function