From mboxrd@z Thu Jan 1 00:00:00 1970 From: Sheng Yang Subject: Re: [PATCH 6/7] KVM: assigned dev: MSI-X mask support Date: Mon, 15 Nov 2010 15:37:21 +0800 Message-ID: <201011151537.22023.sheng@linux.intel.com> References: <1289461620-7055-1-git-send-email-sheng@linux.intel.com> <201011121854.02076.sheng@linux.intel.com> <20101112112516.GC11354@redhat.com> Mime-Version: 1.0 Content-Type: Text/Plain; charset=utf-8 Content-Transfer-Encoding: QUOTED-PRINTABLE Cc: Avi Kivity , Marcelo Tosatti , kvm@vger.kernel.org To: "Michael S. Tsirkin" Return-path: Received: from mga01.intel.com ([192.55.52.88]:60102 "EHLO mga01.intel.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1757584Ab0KOHgu convert rfc822-to-8bit (ORCPT ); Mon, 15 Nov 2010 02:36:50 -0500 In-Reply-To: <20101112112516.GC11354@redhat.com> Sender: kvm-owner@vger.kernel.org List-ID: On Friday 12 November 2010 19:25:16 Michael S. Tsirkin wrote: > On Fri, Nov 12, 2010 at 06:54:01PM +0800, Sheng Yang wrote: > > On Friday 12 November 2010 18:47:29 Michael S. Tsirkin wrote: > > > On Fri, Nov 12, 2010 at 06:13:48PM +0800, Sheng Yang wrote: > > > > On Friday 12 November 2010 17:53:13 Michael S. Tsirkin wrote: > > > > > On Thu, Nov 11, 2010 at 03:46:59PM +0800, Sheng Yang wrote: > > > > > > This patch enable per-vector mask for assigned devices usin= g > > > > > > MSI-X. > > > > > >=20 > > > > > > This patch provided two new APIs: one is for guest to speci= fic > > > > > > device's MSI-X table address in MMIO, the other is for user= space > > > > > > to get information about mask bit. > > > > > >=20 > > > > > > All the mask bit operation are kept in kernel, in order to > > > > > > accelerate. Userspace shouldn't access the device MMIO dire= ctly > > > > > > for the information, instead it should uses provided API to= do > > > > > > so. 
> > > > > >=20 > > > > > > Signed-off-by: Sheng Yang > > > > > > --- > > > > > >=20 > > > > > > arch/x86/kvm/x86.c | 1 + > > > > > > include/linux/kvm.h | 32 +++++ > > > > > > include/linux/kvm_host.h | 5 + > > > > > > virt/kvm/assigned-dev.c | 316 > > > > > > +++++++++++++++++++++++++++++++++++++++++++++- 4 files > >=20 > > changed, > >=20 > > > > 353 > > > >=20 > > > > > > insertions(+), 1 deletions(-) > > > > > >=20 > > > > > > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c > > > > > > index f3f86b2..847f1e1 100644 > > > > > > --- a/arch/x86/kvm/x86.c > > > > > > +++ b/arch/x86/kvm/x86.c > > > > > > @@ -1926,6 +1926,7 @@ int kvm_dev_ioctl_check_extension(lon= g ext) > > > > > >=20 > > > > > > case KVM_CAP_DEBUGREGS: > > > > > > case KVM_CAP_X86_ROBUST_SINGLESTEP: > > > > > >=20 > > > > > > case KVM_CAP_XSAVE: > > > > > > + case KVM_CAP_MSIX_MASK: > > > > > > r =3D 1; > > > > > > break; > > > > > > =09 > > > > > > case KVM_CAP_COALESCED_MMIO: > > > > > > diff --git a/include/linux/kvm.h b/include/linux/kvm.h > > > > > > index 919ae53..32cd244 100644 > > > > > > --- a/include/linux/kvm.h > > > > > > +++ b/include/linux/kvm.h > > > > > > @@ -540,6 +540,9 @@ struct kvm_ppc_pvinfo { > > > > > >=20 > > > > > > #endif > > > > > > #define KVM_CAP_PPC_GET_PVINFO 57 > > > > > > #define KVM_CAP_PPC_IRQ_LEVEL 58 > > > > > >=20 > > > > > > +#ifdef __KVM_HAVE_MSIX > > > > > > +#define KVM_CAP_MSIX_MASK 59 > > > > > > +#endif > > > > > >=20 > > > > > > #ifdef KVM_CAP_IRQ_ROUTING > > > > > >=20 > > > > > > @@ -671,6 +674,9 @@ struct kvm_clock_data { > > > > > >=20 > > > > > > #define KVM_XEN_HVM_CONFIG _IOW(KVMIO, 0x7a, struc= t > > > > > > kvm_xen_hvm_config) #define KVM_SET_CLOCK =20 > > > > > > _IOW(KVMIO, 0x7b, struct kvm_clock_data) #define KVM_GET_C= LOCK > > > > > > _IOR(KVMIO, 0x7c, struct kvm_clock_data) > > > > > >=20 > > > > > > +/* Available with KVM_CAP_MSIX_MASK */ > > > > > > +#define KVM_GET_MSIX_ENTRY _IOWR(KVMIO, 0x7d, stru= ct > > > > > > 
kvm_msix_entry) +#define KVM_UPDATE_MSIX_MMIO _IOW(KVM= IO,=20 > > > > > > 0x7e, struct kvm_msix_mmio) > > > > > >=20 > > > > > > /* Available with KVM_CAP_PIT_STATE2 */ > > > > > > #define KVM_GET_PIT2 _IOR(KVMIO, 0x9f, struc= t > > > > > > kvm_pit_state2) #define KVM_SET_PIT2 _IOW(KVM= IO,=20 > > > > > > 0xa0, struct kvm_pit_state2) > > > > > >=20 > > > > > > @@ -794,4 +800,30 @@ struct kvm_assigned_msix_entry { > > > > > >=20 > > > > > > __u16 padding[3]; > > > > > > =20 > > > > > > }; > > > > > >=20 > > > > > > +#define KVM_MSIX_TYPE_ASSIGNED_DEV 1 > > > > > > + > > > > > > +#define KVM_MSIX_FLAG_MASKBIT (1 << 0) > > > > > > +#define KVM_MSIX_FLAG_QUERY_MASKBIT (1 << 0) > > > > > > + > > > > > > +struct kvm_msix_entry { > > > > > > + __u32 id; > > > > > > + __u32 type; > > > > > > + __u32 entry; /* The index of entry in the MSI-X table */ > > > > > > + __u32 flags; > > > > > > + __u32 query_flags; > > > > > > + __u32 reserved[5]; > > > > > > +}; > > > > > > + > > > > > > +#define KVM_MSIX_MMIO_FLAG_REGISTER (1 << 0) > > > > > > +#define KVM_MSIX_MMIO_FLAG_UNREGISTER (1 << 1) > > > > > > + > > > > > > +struct kvm_msix_mmio { > > > > > > + __u32 id; > > > > > > + __u32 type; > > > > > > + __u64 base_addr; > > > > > > + __u32 max_entries_nr; > > > > > > + __u32 flags; > > > > > > + __u32 reserved[6]; > > > > > > +}; > > > > > > + > > > > > >=20 > > > > > > #endif /* __LINUX_KVM_H */ > > > > > >=20 > > > > > > diff --git a/include/linux/kvm_host.h b/include/linux/kvm_h= ost.h > > > > > > index 4b31539..9d49074 100644 > > > > > > --- a/include/linux/kvm_host.h > > > > > > +++ b/include/linux/kvm_host.h > > > > > > @@ -445,6 +445,7 @@ struct kvm_guest_msix_entry { > > > > > >=20 > > > > > > }; > > > > > > =20 > > > > > > #define KVM_ASSIGNED_ENABLED_IOMMU (1 << 0) > > > > > >=20 > > > > > > +#define KVM_ASSIGNED_ENABLED_MSIX_MMIO (1 << 1) > > > > > >=20 > > > > > > struct kvm_assigned_dev_kernel { > > > > > > =20 > > > > > > struct kvm_irq_ack_notifier ack_notifier; 
> > > > > > struct work_struct interrupt_work; > > > > > >=20 > > > > > > @@ -465,6 +466,10 @@ struct kvm_assigned_dev_kernel { > > > > > >=20 > > > > > > struct pci_dev *dev; > > > > > > struct kvm *kvm; > > > > > > spinlock_t assigned_dev_lock; > > > > > >=20 > > > > > > + DECLARE_BITMAP(msix_mask_bitmap, KVM_MAX_MSIX_PER_DEV); > > > > > > + gpa_t msix_mmio_base; > > > > > > + struct kvm_io_device msix_mmio_dev; > > > > > > + int msix_max_entries_nr; > > > > > >=20 > > > > > > }; > > > > > > =20 > > > > > > struct kvm_irq_mask_notifier { > > > > > >=20 > > > > > > diff --git a/virt/kvm/assigned-dev.c b/virt/kvm/assigned-de= v.c > > > > > > index 5c6b96d..3010d7d 100644 > > > > > > --- a/virt/kvm/assigned-dev.c > > > > > > +++ b/virt/kvm/assigned-dev.c > > > > > > @@ -226,12 +226,27 @@ static void kvm_free_assigned_irq(str= uct > > > > > > kvm *kvm, > > > > > >=20 > > > > > > kvm_deassign_irq(kvm, assigned_dev, > > > > > > assigned_dev->irq_requested_type); > > > > > > =20 > > > > > > } > > > > > >=20 > > > > > > +static void unregister_msix_mmio(struct kvm *kvm, > > > > > > + struct kvm_assigned_dev_kernel *adev) > > > > > > +{ > > > > > > + if (adev->flags & KVM_ASSIGNED_ENABLED_MSIX_MMIO) { > > > > > > + mutex_lock(&kvm->slots_lock); > > > > > > + kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, > > > > > > + &adev->msix_mmio_dev); > > > > > > + mutex_unlock(&kvm->slots_lock); > > > > > > + adev->flags &=3D ~KVM_ASSIGNED_ENABLED_MSIX_MMIO; > > > > > > + } > > > > > > +} > > > > > > + > > > > > >=20 > > > > > > static void kvm_free_assigned_device(struct kvm *kvm, > > > > > > =20 > > > > > > struct kvm_assigned_dev_kernel > > > > > > *assigned_dev) > > > > > > =20 > > > > > > { > > > > > > =20 > > > > > > kvm_free_assigned_irq(kvm, assigned_dev); > > > > > >=20 > > > > > > +#ifdef __KVM_HAVE_MSIX > > > > > > + unregister_msix_mmio(kvm, assigned_dev); > > > > > > +#endif > > > > > >=20 > > > > > > pci_reset_function(assigned_dev->dev); > > > > > > =09 > > > > > > 
pci_release_regions(assigned_dev->dev); > > > > > >=20 > > > > > > @@ -504,7 +519,7 @@ out: > > > > > > static int kvm_vm_ioctl_assign_device(struct kvm *kvm, > > > > > > =20 > > > > > > struct kvm_assigned_pci_dev *assigned_dev) > > > > > > =20 > > > > > > { > > > > > >=20 > > > > > > - int r =3D 0, idx; > > > > > > + int r =3D 0, idx, i; > > > > > >=20 > > > > > > struct kvm_assigned_dev_kernel *match; > > > > > > struct pci_dev *dev; > > > > > >=20 > > > > > > @@ -564,6 +579,10 @@ static int kvm_vm_ioctl_assign_device(= struct > > > > > > kvm *kvm, > > > > > >=20 > > > > > > list_add(&match->list, &kvm->arch.assigned_dev_head); > > > > > >=20 > > > > > > + /* The state after reset of MSI-X table is all masked */ > > > > > > + for (i =3D 0; i < KVM_MAX_MSIX_PER_DEV; i++) > > > > > > + set_bit(i, match->msix_mask_bitmap); > > > > > > + > > > > > >=20 > > > > > > if (assigned_dev->flags & KVM_ASSIGNED_ENABLED_IOMMU) { > > > > > > =09 > > > > > > if (!kvm->arch.iommu_domain) { > > > > > > =09 > > > > > > r =3D kvm_iommu_map_guest(kvm); > > > > > >=20 > > > > > > @@ -667,6 +686,43 @@ msix_nr_out: > > > > > > return r; > > > > > > =20 > > > > > > } > > > > > >=20 > > > > > > +static void update_msix_mask(struct kvm_assigned_dev_kerne= l > > > > > > *adev, + int idx, bool new_mask_flag) > > > > > > +{ > > > > > > + int irq; > > > > > > + bool old_mask_flag, need_flush =3D false; > > > > > > + > > > > > > + spin_lock_irq(&adev->assigned_dev_lock); > > > > > > + > > > > > > + if (!adev->dev->msix_enabled || > > > > > > + !(adev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX)) > > > > > > + goto out; > > > > > > + > > > > > > + old_mask_flag =3D test_bit(adev->guest_msix_entries[idx].= entry, > > > > > > + adev->msix_mask_bitmap); > > > > > > + if (old_mask_flag =3D=3D new_mask_flag) > > > > > > + goto out; > > > > > > + > > > > > > + irq =3D adev->host_msix_entries[idx].vector; > > > > > > + BUG_ON(irq =3D=3D 0); > > > > > > + > > > > > > + if (new_mask_flag) { > > > > > > 
+ set_bit(adev->guest_msix_entries[idx].entry, > > > > > > + adev->msix_mask_bitmap); > > > > > > + disable_irq_nosync(irq); > > > > > > + need_flush =3D true; > > > > > > + } else { > > > > > > + clear_bit(adev->guest_msix_entries[idx].entry, > > > > > > + adev->msix_mask_bitmap); > > > > > > + enable_irq(irq); > > > > > > + } > > > > > > +out: > > > > > > + spin_unlock_irq(&adev->assigned_dev_lock); > > > > > > + > > > > > > + if (need_flush) > > > > > > + flush_work(&adev->interrupt_work); > > > > > > +} > > > > > > + > > > > > >=20 > > > > > > static int kvm_vm_ioctl_set_msix_entry(struct kvm *kvm, > > > > > > =20 > > > > > > struct kvm_assigned_msix_entry *entry) > > > > > > =20 > > > > > > { > > > > > >=20 > > > > > > @@ -701,6 +757,233 @@ msix_entry_out: > > > > > > return r; > > > > > > =20 > > > > > > } > > > > > >=20 > > > > > > + > > > > > > +static int kvm_vm_ioctl_get_msix_entry(struct kvm *kvm, > > > > > > + struct kvm_msix_entry *entry) > > > > > > +{ > > > > > > + int r =3D 0; > > > > > > + struct kvm_assigned_dev_kernel *adev; > > > > > > + > > > > > > + if (entry->type !=3D KVM_MSIX_TYPE_ASSIGNED_DEV) > > > > > > + return -EINVAL; > > > > > > + > > > > > > + if (!entry->query_flags) > > > > > > + return -EINVAL; > > > > > > + > > > > > > + mutex_lock(&kvm->lock); > > > > > > + > > > > > > + adev =3D kvm_find_assigned_dev(&kvm->arch.assigned_dev_he= ad, > > > > > > + entry->id); > > > > > > + > > > > > > + if (!adev) { > > > > > > + r =3D -EINVAL; > > > > > > + goto out; > > > > > > + } > > > > > > + > > > > > > + if (entry->entry >=3D adev->msix_max_entries_nr) { > > > > > > + r =3D -ENOSPC; > > > > > > + goto out; > > > > > > + } > > > > > > + > > > > > > + if (entry->query_flags & KVM_MSIX_FLAG_QUERY_MASKBIT) { > > > > > > + if (test_bit(entry->entry, adev->msix_mask_bitmap)) > > > > > > + entry->flags |=3D KVM_MSIX_FLAG_MASKBIT; > > > > > > + else > > > > > > + entry->flags &=3D ~KVM_MSIX_FLAG_MASKBIT; > > > > > > + } > > > > > > + > > > > > > 
+out: > > > > > > + mutex_unlock(&kvm->lock); > > > > > > + > > > > > > + return r; > > > > > > +} > > > > > > + > > > > > > +static bool msix_mmio_in_range(struct kvm_assigned_dev_ker= nel > > > > > > *adev, + gpa_t addr, int len) > > > > > > +{ > > > > > > + gpa_t start, end; > > > > > > + > > > > > > + BUG_ON(adev->msix_mmio_base =3D=3D 0); > > > > >=20 > > > > > I thought we wanted to use a separate flag for this? > > > >=20 > > > > O, flag added, but forgot to update this one. > > > >=20 > > > > > > + start =3D adev->msix_mmio_base; > > > > > > + end =3D adev->msix_mmio_base + PCI_MSIX_ENTRY_SIZE * > > > > > > + adev->msix_max_entries_nr; > > > > > > + if (addr >=3D start && addr + len <=3D end) > > > > > > + return true; > > > > > > + > > > > > > + return false; > > > > > > +} > > > > > > + > > > > > > +static int msix_get_enabled_idx(struct kvm_assigned_dev_ke= rnel > > > > > > *adev, + gpa_t addr, int len) > > > > > > +{ > > > > > > + int i, index =3D (addr - adev->msix_mmio_base) / > > > > > > PCI_MSIX_ENTRY_SIZE; + > > > > > > + for (i =3D 0; i < adev->entries_nr; i++) > > > > > > + if (adev->guest_msix_entries[i].entry =3D=3D index) > > > > > > + return i; > > > > > > + > > > > > > + return -EINVAL; > > > > > > +} > > > > > > + > > > > >=20 > > > > > Hmm, we still have a linear scan on each write. Is the point= to > > > > > detect which entries need to be handled in kernel? Maybe anot= her > > > > > bitmap for this? Or just handle whole entry write in kernel = as > > > > > well, then we won't need this at all. > > > >=20 > > > > Still dont' like handling the whole entry writing. In fact here= we > > > > have two questions: > > > > 1. If the entry already enabled > > > > 2. If it is, then what's it sequence number. > > > >=20 > > > > Bitmap can solve question 1, but we still need O(n) scan for th= e > > > > second. So I think leave it like this should be fine. 
> > >=20 > > > The spec quoted above implies we must handle all entry writes: > > > a single write can update mask and data. > >=20 > > Anyway it's the work to be done by QEmu, if we want to cover the > > situation that single write can update mask and data - that's surel= y > > QWORD, which hasn't been supported yet. >=20 > Must pass the transactions to qemu then :) >=20 > > We can back to them if there is someone really did it in that way. = But > > for all hypervisors using QEmu, I think we haven't seen such kind o= f > > behavior yet. >=20 > I would rather stick to the spec than go figure out what do BSD/Sun/M= ac do, > or will do. Sure, but no hurry for that. It isn't similar to the API case, so we = can achieve=20 it incrementally. >=20 > > So we > > can leave it later. > >=20 > > -- > > regards > > Yang, Sheng > >=20 > > > > > > +static int msix_mmio_read(struct kvm_io_device *this, gpa_= t > > > > > > addr, int len, + void *val) > > > > > > +{ > > > > > > + struct kvm_assigned_dev_kernel *adev =3D > > > > > > + container_of(this, struct kvm_assigned_dev_kernel, > > > > > > + msix_mmio_dev); > > > > > > + int idx, r =3D 0; > > > > > > + u32 entry[4]; > > > > > > + struct kvm_kernel_irq_routing_entry e; > > > > > > + > > > > > > + mutex_lock(&adev->kvm->lock); > > > > > > + if (!msix_mmio_in_range(adev, addr, len)) { > > > > > > + r =3D -EOPNOTSUPP; > > > > > > + goto out; > > > > > > + } > > > > > > + if ((addr & 0x3) || len !=3D 4) > > > > > > + goto out; > > > > > > + > > > > > > + idx =3D msix_get_enabled_idx(adev, addr, len); > > > > > > + if (idx < 0) { > > > > > > + idx =3D (addr - adev->msix_mmio_base) / PCI_MSIX_ENTRY_S= IZE; > > > > > > + if ((addr % PCI_MSIX_ENTRY_SIZE) =3D=3D > > > > > > + PCI_MSIX_ENTRY_VECTOR_CTRL) > > > > > > + *(unsigned long *)val =3D > > > > > > + test_bit(idx, adev->msix_mask_bitmap) ? 
> > > > > > + PCI_MSIX_ENTRY_CTRL_MASKBIT : 0; > > > > > > + else > > > > > > + r =3D -EOPNOTSUPP; > > > > > > + goto out; > > > > > > + } > > > > > > + > > > > > > + r =3D kvm_get_irq_routing_entry(adev->kvm, > > > > > > + adev->guest_msix_entries[idx].vector, &e); > > > > > > + if (r || e.type !=3D KVM_IRQ_ROUTING_MSI) { > > > > > > + r =3D -EOPNOTSUPP; > > > > > > + goto out; > > > > > > + } > > > > > > + entry[0] =3D e.msi.address_lo; > > > > > > + entry[1] =3D e.msi.address_hi; > > > > > > + entry[2] =3D e.msi.data; > > > > > > + entry[3] =3D test_bit(adev->guest_msix_entries[idx].entry= , > > > > > > + adev->msix_mask_bitmap); > > > > > > + memcpy(val, &entry[addr % PCI_MSIX_ENTRY_SIZE / sizeof *e= ntry], > > > > > > len); > > > > >=20 > > > > > It seems weird to pass writes to userspace but do reads > > > > > in kernel. > > > > > Either we should support entry read and write or neither. > > > >=20 > > > > Entry read is for speed up, because kernel would write to the m= ask > > > > bit then try to read from it for flushing. >=20 > What I see linux doing is reading the 1st entry, always. > So this means we must handle all table reads in kernel, > we can't pass any to userspace. Anyway, only new kernels do this, > presumably they touch msix table rarely so it should not matter > for performance? This change is still due to the performance issue we identified. The ca= se is when=20 many SRIOV VF running on one machine with new kernel, the interrupt wou= ld also be=20 very frequently then host may decide to mask someone temporarily. This = behavior=20 can also be very frequently.=20 >=20 > > Don't think reading matter much > >=20 > > > > here, as long as userspace still own routing table. >=20 > Why insist on using the routing table then? Since we handle all of t= he > entry for reads, let's do it for writes too? > The only justification for the split was that supposedly registers we= re > always programmed separately. 
But now we see that spec does not mand= ate > this. My point is, I don't really care if we are using routing table. It's no= more than=20 provide some data/address pair in this case. But I do want to push the = patch. The=20 patch here only needs a data/address pair, and now routing table provid= e it. If=20 you want to change this, you can modify it later. But it got noth= ing to do=20 with the current patch's logic. You can simply modify it as well, as yo= u needed in=20 the future. >=20 > > > > > > + > > > > > > +out: > > > > > > + mutex_unlock(&adev->kvm->lock); > > > > > > + return r; > > > > > > +} > > > > > > + > > > > > > +static int msix_mmio_write(struct kvm_io_device *this, gpa= _t > > > > > > addr, int len, + const void *val) > > > > > > +{ > > > > > >=20 > > > > > > + struct kvm_assigned_dev_kernel *adev =3D > > > > > > + container_of(this, struct kvm_assigned_dev_kernel, > > > > > > + msix_mmio_dev); > > > > > > + int idx, r =3D 0; > > > > > > + unsigned long new_val =3D *(unsigned long *)val; > > > > >=20 > > > > > Move this to after you did length and alignment checks, > > > > > it might not be safe here. > > > > >=20 > > > > > > + > > > > > > + mutex_lock(&adev->kvm->lock); > > > > > > + if (!msix_mmio_in_range(adev, addr, len)) { > > > > > > + r =3D -EOPNOTSUPP; > > > > > > + goto out; > > > > > > + } > > > > > > + if ((addr & 0x3) || len !=3D 4) > > > > >=20 > > > > > The length could be 8: > > > > >=20 > > > > > For all accesses to MSI-X Table and MSI-X PBA fields, softwar= e must > > > > > use aligned full DWORD or aligned full QWORD transactions; > > > > > otherwise, the result is undefined. > > > > >=20 > > > > > and > > > > >=20 > > > > > Software is permitted to fill in MSI-X Table entry DWORD fiel= ds > > > > > individually with DWORD writes, or software in certain cases = is > > > > > permitted to fill in appropriate pairs of DWORDs with a singl= e > > > > > QWORD write. 
Specifically, software is always permitted to fi= ll in > > > > > the Message Address and Message Upper Address fields with a s= ingle > > > > > QWORD write. If a given entry is currently masked (via its Ma= sk > > > > > bit or the Function Mask bit), software is permitted to fill = in > > > > > the Message Data and Vector Control fields with a single QWOR= D > > > > > write, taking advantage of the fact the Message Data field is > > > > > guaranteed to become visible to hardware no later than the Ve= ctor > > > > > Control field. However, if software wishes to mask a currentl= y > > > > > unmasked entry (without setting the Function Mask bit), softw= are > > > > > must set the entry=E2=80=99s Mask bit using a DWORD write to = the Vector > > > > > Control field, since performing a QWORD write to the Message = Data > > > > > and Vector Control fields might result in the Message Data fi= eld > > > > > being modified before the Mask bit in the Vector Control fiel= d > > > > > becomes set. > > > >=20 > > > > Haven't seen any device use QWORD accessing. Also QEmu can't ha= ndle > > > > QWORD MMIO as well. >=20 > That's a userspace bug. Not sure whether this justifies copying the b= ug > in kernel. We just haven't implemented the full spec, but we can still cover all k= nown=20 situations now. So we can work on that later, incrementally. >=20 > > > > So I guess we can leave it later. >=20 > Maybe what we can do is handle the mask in kernel + > pass to userspace to handle the address/vector update? Maybe, and we can do that later.=20 It's somehow like the customer's demand keeping changing all the time, = but we have=20 to do something first and something later, and cut some release in betwe= en. It would=20 be incremental, not completely 0 or completely 1. I'd like to have some= code base=20 checked in first. -- regards Yang, Sheng