All of lore.kernel.org
 help / color / mirror / Atom feed
From: Sheng Yang <sheng@linux.intel.com>
To: Avi Kivity <avi@redhat.com>,
	Marcelo Tosatti <mtosatti@redhat.com>,
	"Michael S. Tsirkin" <mst@redhat.com>
Cc: kvm@vger.kernel.org, Sheng Yang <sheng@linux.intel.com>
Subject: [PATCH 4/5] qemu-kvm: device assignment: emulate MSI-X mask bits
Date: Thu, 11 Nov 2010 15:51:33 +0800	[thread overview]
Message-ID: <1289461894-7355-5-git-send-email-sheng@linux.intel.com> (raw)
In-Reply-To: <1289461894-7355-1-git-send-email-sheng@linux.intel.com>

This patch emulated MSI-X per vector mask bit on assigned device.

Signed-off-by: Sheng Yang <sheng@linux.intel.com>
---
 hw/device-assignment.c |  167 +++++++++++++++++++++++++++++++++++++++++++++---
 hw/device-assignment.h |    2 +-
 2 files changed, 158 insertions(+), 11 deletions(-)

diff --git a/hw/device-assignment.c b/hw/device-assignment.c
index 5fe4a55..44b9d43 100644
--- a/hw/device-assignment.c
+++ b/hw/device-assignment.c
@@ -62,6 +62,11 @@ static void assigned_dev_load_option_rom(AssignedDevice *dev);
 
 static void assigned_dev_unregister_msix_mmio(AssignedDevice *dev);
 
+static uint32_t calc_assigned_dev_id(uint16_t seg, uint8_t bus, uint8_t devfn)
+{
+    return (uint32_t)seg << 16 | (uint32_t)bus << 8 | (uint32_t)devfn;
+}
+
 static uint32_t assigned_dev_ioport_rw(AssignedDevRegion *dev_region,
                                        uint32_t addr, int len, uint32_t *val)
 {
@@ -264,6 +269,10 @@ static void assigned_dev_iomem_map(PCIDevice *pci_dev, int region_num,
     AssignedDevRegion *region = &r_dev->v_addrs[region_num];
     PCIRegion *real_region = &r_dev->real_device.regions[region_num];
     int ret = 0;
+#ifdef KVM_CAP_MSIX_MASK
+    int cap_mask = kvm_check_extension(kvm_state, KVM_CAP_MSIX_MASK);
+    struct kvm_msix_mmio msix_mmio;
+#endif
 
     DEBUG("e_phys=%08" FMT_PCIBUS " r_virt=%p type=%d len=%08" FMT_PCIBUS " region_num=%d \n",
           e_phys, region->u.r_virtbase, type, e_size, region_num);
@@ -282,6 +291,21 @@ static void assigned_dev_iomem_map(PCIDevice *pci_dev, int region_num,
 
             cpu_register_physical_memory(e_phys + offset,
                     TARGET_PAGE_SIZE, r_dev->mmio_index);
+
+#ifdef KVM_CAP_MSIX_MASK
+            if (cap_mask) {
+                memset(&msix_mmio, 0, sizeof msix_mmio);
+                msix_mmio.id = calc_assigned_dev_id(r_dev->h_segnr,
+                        r_dev->h_busnr, r_dev->h_devfn);
+                msix_mmio.type = KVM_MSIX_TYPE_ASSIGNED_DEV;
+                msix_mmio.base_addr = e_phys + offset;
+                msix_mmio.max_entries_nr = r_dev->max_msix_entries_nr;
+                msix_mmio.flags = KVM_MSIX_MMIO_FLAG_REGISTER;
+                ret = kvm_update_msix_mmio(kvm_context, &msix_mmio);
+                if (ret)
+                    fprintf(stderr, "fail to register in-kernel msix_mmio!\n");
+            }
+#endif
         }
     }
 
@@ -824,11 +848,6 @@ static void free_assigned_device(AssignedDevice *dev)
     }
 }
 
-static uint32_t calc_assigned_dev_id(uint16_t seg, uint8_t bus, uint8_t devfn)
-{
-    return (uint32_t)seg << 16 | (uint32_t)bus << 8 | (uint32_t)devfn;
-}
-
 static void assign_failed_examine(AssignedDevice *dev)
 {
     char name[PATH_MAX], dir[PATH_MAX], driver[PATH_MAX] = {}, *ns;
@@ -1125,6 +1144,34 @@ static int get_msix_entries_max_nr(AssignedDevice *adev)
     return entries_max_nr;
 }
 
+static int assigned_dev_msix_entry_masked(AssignedDevice *adev, int entry)
+{
+    uint32_t msg_ctrl;
+    void *va = adev->msix_table_page;
+#ifdef KVM_CAP_MSIX_MASK
+    int cap_mask = kvm_check_extension(kvm_state, KVM_CAP_MSIX_MASK);
+    struct kvm_msix_entry msix_entry;
+    int r;
+
+    if (cap_mask) {
+        memset(&msix_entry, 0, sizeof msix_entry);
+        msix_entry.id = calc_assigned_dev_id(adev->h_segnr,
+                adev->h_busnr, (uint8_t)adev->h_devfn);
+        msix_entry.type = KVM_MSIX_TYPE_ASSIGNED_DEV;
+        msix_entry.entry = entry;
+        msix_entry.query_flags = KVM_MSIX_FLAG_QUERY_MASKBIT;
+        r = kvm_get_msix_entry(kvm_context, &msix_entry);
+        if (r) {
+            DEBUG("Fail to get mask bit of entry %d\n", entry);
+            return 1;
+        }
+        return (msix_entry.flags & KVM_MSIX_FLAG_MASKBIT);
+    }
+#endif
+    memcpy(&msg_ctrl, va + entry * 16 + 12, 4);
+    return (msg_ctrl & PCI_MSIX_CTRL_MASKBIT);
+}
+
 static int get_msix_valid_entries_nr(AssignedDevice *adev,
 				     uint16_t entries_max_nr)
 {
@@ -1137,7 +1184,7 @@ static int get_msix_valid_entries_nr(AssignedDevice *adev,
     for (i = 0; i < entries_max_nr; i++) {
         memcpy(&msg_ctrl, va + i * 16 + 12, 4);
         /* Ignore unused entry even it's unmasked */
-        if (msg_ctrl & PCI_MSIX_CTRL_MASKBIT)
+        if (assigned_dev_msix_entry_masked(adev, i))
             continue;
         entries_nr ++;
     }
@@ -1166,6 +1213,8 @@ static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev,
     }
 
     free_dev_irq_entries(adev);
+    memset(pci_dev->msix_entry_used, 0, KVM_MAX_MSIX_PER_DEV *
+                                        sizeof(*pci_dev->msix_entry_used));
     adev->irq_entries_nr = entries_nr;
     adev->entry = calloc(entries_nr, sizeof(struct kvm_irq_routing_entry));
     if (!adev->entry) {
@@ -1179,7 +1228,7 @@ static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev,
         if (entries_nr >= msix_nr.entry_nr)
             break;
         memcpy(&msg_ctrl, va + i * 16 + 12, 4);
-        if (msg_ctrl & PCI_MSIX_CTRL_MASKBIT)
+        if (assigned_dev_msix_entry_masked(adev, i))
             continue;
 
         memcpy(&msg_data, va + i * 16 + 8, 4);
@@ -1201,6 +1250,7 @@ static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev,
 
         msix_entry.gsi = adev->entry[entries_nr].gsi;
         msix_entry.entry = i;
+        pci_dev->msix_entry_used[i] = 1;
         r = kvm_assign_set_msix_entry(kvm_context, &msix_entry);
         if (r) {
             fprintf(stderr, "fail to set MSI-X entry! %s\n", strerror(-r));
@@ -1244,6 +1294,8 @@ static void assigned_dev_update_msix(PCIDevice *pci_dev, int enable_msix)
             perror("assigned_dev_update_msix: deassign irq");
 
         assigned_dev->irq_requested_type = 0;
+        memset(pci_dev->msix_entry_used, 0, KVM_MAX_MSIX_PER_DEV *
+                                        sizeof(*pci_dev->msix_entry_used));
     }
 
     entries_max_nr = get_msix_entries_max_nr(assigned_dev);
@@ -1251,10 +1303,12 @@ static void assigned_dev_update_msix(PCIDevice *pci_dev, int enable_msix)
         fprintf(stderr, "assigned_dev_update_msix: MSI-X entries_max_nr == 0");
         return;
     }
+    /*
+     * Guest may try to enable MSI-X before setting MSI-X entry done, so
+     * let's wait until guest unmask the entries.
+     */
     entries_nr = get_msix_valid_entries_nr(assigned_dev, entries_max_nr);
     if (entries_nr == 0) {
-        if (enable_msix)
-            fprintf(stderr, "MSI-X entry number is zero!\n");
         return;
     }
     if (enable_msix) {
@@ -1298,7 +1352,8 @@ static void assigned_device_pci_cap_write_config(PCIDevice *pci_dev, uint32_t ad
         if (address <= ctrl_pos && address + len > ctrl_pos) {
             ctrl_pos--; /* control is word long */
             ctrl_word = (uint16_t *)(pci_dev->config + ctrl_pos);
-            assigned_dev_update_msix(pci_dev, (*ctrl_word & PCI_MSIX_ENABLE));
+            assigned_dev_update_msix(pci_dev,
+                    (*ctrl_word & PCI_MSIX_ENABLE) && !(*ctrl_word & PCI_MSIX_MASK));
 	}
         pos += PCI_CAPABILITY_CONFIG_MSIX_LENGTH;
     }
@@ -1352,6 +1407,7 @@ static int assigned_device_pci_cap_init(PCIDevice *pci_dev)
         bar_nr = msix_table_entry & PCI_MSIX_BIR;
         msix_table_entry &= ~PCI_MSIX_BIR;
         dev->msix_table_addr = pci_region[bar_nr].base_addr + msix_table_entry;
+        dev->max_msix_entries_nr = get_msix_entries_max_nr(dev);
         if (next_cap_pt != 0) {
             pci_dev->config[pci_dev->cap.start + next_cap_pt] =
                 pci_dev->cap.start + pci_dev->cap.length;
@@ -1396,10 +1452,97 @@ static void msix_mmio_writel(void *opaque,
     AssignedDevice *adev = opaque;
     unsigned int offset = addr & 0xfff;
     void *page = adev->msix_table_page;
+    int pos, ctrl_word, index;
+    struct kvm_irq_routing_entry new_entry = {};
+    int entry_idx, entries_max_nr, r = 0, i;
+    uint32_t msg_ctrl, msg_data, msg_upper_addr, msg_addr;
 
     DEBUG("write to MSI-X entry table mmio offset 0x%lx, val 0x%x\n",
 		    addr, val);
     memcpy((void *)((char *)page + offset), &val, 4);
+
+    index = offset / 16;
+
+    /* Check if mask bit is being accessed */
+    memcpy(&msg_addr, (char *)page + index * 16, 4);
+    memcpy(&msg_upper_addr, (char *)page + index * 16 + 4, 4);
+    memcpy(&msg_data, (char *)page + index * 16 + 8, 4);
+    memcpy(&msg_ctrl, (char *)page + index * 16 + 12, 4);
+    DEBUG("MSI-X entries index %d: "
+            "msg_addr 0x%x, msg_upper_addr 0x%x, msg_data 0x%x, vec_ctl 0x%x\n",
+            index, msg_addr, msg_upper_addr, msg_data, msg_ctrl);
+
+    if (adev->cap.available & ASSIGNED_DEVICE_CAP_MSI)
+        pos = adev->dev.cap.start + PCI_CAPABILITY_CONFIG_MSI_LENGTH;
+    else
+        pos = adev->dev.cap.start;
+
+    ctrl_word = *(uint16_t *)(adev->dev.config + pos + 2);
+
+    if (!((ctrl_word & PCI_MSIX_ENABLE) && !(ctrl_word & PCI_MSIX_MASK)))
+        return;
+
+    if (!assigned_dev_msix_entry_masked(adev, index)) {
+        if (!adev->dev.msix_entry_used[index]) {
+            DEBUG("Try to modify unenabled MSI-X entry %d's mask. "
+                    "Reenable MSI-X.\n",
+                    index);
+            assigned_dev_update_msix(&adev->dev, 1);
+        }
+        return;
+    }
+
+    if (!adev->dev.msix_entry_used[index])
+        return;
+
+    /*
+     * We're here only because guest want to modify MSI data/addr, and
+     * kernel would filter those writing with mask bit unset.
+     */
+    entries_max_nr = get_msix_entries_max_nr(adev);
+
+    /*
+     * Find the index of routing entry, it can be different from 'index' if
+     * empty entry existed in between
+     */
+    entry_idx = -1;
+    for (i = 0; i <= index; i++) {
+        if (adev->dev.msix_entry_used[i])
+            entry_idx ++;
+    }
+    if (entry_idx >= entries_max_nr || entry_idx == -1) {
+        fprintf(stderr, "msix_mmio_writel: Entry idx %d exceed limit!\n",
+			entry_idx);
+        return;
+    }
+
+    if (!assigned_dev_msix_entry_masked(adev, index)) {
+        fprintf(stderr, "msix_mmio_writel: Trying write to unmasked entry!\n");
+        return;
+    }
+
+    new_entry.gsi = adev->entry[entry_idx].gsi;
+    new_entry.type = KVM_IRQ_ROUTING_MSI;
+    new_entry.flags = 0;
+    new_entry.u.msi.address_lo = msg_addr;
+    new_entry.u.msi.address_hi = msg_upper_addr;
+    new_entry.u.msi.data = msg_data;
+    if (memcmp(&adev->entry[entry_idx].u.msi, &new_entry.u.msi,
+                sizeof new_entry.u.msi)) {
+        r = kvm_update_routing_entry(&adev->entry[entry_idx], &new_entry);
+        if (r) {
+            perror("msix_mmio_writel: kvm_update_routing_entry failed\n");
+            return;
+        }
+        r = kvm_commit_irq_routes();
+        if (r) {
+            perror("msix_mmio_writel: kvm_commit_irq_routes failed\n");
+            return;
+        }
+    }
+    adev->entry[entry_idx].u.msi.address_lo = msg_addr;
+    adev->entry[entry_idx].u.msi.address_hi = msg_upper_addr;
+    adev->entry[entry_idx].u.msi.data = msg_data;
 }
 
 static void msix_mmio_writew(void *opaque,
@@ -1440,6 +1583,8 @@ static int assigned_dev_register_msix_mmio(AssignedDevice *dev)
         *(uint32_t *)(dev->msix_table_page + i + 0xc) = 1;
     dev->mmio_index = cpu_register_io_memory(
                         msix_mmio_read, msix_mmio_write, dev);
+    dev->dev.msix_entry_used = qemu_mallocz(KVM_MAX_MSIX_PER_DEV *
+                                        sizeof *dev->dev.msix_entry_used);
     return 0;
 }
 
@@ -1456,6 +1601,8 @@ static void assigned_dev_unregister_msix_mmio(AssignedDevice *dev)
                 strerror(errno));
     }
     dev->msix_table_page = NULL;
+    free(dev->dev.msix_entry_used);
+    dev->dev.msix_entry_used = NULL;
 }
 
 static int assigned_initfn(struct PCIDevice *pci_dev)
diff --git a/hw/device-assignment.h b/hw/device-assignment.h
index 2f5fa17..17c85c4 100644
--- a/hw/device-assignment.h
+++ b/hw/device-assignment.h
@@ -98,7 +98,7 @@ typedef struct AssignedDevice {
 #define ASSIGNED_DEVICE_MSIX_MASKED (1 << 2)
         uint32_t state;
     } cap;
-    int irq_entries_nr;
+    int irq_entries_nr, max_msix_entries_nr;
     struct kvm_irq_routing_entry *entry;
     void *msix_table_page;
     target_phys_addr_t msix_table_addr;
-- 
1.7.0.1


  parent reply	other threads:[~2010-11-11  7:51 UTC|newest]

Thread overview: 7+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-11-11  7:51 [PATCH 0/5 v4] MSI-X mask support for userspace Sheng Yang
2010-11-11  7:51 ` [PATCH 1/5] qemu-kvm: device assignment: Reset MSI-X table mask bit as set Sheng Yang
2010-11-11  7:51 ` [PATCH 2/5] qemu-kvm: Ioctl for in-kernel mask support Sheng Yang
2010-11-11  7:51 ` [PATCH 3/5] qemu-kvm: device assignment: Some clean up about MSI-X code Sheng Yang
2010-11-11  7:51 ` Sheng Yang [this message]
2010-11-11  7:51 ` [PATCH 5/5] qemu-kvm: Header file update for MSI-X mask support Sheng Yang
  -- strict thread matches above, loose matches on Subject: below --
2010-10-20  8:29 [PATCH 0/5] MSI-X mask support for assigned device on userspace Sheng Yang
2010-10-20  8:29 ` [PATCH 4/5] qemu-kvm: device assignment: emulate MSI-X mask bits Sheng Yang

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1289461894-7355-5-git-send-email-sheng@linux.intel.com \
    --to=sheng@linux.intel.com \
    --cc=avi@redhat.com \
    --cc=kvm@vger.kernel.org \
    --cc=mst@redhat.com \
    --cc=mtosatti@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.