From: Alexey Kardashevskiy <aik@au1.ibm.com>
To: Alex Williamson <alex.williamson@redhat.com>
Cc: aafabbri@cisco.com, kvm@vger.kernel.org, pmac@au1.ibm.com,
qemu-devel@nongnu.org, joerg.roedel@amd.com,
konrad.wilk@oracle.com, agraf@suse.de, dwg@au1.ibm.com,
chrisw@sous-sol.org, B08248@freescale.com,
iommu@lists.linux-foundation.org, avi@redhat.com,
linux-pci@vger.kernel.org, B07421@freescale.com, benve@cisco.com
Subject: Re: [Qemu-devel] [RFC PATCH] vfio: VFIO Driver core framework
Date: Tue, 29 Nov 2011 13:11:05 +1100 [thread overview]
Message-ID: <4ED43F39.8050606@au1.ibm.com> (raw)
In-Reply-To: <4ED43CFE.8040009@au1.ibm.com>
Hi all again,
It was actually the very first problem - endianness :-)
I am still not sure which format is better for the cached config space or whether we should cache it all.
Also, as Benh already mentioned, vfio_virt_init reads the whole config space into a cache using
pci_read_config_dword, which some devices may not like, as they might distinguish between PCI
transactions of different lengths.
KERNEL patch:
diff --git a/drivers/vfio/pci/vfio_pci_config.c b/drivers/vfio/pci/vfio_pci_config.c
index b3bab99..9d563b4 100644
--- a/drivers/vfio/pci/vfio_pci_config.c
+++ b/drivers/vfio/pci/vfio_pci_config.c
@@ -757,6 +757,16 @@ static int vfio_virt_init(struct vfio_pci_device *vdev)
vdev->rbar[5] = *(u32 *)&vdev->vconfig[PCI_BASE_ADDRESS_5];
vdev->rbar[6] = *(u32 *)&vdev->vconfig[PCI_ROM_ADDRESS];
+ /*
+ * As pci_read_config_XXXX returns data in native format,
+ * and the cached copy is used in assumption that it is
+ * native PCI format, fix endianness in the cached copy.
+ */
+ lp = (u32 *)vdev->vconfig;
+ for (i = 0; i < pdev->cfg_size/sizeof(u32); i++, lp++) {
+ *lp = cpu_to_le32(*lp);
+ }
+
/* for sr-iov devices */
vdev->vconfig[PCI_VENDOR_ID] = pdev->vendor & 0xFF;
vdev->vconfig[PCI_VENDOR_ID+1] = pdev->vendor >> 8;
@@ -807,18 +817,18 @@ static void vfio_bar_fixup(struct vfio_pci_device *vdev)
else
mask = 0;
lp = (u32 *)(vdev->vconfig + PCI_BASE_ADDRESS_0 + 4*bar);
- *lp &= (u32)mask;
+ *lp &= cpu_to_le32((u32)mask);
if (pci_resource_flags(pdev, bar) & IORESOURCE_IO)
- *lp |= PCI_BASE_ADDRESS_SPACE_IO;
+ *lp |= cpu_to_le32(PCI_BASE_ADDRESS_SPACE_IO);
else if (pci_resource_flags(pdev, bar) & IORESOURCE_MEM) {
- *lp |= PCI_BASE_ADDRESS_SPACE_MEMORY;
+ *lp |= cpu_to_le32(PCI_BASE_ADDRESS_SPACE_MEMORY);
if (pci_resource_flags(pdev, bar) & IORESOURCE_PREFETCH)
- *lp |= PCI_BASE_ADDRESS_MEM_PREFETCH;
+ *lp |= cpu_to_le32(PCI_BASE_ADDRESS_MEM_PREFETCH);
if (pci_resource_flags(pdev, bar) & IORESOURCE_MEM_64) {
- *lp |= PCI_BASE_ADDRESS_MEM_TYPE_64;
+ *lp |= cpu_to_le32(PCI_BASE_ADDRESS_MEM_TYPE_64);
lp++;
- *lp &= (u32)(mask >> 32);
+ *lp &= cpu_to_le32((u32)(mask >> 32));
bar++;
}
}
@@ -830,7 +840,7 @@ static void vfio_bar_fixup(struct vfio_pci_device *vdev)
} else
mask = 0;
lp = (u32 *)(vdev->vconfig + PCI_ROM_ADDRESS);
- *lp &= (u32)mask;
+ *lp &= cpu_to_le32((u32)mask);
vdev->bardirty = 0;
}
=== end ===
QEMU patch:
diff --git a/hw/vfio_pci.c b/hw/vfio_pci.c
index 980eec7..1c97c35 100644
--- a/hw/vfio_pci.c
+++ b/hw/vfio_pci.c
@@ -405,6 +405,8 @@ static void vfio_resource_write(void *opaque, target_phys_addr_t addr,
{
PCIResource *res = opaque;
+ fprintf(stderr, "change endianness????\n");
+
if (pwrite(res->fd, &data, size, res->offset + addr) != size) {
fprintf(stderr, "%s(,0x%"PRIx64", 0x%"PRIx64", %d) failed: %s\n",
__FUNCTION__, addr, data, size, strerror(errno));
@@ -429,6 +431,9 @@ static uint64_t vfio_resource_read(void *opaque,
DPRINTF("%s(BAR%d+0x%"PRIx64", %d) = 0x%"PRIx64"\n",
__FUNCTION__, res->bar, addr, size, data);
+ data = le32_to_cpu(data);
+ DPRINTF("%s(BAR%d+0x%"PRIx64", %d) = 0x%"PRIx64" --- CPU\n",
+ __FUNCTION__, res->bar, addr, size, data);
return data;
}
@@ -454,13 +459,25 @@ static uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len)
val = pci_default_read_config(pdev, addr, len);
} else {
- if (pread(vdev->fd, &val, len, vdev->config_offset + addr) != len) {
+ u8 buf[4] = {0};
+ if (pread(vdev->fd, buf, len, vdev->config_offset + addr) != len) {
fprintf(stderr, "%s(%04x:%02x:%02x.%x, 0x%x, 0x%x) failed: %s\n",
__FUNCTION__, vdev->host.seg, vdev->host.bus,
vdev->host.dev, vdev->host.func, addr, len,
strerror(errno));
return -1;
}
+ switch (len) {
+ case 1: val = buf[0]; break;
+ case 2: val = le16_to_cpupu((uint16_t*)buf); break;
+ case 4: val = le32_to_cpupu((uint32_t*)buf); break;
+ default:
+ fprintf(stderr, "%s(%04x:%02x:%02x.%x, 0x%x, 0x%x) failed: %s\n",
+ __FUNCTION__, vdev->host.seg, vdev->host.bus,
+ vdev->host.dev, vdev->host.func, addr, len,
+ strerror(errno));
+ break;
+ }
}
DPRINTF("%s(%04x:%02x:%02x.%x, 0x%x, 0x%x) %x\n", __FUNCTION__,
vdev->host.seg, vdev->host.bus, vdev->host.dev,
@@ -477,8 +494,20 @@ static void vfio_pci_write_config(PCIDevice *pdev, uint32_t addr,
vdev->host.seg, vdev->host.bus, vdev->host.dev,
vdev->host.func, addr, val, len);
+ u8 buf[4] = {0};
+ switch (len) {
+ case 1: buf[0] = val & 0xFF; break;
+ case 2: cpu_to_le16wu((uint16_t*)buf, val); break;
+ case 4: cpu_to_le32wu((uint32_t*)buf, val); break;
+ default:
+ fprintf(stderr, "%s(%04x:%02x:%02x.%x, 0x%x, 0x%x, 0x%x) failed: %s\n",
+ __FUNCTION__, vdev->host.seg, vdev->host.bus, vdev->host.dev,
+ vdev->host.func, addr, val, len, strerror(errno));
+ return;
+ }
+
/* Write everything to VFIO, let it filter out what we can't write */
- if (pwrite(vdev->fd, &val, len, vdev->config_offset + addr) != len) {
+ if (pwrite(vdev->fd, buf, len, vdev->config_offset + addr) != len) {
fprintf(stderr, "%s(%04x:%02x:%02x.%x, 0x%x, 0x%x, 0x%x) failed: %s\n",
__FUNCTION__, vdev->host.seg, vdev->host.bus, vdev->host.dev,
vdev->host.func, addr, val, len, strerror(errno));
@@ -675,6 +704,7 @@ static int vfio_setup_msi(VFIODevice *vdev)
vdev->config_offset + pos + PCI_CAP_FLAGS) != sizeof(ctrl)) {
return -1;
}
+ ctrl = le16_to_cpu(ctrl);
msi_64bit = !!(ctrl & PCI_MSI_FLAGS_64BIT);
msi_maskbit = !!(ctrl & PCI_MSI_FLAGS_MASKBIT);
=== end ===
On 29/11/11 13:01, Alexey Kardashevskiy wrote:
> Hi all,
>
> Another problem I hit on POWER - MSI interrupts allocation. The existing VFIO does not expect a PHB
> to support fewer interrupts than a device might request. In my case, PHB's limit is 8 interrupts
> while my test card (10Gb ethernet CXGB3) wants 9. Below are the patches to demonstrate the idea.
>
>
>
>
>
> On 29/11/11 12:52, Alexey Kardashevskiy wrote:
>> Hi!
>>
>> I tried (successfully) to run it on POWER and while doing that I found some issues. I'll try to
>> explain them in separate mails.
>>
>>
>>
>> On 04/11/11 07:12, Alex Williamson wrote:
>>> VFIO provides a secure, IOMMU based interface for user space
>>> drivers, including device assignment to virtual machines.
>>> This provides the base management of IOMMU groups, devices,
>>> and IOMMU objects. See Documentation/vfio.txt included in
>>> this patch for user and kernel API description.
>>>
>>> Note, this implements the new API discussed at KVM Forum
>>> 2011, as represented by the driver version 0.2. It's hoped
>>> that this provides a modular enough interface to support PCI
>>> and non-PCI userspace drivers across various architectures
>>> and IOMMU implementations.
>>>
>>> Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
>>> ---
>>>
>>> Fingers crossed, this is the last RFC for VFIO, but we need
>>> the iommu group support before this can go upstream
>>> (http://lkml.indiana.edu/hypermail/linux/kernel/1110.2/02303.html),
>>> hoping this helps push that along.
>>>
>>> Since the last posting, this version completely modularizes
>>> the device backends and better defines the APIs between the
>>> core VFIO code and the device backends. I expect that we
>>> might also adopt a modular IOMMU interface as iommu_ops learns
>>> about different types of hardware. Also many, many cleanups.
>>> Check the complete git history for details:
>>>
>>> git://github.com/awilliam/linux-vfio.git vfio-ng
>>>
>>> (matching qemu tree: git://github.com/awilliam/qemu-vfio.git)
>>>
>>> This version, along with the supporting VFIO PCI backend can
>>> be found here:
>>>
>>> git://github.com/awilliam/linux-vfio.git vfio-next-20111103
>>>
>>> I've held off on implementing a kernel->user signaling
>>> mechanism for now since the previous netlink version produced
>>> too many gag reflexes. It's easy enough to set a bit in the
>>> group flags to indicate such support in the future, so I
>>> think we can move ahead without it.
>>>
>>> Appreciate any feedback or suggestions. Thanks,
>>>
>>> Alex
>>>
>>
>>
>
>
--
Alexey Kardashevskiy
IBM OzLabs, LTC Team
e-mail: aik@au1.ibm.com
notes: Alexey Kardashevskiy/Australia/IBM
next prev parent reply other threads:[~2011-11-29 2:11 UTC|newest]
Thread overview: 63+ messages / expand[flat|nested] mbox.gz Atom feed top
2011-11-03 20:12 [Qemu-devel] [RFC PATCH] vfio: VFIO Driver core framework Alex Williamson
2011-11-09 4:17 ` Aaron Fabbri
2011-11-09 4:41 ` Alex Williamson
2011-11-09 8:11 ` Christian Benvenuti (benve)
2011-11-09 18:02 ` Alex Williamson
2011-11-09 21:08 ` Christian Benvenuti (benve)
2011-11-09 23:40 ` Alex Williamson
2011-11-10 0:57 ` Christian Benvenuti (benve)
2011-11-11 18:04 ` Alex Williamson
2011-11-11 22:22 ` Christian Benvenuti (benve)
2011-11-14 22:59 ` Alex Williamson
2011-11-15 0:05 ` David Gibson
2011-11-15 0:49 ` Benjamin Herrenschmidt
2011-11-11 17:51 ` Konrad Rzeszutek Wilk
2011-11-11 22:10 ` Alex Williamson
2011-11-15 0:00 ` David Gibson
2011-11-16 16:52 ` Konrad Rzeszutek Wilk
2011-11-17 20:22 ` Alex Williamson
2011-11-17 20:56 ` Scott Wood
2011-11-16 17:47 ` Scott Wood
2011-11-17 20:52 ` Alex Williamson
2011-11-12 0:14 ` Scott Wood
2011-11-14 20:54 ` Alex Williamson
2011-11-14 21:46 ` Alex Williamson
2011-11-14 22:26 ` Scott Wood
2011-11-14 22:48 ` Alexander Graf
2011-11-15 2:29 ` Alex Williamson
2011-11-15 6:34 ` David Gibson
2011-11-15 18:01 ` Alex Williamson
2011-11-17 0:02 ` David Gibson
2011-11-18 20:32 ` Alex Williamson
2011-11-18 21:09 ` Scott Wood
2011-11-22 19:16 ` Alex Williamson
2011-11-22 20:00 ` Scott Wood
2011-11-22 21:28 ` Alex Williamson
2011-11-21 2:47 ` David Gibson
2011-11-22 18:22 ` Alex Williamson
2011-11-15 20:10 ` Scott Wood
2011-11-15 21:40 ` Aaron Fabbri
2011-11-15 22:29 ` Scott Wood
2011-11-16 23:34 ` Alex Williamson
2011-11-29 1:52 ` Alexey Kardashevskiy
2011-11-29 2:01 ` Alexey Kardashevskiy
2011-11-29 2:11 ` Alexey Kardashevskiy [this message]
2011-11-29 3:54 ` Alex Williamson
2011-11-29 19:26 ` Alex Williamson
2011-11-29 23:20 ` Stuart Yoder
2011-11-29 23:44 ` Alex Williamson
2011-11-30 15:41 ` Stuart Yoder
2011-11-30 16:58 ` Alex Williamson
2011-12-01 20:58 ` Stuart Yoder
2011-12-01 21:25 ` Alex Williamson
2011-12-02 14:40 ` Stuart Yoder
2011-12-02 18:11 ` Bhushan Bharat-R65777
2011-12-02 18:27 ` Scott Wood
2011-12-02 18:35 ` Bhushan Bharat-R65777
2011-12-02 18:45 ` Bhushan Bharat-R65777
2011-12-02 18:52 ` Scott Wood
2011-12-02 18:21 ` Scott Wood
2011-11-29 3:46 ` Alex Williamson
2011-11-29 4:34 ` Alexey Kardashevskiy
2011-11-29 5:48 ` Alex Williamson
2011-12-02 5:06 ` Alexey Kardashevskiy
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=4ED43F39.8050606@au1.ibm.com \
--to=aik@au1.ibm.com \
--cc=B07421@freescale.com \
--cc=B08248@freescale.com \
--cc=aafabbri@cisco.com \
--cc=agraf@suse.de \
--cc=alex.williamson@redhat.com \
--cc=avi@redhat.com \
--cc=benve@cisco.com \
--cc=chrisw@sous-sol.org \
--cc=dwg@au1.ibm.com \
--cc=iommu@lists.linux-foundation.org \
--cc=joerg.roedel@amd.com \
--cc=konrad.wilk@oracle.com \
--cc=kvm@vger.kernel.org \
--cc=linux-pci@vger.kernel.org \
--cc=pmac@au1.ibm.com \
--cc=qemu-devel@nongnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).