From: Alexey Kardashevskiy <aik@ozlabs.ru>
To: qemu-devel@nongnu.org
Cc: Alexey Kardashevskiy <aik@ozlabs.ru>,
Alexander Graf <agraf@suse.de>,
Gavin Shan <gwshan@linux.vnet.ibm.com>,
Alex Williamson <alex.williamson@redhat.com>,
qemu-ppc@nongnu.org, David Gibson <david@gibson.dropbear.id.au>
Subject: [Qemu-devel] [PATCH qemu v8 11/14] spapr_pci: Enable vfio-pci hotplug
Date: Thu, 18 Jun 2015 21:37:33 +1000 [thread overview]
Message-ID: <1434627456-13745-12-git-send-email-aik@ozlabs.ru> (raw)
In-Reply-To: <1434627456-13745-1-git-send-email-aik@ozlabs.ru>
sPAPR IOMMU is managing two copies of a TCE table:
1) a guest view of the table - this is what emulated devices use and
this is where H_GET_TCE reads from;
2) a hardware TCE table - only present if there is at least one vfio-pci
device on a PHB; it is updated via a memory listener on a PHB address
space which forwards map/unmap requests to vfio-pci IOMMU host driver.
At the moment the presence of vfio-pci devices on a bus affects the way
the guest view table is allocated. If there is no vfio-pci on a PHB
and the host kernel supports KVM acceleration of H_PUT_TCE, a table
is allocated in KVM. However, if there is vfio-pci and we do not yet
support KVM acceleration for these, the table has to be allocated
by the userspace.
When a vfio-pci device is hotplugged and there were no vfio-pci devices
already, the guest view table could have been allocated by KVM which
means that H_PUT_TCE is handled by the host kernel and since we
do not support vfio-pci in KVM, the hardware table will not be updated.
This reallocates the guest view table in QEMU if the first vfio-pci
device has just been plugged. spapr_tce_realloc_userspace() handles this.
This replays all the mappings to make sure that the tables are in sync.
This will not have a visible effect though as for a new device
the guest kernel will allocate-and-map new addresses and therefore
existing mappings from emulated devices will not be used by vfio-pci
devices.
This adds calls to spapr_phb_dma_capabilities_update() (via the new
spapr_phb_hotplug_dma_sync() helper) in PCI hotplug hooks.
Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
---
hw/ppc/spapr_iommu.c | 50 +++++++++++++++++++++++++++++++++++++++++++++++---
hw/ppc/spapr_pci.c | 43 +++++++++++++++++++++++++++++++++++++++++++
include/hw/ppc/spapr.h | 2 ++
trace-events | 2 ++
4 files changed, 94 insertions(+), 3 deletions(-)
diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c
index 45c00d8..5e6bdb4 100644
--- a/hw/ppc/spapr_iommu.c
+++ b/hw/ppc/spapr_iommu.c
@@ -78,12 +78,13 @@ static uint64_t *spapr_tce_alloc_table(uint32_t liobn,
uint32_t nb_table,
uint32_t page_shift,
int *fd,
- bool vfio_accel)
+ bool vfio_accel,
+ bool force_userspace)
{
uint64_t *table = NULL;
uint64_t window_size = (uint64_t)nb_table << page_shift;
- if (kvm_enabled() && !(window_size >> 32)) {
+ if (kvm_enabled() && !force_userspace && !(window_size >> 32)) {
table = kvmppc_create_spapr_tce(liobn, window_size, fd, vfio_accel);
}
@@ -222,7 +223,8 @@ static void spapr_tce_table_do_enable(sPAPRTCETable *tcet, bool vfio_accel)
tcet->nb_table,
tcet->page_shift,
&tcet->fd,
- vfio_accel);
+ vfio_accel,
+ false);
memory_region_set_size(&tcet->iommu,
(uint64_t)tcet->nb_table << tcet->page_shift);
@@ -495,6 +497,48 @@ int spapr_dma_dt(void *fdt, int node_off, const char *propname,
return 0;
}
+/*
+ * Feed every entry of @table back through put_tce_emu(), one IOMMU page at
+ * a time, starting at the window's bus offset.
+ *
+ * @tcet:  TCE table object whose window geometry (bus_offset, page_shift,
+ *         nb_table) drives the walk
+ * @table: array of at least tcet->nb_table TCE entries to replay
+ *
+ * Returns 0 when all entries were replayed, otherwise the first non-zero
+ * value returned by put_tce_emu() (narrowed to int); the walk stops at
+ * that entry.
+ */
+static int spapr_tce_do_replay(sPAPRTCETable *tcet, uint64_t *table)
+{
+    target_ulong ioba = tcet->bus_offset;
+    target_ulong pgsz = 1ULL << tcet->page_shift;
+    long i, ret = 0;
+
+    for (i = 0; i < tcet->nb_table; ++i, ioba += pgsz) {
+        ret = put_tce_emu(tcet, ioba, table[i]);
+        if (ret) {
+            /* Stop at the first failure and report it to the caller. */
+            break;
+        }
+    }
+
+    return ret;
+}
+
+/*
+ * Replay the table currently attached to @tcet onto itself, i.e. push each
+ * of its entries through spapr_tce_do_replay() again.
+ *
+ * Returns whatever spapr_tce_do_replay() returns (0 on success).
+ */
+int spapr_tce_replay(sPAPRTCETable *tcet)
+{
+    uint64_t *current = tcet->table;
+
+    return spapr_tce_do_replay(tcet, current);
+}
+
+/*
+ * Replace the table backing @tcet with a freshly allocated one, asking
+ * spapr_tce_alloc_table() to skip the KVM path (force_userspace = true)
+ * and with vfio_accel = false.
+ *
+ * @tcet:   TCE table object to re-back
+ * @replay: when true, the entries of the previous table are replayed into
+ *          the new one via spapr_tce_do_replay() before the previous table
+ *          is released
+ *
+ * The previous table/fd pair is always released with
+ * spapr_tce_free_table(), even if the replay reported an error.
+ *
+ * Returns 0 when no replay was requested, otherwise the replay result.
+ */
+int spapr_tce_realloc_userspace(sPAPRTCETable *tcet, bool replay)
+{
+    uint64_t *prev_table = tcet->table;
+    int prev_fd = tcet->fd;
+    int rc = 0;
+
+    /* Trailing arguments: vfio_accel = false, force_userspace = true. */
+    tcet->table = spapr_tce_alloc_table(tcet->liobn, tcet->nb_table,
+                                        tcet->page_shift, &tcet->fd,
+                                        false, true);
+
+    if (replay) {
+        rc = spapr_tce_do_replay(tcet, prev_table);
+    }
+
+    spapr_tce_free_table(prev_table, prev_fd, tcet->nb_table);
+
+    return rc;
+}
+
int spapr_tcet_dma_dt(void *fdt, int node_off, const char *propname,
sPAPRTCETable *tcet)
{
diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
index ca3772e..1f980fa 100644
--- a/hw/ppc/spapr_pci.c
+++ b/hw/ppc/spapr_pci.c
@@ -716,6 +716,33 @@ static AddressSpace *spapr_pci_dma_iommu(PCIBus *bus, void *opaque, int devfn)
return &phb->iommu_as;
}
+/*
+ * object_child_foreach() callback: bring one TCE table child of a PHB in
+ * sync after the first vfio-pci device appeared.
+ *
+ * @child:  candidate child object; anything that is not a
+ *          TYPE_SPAPR_TCE_TABLE is ignored
+ * @opaque: unused
+ *
+ * Always returns 0; the result of the replay/reallocation is only reported
+ * through the trace events.
+ * NOTE(review): presumably 0 is returned so the iteration visits every
+ * child regardless of failures - confirm this is intended.
+ */
+static int spapr_phb_dma_update(Object *child, void *opaque)
+{
+    sPAPRTCETable *tcet;
+    int ret;
+
+    tcet = (sPAPRTCETable *)
+        object_dynamic_cast(child, TYPE_SPAPR_TCE_TABLE);
+    if (tcet == NULL) {
+        return 0;
+    }
+
+    if (tcet->fd < 0) {
+        /* There was no acceleration, so just replay mappings. */
+        ret = spapr_tce_replay(tcet);
+        trace_spapr_pci_dma_update(tcet->liobn, ret);
+        return 0;
+    }
+
+    /*
+     * We got first vfio-pci device on accelerated table.
+     * VFIO acceleration is not possible.
+     * Reallocate table in userspace and replay mappings.
+     */
+    ret = spapr_tce_realloc_userspace(tcet, true);
+    trace_spapr_pci_dma_realloc_update(tcet->liobn, ret);
+
+    return 0;
+}
+
static int spapr_phb_dma_capabilities_update(sPAPRPHBState *sphb)
{
int ret;
@@ -776,6 +803,20 @@ int spapr_phb_dma_reset(sPAPRPHBState *sphb)
return 0;
}
+/*
+ * Re-evaluate the DMA capabilities of @sphb after a PCI hotplug/unplug and,
+ * if the PHB went from "no vfio" to "has vfio", run spapr_phb_dma_update()
+ * on every child object of the PHB.
+ *
+ * Always returns 0.
+ * NOTE(review): the results of spapr_phb_dma_capabilities_update() and
+ * object_child_foreach() are discarded here - confirm that is intended.
+ */
+static int spapr_phb_hotplug_dma_sync(sPAPRPHBState *sphb)
+{
+    /* Snapshot taken before the update so a false -> true flip is visible. */
+    const bool vfio_before = sphb->has_vfio;
+
+    /* Presumably refreshes sphb->has_vfio; verify against its definition. */
+    spapr_phb_dma_capabilities_update(sphb);
+
+    if (vfio_before || !sphb->has_vfio) {
+        return 0;
+    }
+
+    object_child_foreach(OBJECT(sphb), spapr_phb_dma_update, NULL);
+
+    return 0;
+}
+
/* Macros to operate with address in OF binding to PCI */
#define b_x(x, p, l) (((x) & ((1<<(l))-1)) << (p))
#define b_n(x) b_x((x), 31, 1) /* 0 if relocatable */
@@ -1042,6 +1083,7 @@ static void spapr_phb_add_pci_device(sPAPRDRConnector *drc,
if (dev->hotplugged) {
fdt = spapr_create_pci_child_dt(phb, pdev, drc_index, drc_name,
&fdt_start_offset);
+ spapr_phb_hotplug_dma_sync(phb);
}
drck->attach(drc, DEVICE(pdev),
@@ -1065,6 +1107,7 @@ static void spapr_phb_remove_pci_device_cb(DeviceState *dev, void *opaque)
*/
pci_device_reset(PCI_DEVICE(dev));
object_unparent(OBJECT(dev));
+ spapr_phb_hotplug_dma_sync((sPAPRPHBState *)opaque);
}
static void spapr_phb_remove_pci_device(sPAPRDRConnector *drc,
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index e32e787..4645f16 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -588,6 +588,8 @@ int spapr_dma_dt(void *fdt, int node_off, const char *propname,
uint32_t liobn, uint64_t window, uint32_t size);
int spapr_tcet_dma_dt(void *fdt, int node_off, const char *propname,
sPAPRTCETable *tcet);
+int spapr_tce_replay(sPAPRTCETable *tcet);
+int spapr_tce_realloc_userspace(sPAPRTCETable *tcet, bool replay);
void spapr_pci_switch_vga(bool big_endian);
void spapr_hotplug_req_add_event(sPAPRDRConnector *drc);
void spapr_hotplug_req_remove_event(sPAPRDRConnector *drc);
diff --git a/trace-events b/trace-events
index a93af9a..3cd8bf7 100644
--- a/trace-events
+++ b/trace-events
@@ -1300,6 +1300,8 @@ spapr_pci_rtas_ibm_query_interrupt_source_number(unsigned ioa, unsigned intr) "q
spapr_pci_msi_write(uint64_t addr, uint64_t data, uint32_t dt_irq) "@%"PRIx64"<=%"PRIx64" IRQ %u"
spapr_pci_lsi_set(const char *busname, int pin, uint32_t irq) "%s PIN%d IRQ %u"
spapr_pci_msi_retry(unsigned config_addr, unsigned req_num, unsigned max_irqs) "Guest device at %x asked %u, have only %u"
+spapr_pci_dma_update(uint64_t liobn, long ret) "liobn=%"PRIx64" ret=%ld"
+spapr_pci_dma_realloc_update(uint64_t liobn, long ret) "liobn=%"PRIx64" ret=%ld"
# hw/pci/pci.c
pci_update_mappings_del(void *d, uint32_t bus, uint32_t func, uint32_t slot, int bar, uint64_t addr, uint64_t size) "d=%p %02x:%02x.%x %d,%#"PRIx64"+%#"PRIx64
--
2.4.0.rc3.8.gfb3e7d5
next prev parent reply other threads:[~2015-06-18 11:39 UTC|newest]
Thread overview: 31+ messages / expand[flat|nested] mbox.gz Atom feed top
2015-06-18 11:37 [Qemu-devel] [PATCH qemu v8 00/14] spapr: vfio: Enable Dynamic DMA windows (DDW) Alexey Kardashevskiy
2015-06-18 11:37 ` [Qemu-devel] [PATCH qemu v8 01/14] vmstate: Define VARRAY with VMS_ALLOC Alexey Kardashevskiy
2015-06-18 11:37 ` [Qemu-devel] [PATCH qemu v8 02/14] vfio: spapr: Move SPAPR-related code to a separate file Alexey Kardashevskiy
2015-06-18 21:10 ` Alex Williamson
2015-06-19 0:16 ` Alexey Kardashevskiy
2015-06-23 5:49 ` David Gibson
2015-06-18 11:37 ` [Qemu-devel] [PATCH qemu v8 03/14] spapr_pci_vfio: Enable multiple groups per container Alexey Kardashevskiy
2015-06-25 19:59 ` Alex Williamson
2015-06-30 3:32 ` Alexey Kardashevskiy
2015-06-18 11:37 ` [Qemu-devel] [PATCH qemu v8 04/14] spapr_pci: Convert finish_realize() to dma_capabilities_update()+dma_init_window() Alexey Kardashevskiy
2015-06-18 11:37 ` [Qemu-devel] [PATCH qemu v8 05/14] spapr_iommu: Move table allocation to helpers Alexey Kardashevskiy
2015-06-22 3:28 ` David Gibson
2015-06-18 11:37 ` [Qemu-devel] [PATCH qemu v8 06/14] spapr_iommu: Introduce "enabled" state for TCE table Alexey Kardashevskiy
2015-06-22 3:45 ` David Gibson
2015-06-18 11:37 ` [Qemu-devel] [PATCH qemu v8 07/14] spapr_iommu: Remove vfio_accel flag from sPAPRTCETable Alexey Kardashevskiy
2015-06-22 3:51 ` David Gibson
2015-06-18 11:37 ` [Qemu-devel] [PATCH qemu v8 08/14] spapr_iommu: Add root memory region Alexey Kardashevskiy
2015-06-18 11:37 ` [Qemu-devel] [PATCH qemu v8 09/14] spapr_pci: Do complete reset of DMA config when resetting PHB Alexey Kardashevskiy
2015-06-18 11:37 ` [Qemu-devel] [PATCH qemu v8 10/14] spapr_vfio_pci: Remove redundant spapr-pci-vfio-host-bridge Alexey Kardashevskiy
2015-06-22 4:41 ` David Gibson
2015-06-18 11:37 ` Alexey Kardashevskiy [this message]
2015-06-22 5:14 ` [Qemu-devel] [PATCH qemu v8 11/14] spapr_pci: Enable vfio-pci hotplug David Gibson
2015-06-18 11:37 ` [Qemu-devel] [PATCH qemu v8 12/14] linux headers update for DDW on SPAPR Alexey Kardashevskiy
2015-06-18 11:37 ` [Qemu-devel] [PATCH qemu v8 13/14] vfio: spapr: Add SPAPR IOMMU v2 support (DMA memory preregistering) Alexey Kardashevskiy
2015-06-18 11:37 ` [Qemu-devel] [PATCH qemu v8 14/14] spapr_pci/spapr_pci_vfio: Support Dynamic DMA Windows (DDW) Alexey Kardashevskiy
2015-06-23 6:38 ` David Gibson
2015-06-24 10:37 ` Alexey Kardashevskiy
2015-06-23 6:44 ` [Qemu-devel] [PATCH qemu v8 00/14] spapr: vfio: Enable Dynamic DMA windows (DDW) David Gibson
2015-06-24 10:52 ` Alexey Kardashevskiy
2015-06-25 19:59 ` Alex Williamson
2015-06-26 7:01 ` David Gibson
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1434627456-13745-12-git-send-email-aik@ozlabs.ru \
--to=aik@ozlabs.ru \
--cc=agraf@suse.de \
--cc=alex.williamson@redhat.com \
--cc=david@gibson.dropbear.id.au \
--cc=gwshan@linux.vnet.ibm.com \
--cc=qemu-devel@nongnu.org \
--cc=qemu-ppc@nongnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).