* [Qemu-devel] [PATCH 37/99] arm_gicv3_kvm: kvm_dist_get/put: skip the registers banked by GICR
From: Michael Roth @ 2018-07-23 20:16 UTC (permalink / raw)
To: qemu-devel; +Cc: qemu-stable, Shannon Zhao, Peter Maydell
In-Reply-To: <20180723201748.25573-1-mdroth@linux.vnet.ibm.com>
From: Shannon Zhao <zhaoshenglong@huawei.com>
While we skip the GIC_INTERNAL irqs, we don't change the register offset
accordingly. This will overlap the GICR registers value and leave the
last GIC_INTERNAL irq's registers out of update.
Fix this by skipping the registers banked by GICR.
Also for migration compatibility if the migration source (old version
qemu) doesn't send gicd_no_migration_shift_bug = 1 to destination, then
we shift the data of PPI to get the right data for SPI.
Fixes: 367b9f527becdd20ddf116e17a3c0c2bbc486920
Cc: qemu-stable@nongnu.org
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Shannon Zhao <zhaoshenglong@huawei.com>
Message-id: 1527816987-16108-1-git-send-email-zhaoshenglong@huawei.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
(cherry picked from commit 910e204841954b95c051b2ee49ab0f5c735ff93c)
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
---
hw/intc/arm_gicv3_common.c | 79 ++++++++++++++++++++++++++++++
hw/intc/arm_gicv3_kvm.c | 38 ++++++++++++++
include/hw/intc/arm_gicv3_common.h | 1 +
3 files changed, 118 insertions(+)
diff --git a/hw/intc/arm_gicv3_common.c b/hw/intc/arm_gicv3_common.c
index 7b54d52376..864b7c6515 100644
--- a/hw/intc/arm_gicv3_common.c
+++ b/hw/intc/arm_gicv3_common.c
@@ -27,6 +27,7 @@
#include "hw/intc/arm_gicv3_common.h"
#include "gicv3_internal.h"
#include "hw/arm/linux-boot-if.h"
+#include "sysemu/kvm.h"
static int gicv3_pre_save(void *opaque)
{
@@ -141,6 +142,79 @@ static const VMStateDescription vmstate_gicv3_cpu = {
}
};
+static int gicv3_gicd_no_migration_shift_bug_pre_load(void *opaque)
+{
+ GICv3State *cs = opaque;
+
+ /*
+ * The gicd_no_migration_shift_bug flag is used for migration compatibility
+ * for old version QEMU which may have the GICD bmp shift bug under KVM mode.
+ * Strictly, what we want to know is whether the migration source is using
+ * KVM. Since we don't have any way to determine that, we look at whether the
+ * destination is using KVM; this is close enough because for the older QEMU
+ * versions with this bug KVM -> TCG migration didn't work anyway. If the
+ * source is a newer QEMU without this bug it will transmit the migration
+ * subsection which sets the flag to true; otherwise it will remain set to
+ * the value we select here.
+ */
+ if (kvm_enabled()) {
+ cs->gicd_no_migration_shift_bug = false;
+ }
+
+ return 0;
+}
+
+static int gicv3_gicd_no_migration_shift_bug_post_load(void *opaque,
+ int version_id)
+{
+ GICv3State *cs = opaque;
+
+ if (cs->gicd_no_migration_shift_bug) {
+ return 0;
+ }
+
+ /* Older versions of QEMU had a bug in the handling of state save/restore
+ * to the KVM GICv3: they got the offset in the bitmap arrays wrong,
+ * so that instead of the data for external interrupts 32 and up
+ * starting at bit position 32 in the bitmap, it started at bit
+ * position 64. If we're receiving data from a QEMU with that bug,
+ * we must move the data down into the right place.
+ */
+ memmove(cs->group, (uint8_t *)cs->group + GIC_INTERNAL / 8,
+ sizeof(cs->group) - GIC_INTERNAL / 8);
+ memmove(cs->grpmod, (uint8_t *)cs->grpmod + GIC_INTERNAL / 8,
+ sizeof(cs->grpmod) - GIC_INTERNAL / 8);
+ memmove(cs->enabled, (uint8_t *)cs->enabled + GIC_INTERNAL / 8,
+ sizeof(cs->enabled) - GIC_INTERNAL / 8);
+ memmove(cs->pending, (uint8_t *)cs->pending + GIC_INTERNAL / 8,
+ sizeof(cs->pending) - GIC_INTERNAL / 8);
+ memmove(cs->active, (uint8_t *)cs->active + GIC_INTERNAL / 8,
+ sizeof(cs->active) - GIC_INTERNAL / 8);
+ memmove(cs->edge_trigger, (uint8_t *)cs->edge_trigger + GIC_INTERNAL / 8,
+ sizeof(cs->edge_trigger) - GIC_INTERNAL / 8);
+
+ /*
+ * While this new version QEMU doesn't have this kind of bug as we fix it,
+ * so it needs to set the flag to true to indicate that and it's necessary
+ * for next migration to work from this new version QEMU.
+ */
+ cs->gicd_no_migration_shift_bug = true;
+
+ return 0;
+}
+
+const VMStateDescription vmstate_gicv3_gicd_no_migration_shift_bug = {
+ .name = "arm_gicv3/gicd_no_migration_shift_bug",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .pre_load = gicv3_gicd_no_migration_shift_bug_pre_load,
+ .post_load = gicv3_gicd_no_migration_shift_bug_post_load,
+ .fields = (VMStateField[]) {
+ VMSTATE_BOOL(gicd_no_migration_shift_bug, GICv3State),
+ VMSTATE_END_OF_LIST()
+ }
+};
+
static const VMStateDescription vmstate_gicv3 = {
.name = "arm_gicv3",
.version_id = 1,
@@ -165,6 +239,10 @@ static const VMStateDescription vmstate_gicv3 = {
VMSTATE_STRUCT_VARRAY_POINTER_UINT32(cpu, GICv3State, num_cpu,
vmstate_gicv3_cpu, GICv3CPUState),
VMSTATE_END_OF_LIST()
+ },
+ .subsections = (const VMStateDescription * []) {
+ &vmstate_gicv3_gicd_no_migration_shift_bug,
+ NULL
}
};
@@ -364,6 +442,7 @@ static void arm_gicv3_common_reset(DeviceState *dev)
gicv3_gicd_group_set(s, i);
}
}
+ s->gicd_no_migration_shift_bug = true;
}
static void arm_gic_common_linux_init(ARMLinuxBootIf *obj,
diff --git a/hw/intc/arm_gicv3_kvm.c b/hw/intc/arm_gicv3_kvm.c
index 3536795501..81cbd16817 100644
--- a/hw/intc/arm_gicv3_kvm.c
+++ b/hw/intc/arm_gicv3_kvm.c
@@ -164,6 +164,14 @@ static void kvm_dist_get_edge_trigger(GICv3State *s, uint32_t offset,
uint32_t reg;
int irq;
+ /* For the KVM GICv3, affinity routing is always enabled, and the first 2
+ * GICD_ICFGR<n> registers are always RAZ/WI. The corresponding
+ * functionality is replaced by GICR_ICFGR<n>. It doesn't need to sync
+ * them. So it should increase the offset to skip GIC_INTERNAL irqs.
+ * This matches the for_each_dist_irq_reg() macro which also skips the
+ * first GIC_INTERNAL irqs.
+ */
+ offset += (GIC_INTERNAL * 2) / 8;
for_each_dist_irq_reg(irq, s->num_irq, 2) {
kvm_gicd_access(s, offset, ®, false);
reg = half_unshuffle32(reg >> 1);
@@ -181,6 +189,14 @@ static void kvm_dist_put_edge_trigger(GICv3State *s, uint32_t offset,
uint32_t reg;
int irq;
+ /* For the KVM GICv3, affinity routing is always enabled, and the first 2
+ * GICD_ICFGR<n> registers are always RAZ/WI. The corresponding
+ * functionality is replaced by GICR_ICFGR<n>. It doesn't need to sync
+ * them. So it should increase the offset to skip GIC_INTERNAL irqs.
+ * This matches the for_each_dist_irq_reg() macro which also skips the
+ * first GIC_INTERNAL irqs.
+ */
+ offset += (GIC_INTERNAL * 2) / 8;
for_each_dist_irq_reg(irq, s->num_irq, 2) {
reg = *gic_bmp_ptr32(bmp, irq);
if (irq % 32 != 0) {
@@ -222,6 +238,15 @@ static void kvm_dist_getbmp(GICv3State *s, uint32_t offset, uint32_t *bmp)
uint32_t reg;
int irq;
+ /* For the KVM GICv3, affinity routing is always enabled, and the
+ * GICD_IGROUPR0/GICD_IGRPMODR0/GICD_ISENABLER0/GICD_ISPENDR0/
+ * GICD_ISACTIVER0 registers are always RAZ/WI. The corresponding
+ * functionality is replaced by the GICR registers. It doesn't need to sync
+ * them. So it should increase the offset to skip GIC_INTERNAL irqs.
+ * This matches the for_each_dist_irq_reg() macro which also skips the
+ * first GIC_INTERNAL irqs.
+ */
+ offset += (GIC_INTERNAL * 1) / 8;
for_each_dist_irq_reg(irq, s->num_irq, 1) {
kvm_gicd_access(s, offset, ®, false);
*gic_bmp_ptr32(bmp, irq) = reg;
@@ -235,6 +260,19 @@ static void kvm_dist_putbmp(GICv3State *s, uint32_t offset,
uint32_t reg;
int irq;
+ /* For the KVM GICv3, affinity routing is always enabled, and the
+ * GICD_IGROUPR0/GICD_IGRPMODR0/GICD_ISENABLER0/GICD_ISPENDR0/
+ * GICD_ISACTIVER0 registers are always RAZ/WI. The corresponding
+ * functionality is replaced by the GICR registers. It doesn't need to sync
+ * them. So it should increase the offset and clroffset to skip GIC_INTERNAL
+ * irqs. This matches the for_each_dist_irq_reg() macro which also skips the
+ * first GIC_INTERNAL irqs.
+ */
+ offset += (GIC_INTERNAL * 1) / 8;
+ if (clroffset != 0) {
+ clroffset += (GIC_INTERNAL * 1) / 8;
+ }
+
for_each_dist_irq_reg(irq, s->num_irq, 1) {
/* If this bitmap is a set/clear register pair, first write to the
* clear-reg to clear all bits before using the set-reg to write
diff --git a/include/hw/intc/arm_gicv3_common.h b/include/hw/intc/arm_gicv3_common.h
index bccdfe17c6..d75b49d558 100644
--- a/include/hw/intc/arm_gicv3_common.h
+++ b/include/hw/intc/arm_gicv3_common.h
@@ -217,6 +217,7 @@ struct GICv3State {
uint32_t revision;
bool security_extn;
bool irq_reset_nonsecure;
+ bool gicd_no_migration_shift_bug;
int dev_fd; /* kvm device fd if backed by kvm vgic support */
Error *migration_blocker;
--
2.17.1
^ permalink raw reply related
* [Qemu-devel] [PATCH 38/99] block: Make bdrv_is_writable() public
From: Michael Roth @ 2018-07-23 20:16 UTC (permalink / raw)
To: qemu-devel; +Cc: qemu-stable, Max Reitz
In-Reply-To: <20180723201748.25573-1-mdroth@linux.vnet.ibm.com>
From: Max Reitz <mreitz@redhat.com>
This is a useful function for the whole block layer, so make it public.
At the same time, users outside of block.c probably do not need to make
use of the reopen functionality, so rename the current function to
bdrv_is_writable_after_reopen() create a new bdrv_is_writable() function
that just passes NULL to it for the reopen queue.
Cc: qemu-stable@nongnu.org
Signed-off-by: Max Reitz <mreitz@redhat.com>
Message-id: 20180606193702.7113-2-mreitz@redhat.com
Reviewed-by: John Snow <jsnow@redhat.com>
Reviewed-by: Jeff Cody <jcody@redhat.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
(cherry picked from commit cc022140972f8b6ac3973c12ccf9dd6b1d2fd200)
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
---
block.c | 17 ++++++++++++++---
include/block/block.h | 1 +
2 files changed, 15 insertions(+), 3 deletions(-)
diff --git a/block.c b/block.c
index a2caadf0a0..9af22179ce 100644
--- a/block.c
+++ b/block.c
@@ -1620,13 +1620,24 @@ static int bdrv_reopen_get_flags(BlockReopenQueue *q, BlockDriverState *bs)
/* Returns whether the image file can be written to after the reopen queue @q
* has been successfully applied, or right now if @q is NULL. */
-static bool bdrv_is_writable(BlockDriverState *bs, BlockReopenQueue *q)
+static bool bdrv_is_writable_after_reopen(BlockDriverState *bs,
+ BlockReopenQueue *q)
{
int flags = bdrv_reopen_get_flags(q, bs);
return (flags & (BDRV_O_RDWR | BDRV_O_INACTIVE)) == BDRV_O_RDWR;
}
+/*
+ * Return whether the BDS can be written to. This is not necessarily
+ * the same as !bdrv_is_read_only(bs), as inactivated images may not
+ * be written to but do not count as read-only images.
+ */
+bool bdrv_is_writable(BlockDriverState *bs)
+{
+ return bdrv_is_writable_after_reopen(bs, NULL);
+}
+
static void bdrv_child_perm(BlockDriverState *bs, BlockDriverState *child_bs,
BdrvChild *c, const BdrvChildRole *role,
BlockReopenQueue *reopen_queue,
@@ -1664,7 +1675,7 @@ static int bdrv_check_perm(BlockDriverState *bs, BlockReopenQueue *q,
/* Write permissions never work with read-only images */
if ((cumulative_perms & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED)) &&
- !bdrv_is_writable(bs, q))
+ !bdrv_is_writable_after_reopen(bs, q))
{
error_setg(errp, "Block node is read-only");
return -EPERM;
@@ -1956,7 +1967,7 @@ void bdrv_format_default_perms(BlockDriverState *bs, BdrvChild *c,
&perm, &shared);
/* Format drivers may touch metadata even if the guest doesn't write */
- if (bdrv_is_writable(bs, reopen_queue)) {
+ if (bdrv_is_writable_after_reopen(bs, reopen_queue)) {
perm |= BLK_PERM_WRITE | BLK_PERM_RESIZE;
}
diff --git a/include/block/block.h b/include/block/block.h
index cdec3639a3..68a667a742 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -400,6 +400,7 @@ bool bdrv_is_read_only(BlockDriverState *bs);
int bdrv_can_set_read_only(BlockDriverState *bs, bool read_only,
bool ignore_allow_rdw, Error **errp);
int bdrv_set_read_only(BlockDriverState *bs, bool read_only, Error **errp);
+bool bdrv_is_writable(BlockDriverState *bs);
bool bdrv_is_sg(BlockDriverState *bs);
bool bdrv_is_inserted(BlockDriverState *bs);
void bdrv_lock_medium(BlockDriverState *bs, bool locked);
--
2.17.1
^ permalink raw reply related
* [Qemu-devel] [PATCH 35/99] Fix libusb-1.0.22 deprecated libusb_set_debug with libusb_set_option
From: Michael Roth @ 2018-07-23 20:16 UTC (permalink / raw)
To: qemu-devel; +Cc: qemu-stable, John Thomson, Gerd Hoffmann
In-Reply-To: <20180723201748.25573-1-mdroth@linux.vnet.ibm.com>
From: John Thomson <git@johnthomson.fastmail.com.au>
libusb-1.0.22 marked libusb_set_debug deprecated
it is replaced with
libusb_set_option(libusb_context, LIBUSB_OPTION_LOG_LEVEL, libusb_log_level);
details here: https://github.com/libusb/libusb/commit/539f22e2fd916558d11ab9a66f10f461c5593168
Warning here:
CC hw/usb/host-libusb.o
/builds/xen/src/qemu-xen/hw/usb/host-libusb.c: In function 'usb_host_init':
/builds/xen/src/qemu-xen/hw/usb/host-libusb.c:250:5: error: 'libusb_set_debug' is deprecated: Use libusb_set_option instead [-Werror=deprecated-declarations]
libusb_set_debug(ctx, loglevel);
^~~~~~~~~~~~~~~~
In file included from /builds/xen/src/qemu-xen/hw/usb/host-libusb.c:40:0:
/usr/include/libusb-1.0/libusb.h:1300:18: note: declared here
void LIBUSB_CALL libusb_set_debug(libusb_context *ctx, int level);
^~~~~~~~~~~~~~~~
cc1: all warnings being treated as errors
make: *** [/builds/xen/src/qemu-xen/rules.mak:66: hw/usb/host-libusb.o] Error 1
make: Leaving directory '/builds/xen/src/xen/tools/qemu-xen-build'
Signed-off-by: John Thomson <git@johnthomson.fastmail.com.au>
Message-id: 20180405132046.4968-1-git@johnthomson.fastmail.com.au
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
(cherry picked from commit 9d8fa0df49af16a208fa961c2968fba4daffcc07)
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
---
hw/usb/host-libusb.c | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/hw/usb/host-libusb.c b/hw/usb/host-libusb.c
index 1b0be071cc..dc0a8fe295 100644
--- a/hw/usb/host-libusb.c
+++ b/hw/usb/host-libusb.c
@@ -247,7 +247,11 @@ static int usb_host_init(void)
if (rc != 0) {
return -1;
}
+#if LIBUSB_API_VERSION >= 0x01000106
+ libusb_set_option(ctx, LIBUSB_OPTION_LOG_LEVEL, loglevel);
+#else
libusb_set_debug(ctx, loglevel);
+#endif
#ifdef CONFIG_WIN32
/* FIXME: add support for Windows. */
#else
--
2.17.1
^ permalink raw reply related
* [Qemu-devel] [PATCH 36/99] ahci: fix PxCI register race
From: Michael Roth @ 2018-07-23 20:16 UTC (permalink / raw)
To: qemu-devel; +Cc: qemu-stable, John Snow
In-Reply-To: <20180723201748.25573-1-mdroth@linux.vnet.ibm.com>
From: John Snow <jsnow@redhat.com>
Fixes: https://bugs.launchpad.net/qemu/+bug/1769189
AHCI presently signals completion prior to the PxCI register being
cleared to indicate completion. If a guest driver attempts to issue
a new command in its IRQ handler, it might be surprised to learn there
is still a command pending.
In the case of Windows 10's boot driver, it will actually poll the IRQ
register hoping to find out when the command is done running -- which
will never happen, as there isn't a command running.
Fix this: clear PxCI in ahci_cmd_done and not in the asynchronous BH.
Because it now runs synchronously, we don't need to check if the command
is actually done by spying on the ATA registers. We know it's done.
CC: qemu-stable <qemu-stable@nongnu.org>
Reported-by: François Guerraz <kubrick@fgv6.net>
Tested-by: Bruce Rogers <brogers@suse.com>
Signed-off-by: John Snow <jsnow@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Jeff Cody <jcody@redhat.com>
Message-id: 20180531004323.4611-3-jsnow@redhat.com
Signed-off-by: John Snow <jsnow@redhat.com>
(cherry picked from commit 5694c7eacce6b263ad7497cc1bb76aad746cfd4e)
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
---
hw/ide/ahci.c | 13 ++++++-------
1 file changed, 6 insertions(+), 7 deletions(-)
diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c
index e22d7be05f..18b9a9c18b 100644
--- a/hw/ide/ahci.c
+++ b/hw/ide/ahci.c
@@ -532,13 +532,6 @@ static void ahci_check_cmd_bh(void *opaque)
qemu_bh_delete(ad->check_bh);
ad->check_bh = NULL;
- if ((ad->busy_slot != -1) &&
- !(ad->port.ifs[0].status & (BUSY_STAT|DRQ_STAT))) {
- /* no longer busy */
- ad->port_regs.cmd_issue &= ~(1 << ad->busy_slot);
- ad->busy_slot = -1;
- }
-
check_cmd(ad->hba, ad->port_no);
}
@@ -1425,6 +1418,12 @@ static void ahci_cmd_done(IDEDMA *dma)
trace_ahci_cmd_done(ad->hba, ad->port_no);
+ /* no longer busy */
+ if (ad->busy_slot != -1) {
+ ad->port_regs.cmd_issue &= ~(1 << ad->busy_slot);
+ ad->busy_slot = -1;
+ }
+
/* update d2h status */
ahci_write_fis_d2h(ad);
--
2.17.1
^ permalink raw reply related
* [Qemu-devel] [PATCH 33/99] intel-iommu: rework the page walk logic
From: Michael Roth @ 2018-07-23 20:16 UTC (permalink / raw)
To: qemu-devel; +Cc: qemu-stable, Peter Xu, Michael S . Tsirkin
In-Reply-To: <20180723201748.25573-1-mdroth@linux.vnet.ibm.com>
From: Peter Xu <peterx@redhat.com>
This patch fixes a potential small window that the DMA page table might
be incomplete or invalid when the guest sends domain/context
invalidations to a device. This can cause random DMA errors for
assigned devices.
This is a major change to the VT-d shadow page walking logic. It
includes but is not limited to:
- For each VTDAddressSpace, now we maintain what IOVA ranges we have
mapped and what we have not. With that information, now we only send
MAP or UNMAP when necessary. Say, we don't send MAP notifies if we
know we have already mapped the range, meanwhile we don't send UNMAP
notifies if we know we never mapped the range at all.
- Introduce vtd_sync_shadow_page_table[_range] APIs so that we can call
in any places to resync the shadow page table for a device.
- When we receive domain/context invalidation, we should not really run
the replay logic, instead we use the new sync shadow page table API to
resync the whole shadow page table without unmapping the whole
region. After this change, we'll only do the page walk once for each
domain invalidations (before this, it can be multiple, depending on
number of notifiers per address space).
While at it, the page walking logic is also refactored to be simpler.
CC: QEMU Stable <qemu-stable@nongnu.org>
Reported-by: Jintack Lim <jintack@cs.columbia.edu>
Tested-by: Jintack Lim <jintack@cs.columbia.edu>
Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
(cherry picked from commit 63b88968f139b6a77f2f81e6f1eedf70c0170a85)
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
---
hw/i386/intel_iommu.c | 213 +++++++++++++++++++++++++---------
hw/i386/trace-events | 3 +-
include/hw/i386/intel_iommu.h | 2 +
3 files changed, 159 insertions(+), 59 deletions(-)
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 61bb3d31e7..b5a09b7908 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -769,10 +769,77 @@ typedef struct {
static int vtd_page_walk_one(IOMMUTLBEntry *entry, vtd_page_walk_info *info)
{
+ VTDAddressSpace *as = info->as;
vtd_page_walk_hook hook_fn = info->hook_fn;
void *private = info->private;
+ DMAMap target = {
+ .iova = entry->iova,
+ .size = entry->addr_mask,
+ .translated_addr = entry->translated_addr,
+ .perm = entry->perm,
+ };
+ DMAMap *mapped = iova_tree_find(as->iova_tree, &target);
+
+ if (entry->perm == IOMMU_NONE && !info->notify_unmap) {
+ trace_vtd_page_walk_one_skip_unmap(entry->iova, entry->addr_mask);
+ return 0;
+ }
assert(hook_fn);
+
+ /* Update local IOVA mapped ranges */
+ if (entry->perm) {
+ if (mapped) {
+ /* If it's exactly the same translation, skip */
+ if (!memcmp(mapped, &target, sizeof(target))) {
+ trace_vtd_page_walk_one_skip_map(entry->iova, entry->addr_mask,
+ entry->translated_addr);
+ return 0;
+ } else {
+ /*
+ * Translation changed. Normally this should not
+ * happen, but it can happen when with buggy guest
+ * OSes. Note that there will be a small window that
+ * we don't have map at all. But that's the best
+ * effort we can do. The ideal way to emulate this is
+ * atomically modify the PTE to follow what has
+ * changed, but we can't. One example is that vfio
+ * driver only has VFIO_IOMMU_[UN]MAP_DMA but no
+ * interface to modify a mapping (meanwhile it seems
+ * meaningless to even provide one). Anyway, let's
+ * mark this as a TODO in case one day we'll have
+ * a better solution.
+ */
+ IOMMUAccessFlags cache_perm = entry->perm;
+ int ret;
+
+ /* Emulate an UNMAP */
+ entry->perm = IOMMU_NONE;
+ trace_vtd_page_walk_one(info->domain_id,
+ entry->iova,
+ entry->translated_addr,
+ entry->addr_mask,
+ entry->perm);
+ ret = hook_fn(entry, private);
+ if (ret) {
+ return ret;
+ }
+ /* Drop any existing mapping */
+ iova_tree_remove(as->iova_tree, &target);
+ /* Recover the correct permission */
+ entry->perm = cache_perm;
+ }
+ }
+ iova_tree_insert(as->iova_tree, &target);
+ } else {
+ if (!mapped) {
+ /* Skip since we didn't map this range at all */
+ trace_vtd_page_walk_one_skip_unmap(entry->iova, entry->addr_mask);
+ return 0;
+ }
+ iova_tree_remove(as->iova_tree, &target);
+ }
+
trace_vtd_page_walk_one(info->domain_id, entry->iova,
entry->translated_addr, entry->addr_mask,
entry->perm);
@@ -834,45 +901,34 @@ static int vtd_page_walk_level(dma_addr_t addr, uint64_t start,
*/
entry_valid = read_cur | write_cur;
- entry.target_as = &address_space_memory;
- entry.iova = iova & subpage_mask;
- entry.perm = IOMMU_ACCESS_FLAG(read_cur, write_cur);
- entry.addr_mask = ~subpage_mask;
-
- if (vtd_is_last_slpte(slpte, level)) {
- /* NOTE: this is only meaningful if entry_valid == true */
- entry.translated_addr = vtd_get_slpte_addr(slpte, info->aw);
- if (!entry_valid && !info->notify_unmap) {
- trace_vtd_page_walk_skip_perm(iova, iova_next);
- goto next;
- }
- ret = vtd_page_walk_one(&entry, info);
- if (ret < 0) {
- return ret;
- }
- } else {
- if (!entry_valid) {
- if (info->notify_unmap) {
- /*
- * The whole entry is invalid; unmap it all.
- * Translated address is meaningless, zero it.
- */
- entry.translated_addr = 0x0;
- ret = vtd_page_walk_one(&entry, info);
- if (ret < 0) {
- return ret;
- }
- } else {
- trace_vtd_page_walk_skip_perm(iova, iova_next);
- }
- goto next;
- }
+ if (!vtd_is_last_slpte(slpte, level) && entry_valid) {
+ /*
+ * This is a valid PDE (or even bigger than PDE). We need
+ * to walk one further level.
+ */
ret = vtd_page_walk_level(vtd_get_slpte_addr(slpte, info->aw),
iova, MIN(iova_next, end), level - 1,
read_cur, write_cur, info);
- if (ret < 0) {
- return ret;
- }
+ } else {
+ /*
+ * This means we are either:
+ *
+ * (1) the real page entry (either 4K page, or huge page)
+ * (2) the whole range is invalid
+ *
+ * In either case, we send an IOTLB notification down.
+ */
+ entry.target_as = &address_space_memory;
+ entry.iova = iova & subpage_mask;
+ entry.perm = IOMMU_ACCESS_FLAG(read_cur, write_cur);
+ entry.addr_mask = ~subpage_mask;
+ /* NOTE: this is only meaningful if entry_valid == true */
+ entry.translated_addr = vtd_get_slpte_addr(slpte, info->aw);
+ ret = vtd_page_walk_one(&entry, info);
+ }
+
+ if (ret < 0) {
+ return ret;
}
next:
@@ -964,6 +1020,58 @@ static int vtd_dev_to_context_entry(IntelIOMMUState *s, uint8_t bus_num,
return 0;
}
+static int vtd_sync_shadow_page_hook(IOMMUTLBEntry *entry,
+ void *private)
+{
+ memory_region_notify_iommu((IOMMUMemoryRegion *)private, *entry);
+ return 0;
+}
+
+/* If context entry is NULL, we'll try to fetch it on our own. */
+static int vtd_sync_shadow_page_table_range(VTDAddressSpace *vtd_as,
+ VTDContextEntry *ce,
+ hwaddr addr, hwaddr size)
+{
+ IntelIOMMUState *s = vtd_as->iommu_state;
+ vtd_page_walk_info info = {
+ .hook_fn = vtd_sync_shadow_page_hook,
+ .private = (void *)&vtd_as->iommu,
+ .notify_unmap = true,
+ .aw = s->aw_bits,
+ .as = vtd_as,
+ };
+ VTDContextEntry ce_cache;
+ int ret;
+
+ if (ce) {
+ /* If the caller provided context entry, use it */
+ ce_cache = *ce;
+ } else {
+ /* If the caller didn't provide ce, try to fetch */
+ ret = vtd_dev_to_context_entry(s, pci_bus_num(vtd_as->bus),
+ vtd_as->devfn, &ce_cache);
+ if (ret) {
+ /*
+ * This should not really happen, but in case it happens,
+ * we just skip the sync for this time. After all we even
+ * don't have the root table pointer!
+ */
+ trace_vtd_err("Detected invalid context entry when "
+ "trying to sync shadow page table");
+ return 0;
+ }
+ }
+
+ info.domain_id = VTD_CONTEXT_ENTRY_DID(ce_cache.hi);
+
+ return vtd_page_walk(&ce_cache, addr, addr + size, &info);
+}
+
+static int vtd_sync_shadow_page_table(VTDAddressSpace *vtd_as)
+{
+ return vtd_sync_shadow_page_table_range(vtd_as, NULL, 0, UINT64_MAX);
+}
+
/*
* Fetch translation type for specific device. Returns <0 if error
* happens, otherwise return the shifted type to check against
@@ -1296,7 +1404,7 @@ static void vtd_iommu_replay_all(IntelIOMMUState *s)
VTDAddressSpace *vtd_as;
QLIST_FOREACH(vtd_as, &s->vtd_as_with_notifiers, next) {
- memory_region_iommu_replay_all(&vtd_as->iommu);
+ vtd_sync_shadow_page_table(vtd_as);
}
}
@@ -1371,14 +1479,13 @@ static void vtd_context_device_invalidate(IntelIOMMUState *s,
vtd_switch_address_space(vtd_as);
/*
* So a device is moving out of (or moving into) a
- * domain, a replay() suites here to notify all the
- * IOMMU_NOTIFIER_MAP registers about this change.
+ * domain, resync the shadow page table.
* This won't bring bad even if we have no such
* notifier registered - the IOMMU notification
* framework will skip MAP notifications if that
* happened.
*/
- memory_region_iommu_replay_all(&vtd_as->iommu);
+ vtd_sync_shadow_page_table(vtd_as);
}
}
}
@@ -1436,18 +1543,11 @@ static void vtd_iotlb_domain_invalidate(IntelIOMMUState *s, uint16_t domain_id)
if (!vtd_dev_to_context_entry(s, pci_bus_num(vtd_as->bus),
vtd_as->devfn, &ce) &&
domain_id == VTD_CONTEXT_ENTRY_DID(ce.hi)) {
- memory_region_iommu_replay_all(&vtd_as->iommu);
+ vtd_sync_shadow_page_table(vtd_as);
}
}
}
-static int vtd_page_invalidate_notify_hook(IOMMUTLBEntry *entry,
- void *private)
-{
- memory_region_notify_iommu((IOMMUMemoryRegion *)private, *entry);
- return 0;
-}
-
static void vtd_iotlb_page_invalidate_notify(IntelIOMMUState *s,
uint16_t domain_id, hwaddr addr,
uint8_t am)
@@ -1462,21 +1562,12 @@ static void vtd_iotlb_page_invalidate_notify(IntelIOMMUState *s,
vtd_as->devfn, &ce);
if (!ret && domain_id == VTD_CONTEXT_ENTRY_DID(ce.hi)) {
if (vtd_as_has_map_notifier(vtd_as)) {
- vtd_page_walk_info info = {
- .hook_fn = vtd_page_invalidate_notify_hook,
- .private = (void *)&vtd_as->iommu,
- .notify_unmap = true,
- .aw = s->aw_bits,
- .as = vtd_as,
- .domain_id = domain_id,
- };
-
/*
* As long as we have MAP notifications registered in
* any of our IOMMU notifiers, we need to sync the
* shadow page table.
*/
- vtd_page_walk(&ce, addr, addr + size, &info);
+ vtd_sync_shadow_page_table_range(vtd_as, &ce, addr, size);
} else {
/*
* For UNMAP-only notifiers, we don't need to walk the
@@ -2806,6 +2897,7 @@ VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus, int devfn)
vtd_dev_as->devfn = (uint8_t)devfn;
vtd_dev_as->iommu_state = s;
vtd_dev_as->context_cache_entry.context_cache_gen = 0;
+ vtd_dev_as->iova_tree = iova_tree_new();
/*
* Memory region relationships looks like (Address range shows
@@ -2858,6 +2950,7 @@ static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier *n)
hwaddr start = n->start;
hwaddr end = n->end;
IntelIOMMUState *s = as->iommu_state;
+ DMAMap map;
/*
* Note: all the codes in this function has a assumption that IOVA
@@ -2902,6 +2995,10 @@ static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier *n)
VTD_PCI_FUNC(as->devfn),
entry.iova, size);
+ map.iova = entry.iova;
+ map.size = entry.addr_mask;
+ iova_tree_remove(as->iova_tree, &map);
+
memory_region_notify_one(n, &entry);
}
diff --git a/hw/i386/trace-events b/hw/i386/trace-events
index ca23ba9fad..e14d06ec83 100644
--- a/hw/i386/trace-events
+++ b/hw/i386/trace-events
@@ -40,8 +40,9 @@ vtd_replay_ce_valid(uint8_t bus, uint8_t dev, uint8_t fn, uint16_t domain, uint6
vtd_replay_ce_invalid(uint8_t bus, uint8_t dev, uint8_t fn) "replay invalid context device %02"PRIx8":%02"PRIx8".%02"PRIx8
vtd_page_walk_level(uint64_t addr, uint32_t level, uint64_t start, uint64_t end) "walk (base=0x%"PRIx64", level=%"PRIu32") iova range 0x%"PRIx64" - 0x%"PRIx64
vtd_page_walk_one(uint16_t domain, uint64_t iova, uint64_t gpa, uint64_t mask, int perm) "domain 0x%"PRIu16" iova 0x%"PRIx64" -> gpa 0x%"PRIx64" mask 0x%"PRIx64" perm %d"
+vtd_page_walk_one_skip_map(uint64_t iova, uint64_t mask, uint64_t translated) "iova 0x%"PRIx64" mask 0x%"PRIx64" translated 0x%"PRIx64
+vtd_page_walk_one_skip_unmap(uint64_t iova, uint64_t mask) "iova 0x%"PRIx64" mask 0x%"PRIx64
vtd_page_walk_skip_read(uint64_t iova, uint64_t next) "Page walk skip iova 0x%"PRIx64" - 0x%"PRIx64" due to unable to read"
-vtd_page_walk_skip_perm(uint64_t iova, uint64_t next) "Page walk skip iova 0x%"PRIx64" - 0x%"PRIx64" due to perm empty"
vtd_page_walk_skip_reserve(uint64_t iova, uint64_t next) "Page walk skip iova 0x%"PRIx64" - 0x%"PRIx64" due to rsrv set"
vtd_switch_address_space(uint8_t bus, uint8_t slot, uint8_t fn, bool on) "Device %02x:%02x.%x switching address space (iommu enabled=%d)"
vtd_as_unmap_whole(uint8_t bus, uint8_t slot, uint8_t fn, uint64_t iova, uint64_t size) "Device %02x:%02x.%x start 0x%"PRIx64" size 0x%"PRIx64
diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h
index 156f35e919..fbfedcb1c0 100644
--- a/include/hw/i386/intel_iommu.h
+++ b/include/hw/i386/intel_iommu.h
@@ -27,6 +27,7 @@
#include "hw/i386/ioapic.h"
#include "hw/pci/msi.h"
#include "hw/sysbus.h"
+#include "qemu/iova-tree.h"
#define TYPE_INTEL_IOMMU_DEVICE "intel-iommu"
#define INTEL_IOMMU_DEVICE(obj) \
@@ -95,6 +96,7 @@ struct VTDAddressSpace {
QLIST_ENTRY(VTDAddressSpace) next;
/* Superset of notifier flags that this address space has */
IOMMUNotifierFlag notifier_flags;
+ IOVATree *iova_tree; /* Traces mapped IOVA ranges */
};
struct VTDBus {
--
2.17.1
^ permalink raw reply related
* [Qemu-devel] [PATCH 34/99] arm_gicv3_kvm: increase clroffset accordingly
From: Michael Roth @ 2018-07-23 20:16 UTC (permalink / raw)
To: qemu-devel; +Cc: qemu-stable, Shannon Zhao, Peter Maydell
In-Reply-To: <20180723201748.25573-1-mdroth@linux.vnet.ibm.com>
From: Shannon Zhao <zhaoshenglong@huawei.com>
It forgot to increase clroffset during the loop. So it only clear the
first 4 bytes.
Fixes: 367b9f527becdd20ddf116e17a3c0c2bbc486920
Cc: qemu-stable@nongnu.org
Signed-off-by: Shannon Zhao <zhaoshenglong@huawei.com>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Message-id: 1527047633-12368-1-git-send-email-zhaoshenglong@huawei.com
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
(cherry picked from commit 34ffacae085914fce54590ea84bae9c6ad95e2a4)
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
---
hw/intc/arm_gicv3_kvm.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/hw/intc/arm_gicv3_kvm.c b/hw/intc/arm_gicv3_kvm.c
index ec371772b3..3536795501 100644
--- a/hw/intc/arm_gicv3_kvm.c
+++ b/hw/intc/arm_gicv3_kvm.c
@@ -243,6 +243,7 @@ static void kvm_dist_putbmp(GICv3State *s, uint32_t offset,
if (clroffset != 0) {
reg = 0;
kvm_gicd_access(s, clroffset, ®, true);
+ clroffset += 4;
}
reg = *gic_bmp_ptr32(bmp, irq);
kvm_gicd_access(s, offset, ®, true);
--
2.17.1
^ permalink raw reply related
* [Qemu-devel] [PATCH 32/99] util: implement simple iova tree
From: Michael Roth @ 2018-07-23 20:16 UTC (permalink / raw)
To: qemu-devel; +Cc: qemu-stable, Peter Xu, Michael S . Tsirkin
In-Reply-To: <20180723201748.25573-1-mdroth@linux.vnet.ibm.com>
From: Peter Xu <peterx@redhat.com>
Introduce a simplest iova tree implementation based on GTree.
CC: QEMU Stable <qemu-stable@nongnu.org>
Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
(cherry picked from commit eecf5eedbdc0fc04f39abcf3afeedfbf21b25ca4)
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
---
MAINTAINERS | 6 ++
include/qemu/iova-tree.h | 134 +++++++++++++++++++++++++++++++++++++++
util/Makefile.objs | 1 +
util/iova-tree.c | 114 +++++++++++++++++++++++++++++++++
4 files changed, 255 insertions(+)
create mode 100644 include/qemu/iova-tree.h
create mode 100644 util/iova-tree.c
diff --git a/MAINTAINERS b/MAINTAINERS
index 24b70169bc..ada7c33485 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1781,6 +1781,12 @@ F: include/sysemu/replay.h
F: docs/replay.txt
F: stubs/replay.c
+IOVA Tree
+M: Peter Xu <peterx@redhat.com>
+S: Maintained
+F: include/qemu/iova-tree.h
+F: util/iova-tree.c
+
Usermode Emulation
------------------
Overall
diff --git a/include/qemu/iova-tree.h b/include/qemu/iova-tree.h
new file mode 100644
index 0000000000..b061932097
--- /dev/null
+++ b/include/qemu/iova-tree.h
@@ -0,0 +1,134 @@
+/*
+ * An very simplified iova tree implementation based on GTree.
+ *
+ * Copyright 2018 Red Hat, Inc.
+ *
+ * Authors:
+ * Peter Xu <peterx@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ */
+#ifndef IOVA_TREE_H
+#define IOVA_TREE_H
+
+/*
+ * Currently the iova tree will only allow to keep ranges
+ * information, and no extra user data is allowed for each element. A
+ * benefit is that we can merge adjacent ranges internally within the
+ * tree. It can save a lot of memory when the ranges are splitted but
+ * mostly continuous.
+ *
+ * Note that current implementation does not provide any thread
+ * protections. Callers of the iova tree should be responsible
+ * for the thread safety issue.
+ */
+
+#include "qemu/osdep.h"
+#include "exec/memory.h"
+#include "exec/hwaddr.h"
+
+#define IOVA_OK (0)
+#define IOVA_ERR_INVALID (-1) /* Invalid parameters */
+#define IOVA_ERR_OVERLAP (-2) /* IOVA range overlapped */
+
+typedef struct IOVATree IOVATree;
+typedef struct DMAMap {
+ hwaddr iova;
+ hwaddr translated_addr;
+ hwaddr size; /* Inclusive */
+ IOMMUAccessFlags perm;
+} QEMU_PACKED DMAMap;
+typedef gboolean (*iova_tree_iterator)(DMAMap *map);
+
+/**
+ * iova_tree_new:
+ *
+ * Create a new iova tree.
+ *
+ * Returns: the tree pointer when succeeded, or NULL if error.
+ */
+IOVATree *iova_tree_new(void);
+
+/**
+ * iova_tree_insert:
+ *
+ * @tree: the iova tree to insert
+ * @map: the mapping to insert
+ *
+ * Insert an iova range to the tree. If there is overlapped
+ * ranges, IOVA_ERR_OVERLAP will be returned.
+ *
+ * Return: 0 if succeeded, or <0 if error.
+ */
+int iova_tree_insert(IOVATree *tree, DMAMap *map);
+
+/**
+ * iova_tree_remove:
+ *
+ * @tree: the iova tree to remove range from
+ * @map: the map range to remove
+ *
+ * Remove mappings from the tree that are covered by the map range
+ * provided. The range does not need to be exactly what has inserted,
+ * all the mappings that are included in the provided range will be
+ * removed from the tree. Here map->translated_addr is meaningless.
+ *
+ * Return: 0 if succeeded, or <0 if error.
+ */
+int iova_tree_remove(IOVATree *tree, DMAMap *map);
+
+/**
+ * iova_tree_find:
+ *
+ * @tree: the iova tree to search from
+ * @map: the mapping to search
+ *
+ * Search for a mapping in the iova tree that overlaps with the
+ * mapping range specified. Only the first found mapping will be
+ * returned.
+ *
+ * Return: DMAMap pointer if found, or NULL if not found. Note that
+ * the returned DMAMap pointer is maintained internally. User should
+ * only read the content but never modify or free the content. Also,
+ * user is responsible to make sure the pointer is valid (say, no
+ * concurrent deletion in progress).
+ */
+DMAMap *iova_tree_find(IOVATree *tree, DMAMap *map);
+
+/**
+ * iova_tree_find_address:
+ *
+ * @tree: the iova tree to search from
+ * @iova: the iova address to find
+ *
+ * Similar to iova_tree_find(), but it tries to find mapping with
+ * range iova=iova & size=0.
+ *
+ * Return: same as iova_tree_find().
+ */
+DMAMap *iova_tree_find_address(IOVATree *tree, hwaddr iova);
+
+/**
+ * iova_tree_foreach:
+ *
+ * @tree: the iova tree to iterate on
+ * @iterator: the interator for the mappings, return true to stop
+ *
+ * Iterate over the iova tree.
+ *
+ * Return: 1 if found any overlap, 0 if not, <0 if error.
+ */
+void iova_tree_foreach(IOVATree *tree, iova_tree_iterator iterator);
+
+/**
+ * iova_tree_destroy:
+ *
+ * @tree: the iova tree to destroy
+ *
+ * Destroy an existing iova tree.
+ *
+ * Return: None.
+ */
+void iova_tree_destroy(IOVATree *tree);
+
+#endif
diff --git a/util/Makefile.objs b/util/Makefile.objs
index 728c3541db..e1c3fed4dc 100644
--- a/util/Makefile.objs
+++ b/util/Makefile.objs
@@ -47,4 +47,5 @@ util-obj-y += qht.o
util-obj-y += range.o
util-obj-y += stats64.o
util-obj-y += systemd.o
+util-obj-y += iova-tree.o
util-obj-$(CONFIG_LINUX) += vfio-helpers.o
diff --git a/util/iova-tree.c b/util/iova-tree.c
new file mode 100644
index 0000000000..2d9cebfc89
--- /dev/null
+++ b/util/iova-tree.c
@@ -0,0 +1,114 @@
+/*
+ * IOVA tree implementation based on GTree.
+ *
+ * Copyright 2018 Red Hat, Inc.
+ *
+ * Authors:
+ * Peter Xu <peterx@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ */
+
+#include <glib.h>
+#include "qemu/iova-tree.h"
+
+struct IOVATree {
+ GTree *tree;
+};
+
+static int iova_tree_compare(gconstpointer a, gconstpointer b, gpointer data)
+{
+ const DMAMap *m1 = a, *m2 = b;
+
+ if (m1->iova > m2->iova + m2->size) {
+ return 1;
+ }
+
+ if (m1->iova + m1->size < m2->iova) {
+ return -1;
+ }
+
+ /* Overlapped */
+ return 0;
+}
+
+IOVATree *iova_tree_new(void)
+{
+ IOVATree *iova_tree = g_new0(IOVATree, 1);
+
+ /* We don't have values actually, no need to free */
+ iova_tree->tree = g_tree_new_full(iova_tree_compare, NULL, g_free, NULL);
+
+ return iova_tree;
+}
+
+DMAMap *iova_tree_find(IOVATree *tree, DMAMap *map)
+{
+ return g_tree_lookup(tree->tree, map);
+}
+
+DMAMap *iova_tree_find_address(IOVATree *tree, hwaddr iova)
+{
+ DMAMap map = { .iova = iova, .size = 0 };
+
+ return iova_tree_find(tree, &map);
+}
+
+static inline void iova_tree_insert_internal(GTree *gtree, DMAMap *range)
+{
+ /* Key and value are sharing the same range data */
+ g_tree_insert(gtree, range, range);
+}
+
+int iova_tree_insert(IOVATree *tree, DMAMap *map)
+{
+ DMAMap *new;
+
+ if (map->iova + map->size < map->iova || map->perm == IOMMU_NONE) {
+ return IOVA_ERR_INVALID;
+ }
+
+ /* We don't allow to insert range that overlaps with existings */
+ if (iova_tree_find(tree, map)) {
+ return IOVA_ERR_OVERLAP;
+ }
+
+ new = g_new0(DMAMap, 1);
+ memcpy(new, map, sizeof(*new));
+ iova_tree_insert_internal(tree->tree, new);
+
+ return IOVA_OK;
+}
+
+static gboolean iova_tree_traverse(gpointer key, gpointer value,
+ gpointer data)
+{
+ iova_tree_iterator iterator = data;
+ DMAMap *map = key;
+
+ g_assert(key == value);
+
+ return iterator(map);
+}
+
+void iova_tree_foreach(IOVATree *tree, iova_tree_iterator iterator)
+{
+ g_tree_foreach(tree->tree, iova_tree_traverse, iterator);
+}
+
+int iova_tree_remove(IOVATree *tree, DMAMap *map)
+{
+ DMAMap *overlap;
+
+ while ((overlap = iova_tree_find(tree, map))) {
+ g_tree_remove(tree->tree, overlap);
+ }
+
+ return IOVA_OK;
+}
+
+void iova_tree_destroy(IOVATree *tree)
+{
+ g_tree_destroy(tree->tree);
+ g_free(tree);
+}
--
2.17.1
^ permalink raw reply related
* [Qemu-devel] [PATCH 31/99] intel-iommu: trace domain id during page walk
From: Michael Roth @ 2018-07-23 20:16 UTC (permalink / raw)
To: qemu-devel; +Cc: qemu-stable, Peter Xu, Michael S . Tsirkin
In-Reply-To: <20180723201748.25573-1-mdroth@linux.vnet.ibm.com>
From: Peter Xu <peterx@redhat.com>
This patch only modifies the trace points.
Previously we were tracing page walk levels. They are redundant since
we have page mask (size) already. Now we trace something much more
useful which is the domain ID of the page walking. That can be very
useful when we trace more than one devices on the same system, so that
we can know which map is for which domain.
CC: QEMU Stable <qemu-stable@nongnu.org>
Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
(cherry picked from commit d118c06ebbee2d23ddf873cae4a809311aa61310)
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
---
hw/i386/intel_iommu.c | 16 ++++++++++------
hw/i386/trace-events | 2 +-
2 files changed, 11 insertions(+), 7 deletions(-)
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index a882894f49..61bb3d31e7 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -756,6 +756,7 @@ typedef int (*vtd_page_walk_hook)(IOMMUTLBEntry *entry, void *private);
* @notify_unmap: whether we should notify invalid entries
* @as: VT-d address space of the device
* @aw: maximum address width
+ * @domain: domain ID of the page walk
*/
typedef struct {
VTDAddressSpace *as;
@@ -763,17 +764,18 @@ typedef struct {
void *private;
bool notify_unmap;
uint8_t aw;
+ uint16_t domain_id;
} vtd_page_walk_info;
-static int vtd_page_walk_one(IOMMUTLBEntry *entry, int level,
- vtd_page_walk_info *info)
+static int vtd_page_walk_one(IOMMUTLBEntry *entry, vtd_page_walk_info *info)
{
vtd_page_walk_hook hook_fn = info->hook_fn;
void *private = info->private;
assert(hook_fn);
- trace_vtd_page_walk_one(level, entry->iova, entry->translated_addr,
- entry->addr_mask, entry->perm);
+ trace_vtd_page_walk_one(info->domain_id, entry->iova,
+ entry->translated_addr, entry->addr_mask,
+ entry->perm);
return hook_fn(entry, private);
}
@@ -844,7 +846,7 @@ static int vtd_page_walk_level(dma_addr_t addr, uint64_t start,
trace_vtd_page_walk_skip_perm(iova, iova_next);
goto next;
}
- ret = vtd_page_walk_one(&entry, level, info);
+ ret = vtd_page_walk_one(&entry, info);
if (ret < 0) {
return ret;
}
@@ -856,7 +858,7 @@ static int vtd_page_walk_level(dma_addr_t addr, uint64_t start,
* Translated address is meaningless, zero it.
*/
entry.translated_addr = 0x0;
- ret = vtd_page_walk_one(&entry, level, info);
+ ret = vtd_page_walk_one(&entry, info);
if (ret < 0) {
return ret;
}
@@ -1466,6 +1468,7 @@ static void vtd_iotlb_page_invalidate_notify(IntelIOMMUState *s,
.notify_unmap = true,
.aw = s->aw_bits,
.as = vtd_as,
+ .domain_id = domain_id,
};
/*
@@ -2947,6 +2950,7 @@ static void vtd_iommu_replay(IOMMUMemoryRegion *iommu_mr, IOMMUNotifier *n)
.notify_unmap = false,
.aw = s->aw_bits,
.as = vtd_as,
+ .domain_id = VTD_CONTEXT_ENTRY_DID(ce.hi),
};
vtd_page_walk(&ce, 0, ~0ULL, &info);
diff --git a/hw/i386/trace-events b/hw/i386/trace-events
index 22d44648af..ca23ba9fad 100644
--- a/hw/i386/trace-events
+++ b/hw/i386/trace-events
@@ -39,7 +39,7 @@ vtd_fault_disabled(void) "Fault processing disabled for context entry"
vtd_replay_ce_valid(uint8_t bus, uint8_t dev, uint8_t fn, uint16_t domain, uint64_t hi, uint64_t lo) "replay valid context device %02"PRIx8":%02"PRIx8".%02"PRIx8" domain 0x%"PRIx16" hi 0x%"PRIx64" lo 0x%"PRIx64
vtd_replay_ce_invalid(uint8_t bus, uint8_t dev, uint8_t fn) "replay invalid context device %02"PRIx8":%02"PRIx8".%02"PRIx8
vtd_page_walk_level(uint64_t addr, uint32_t level, uint64_t start, uint64_t end) "walk (base=0x%"PRIx64", level=%"PRIu32") iova range 0x%"PRIx64" - 0x%"PRIx64
-vtd_page_walk_one(uint32_t level, uint64_t iova, uint64_t gpa, uint64_t mask, int perm) "detected page level 0x%"PRIx32" iova 0x%"PRIx64" -> gpa 0x%"PRIx64" mask 0x%"PRIx64" perm %d"
+vtd_page_walk_one(uint16_t domain, uint64_t iova, uint64_t gpa, uint64_t mask, int perm) "domain 0x%"PRIu16" iova 0x%"PRIx64" -> gpa 0x%"PRIx64" mask 0x%"PRIx64" perm %d"
vtd_page_walk_skip_read(uint64_t iova, uint64_t next) "Page walk skip iova 0x%"PRIx64" - 0x%"PRIx64" due to unable to read"
vtd_page_walk_skip_perm(uint64_t iova, uint64_t next) "Page walk skip iova 0x%"PRIx64" - 0x%"PRIx64" due to perm empty"
vtd_page_walk_skip_reserve(uint64_t iova, uint64_t next) "Page walk skip iova 0x%"PRIx64" - 0x%"PRIx64" due to rsrv set"
--
2.17.1
^ permalink raw reply related
* ✗ Fi.CI.CHECKPATCH: warning for drm/i915: Skip repeated calls to i915_gem_set_wedged() (rev4)
From: Patchwork @ 2018-07-23 20:19 UTC (permalink / raw)
To: Chris Wilson; +Cc: intel-gfx
In-Reply-To: <20180723145335.24579-1-chris@chris-wilson.co.uk>
== Series Details ==
Series: drm/i915: Skip repeated calls to i915_gem_set_wedged() (rev4)
URL : https://patchwork.freedesktop.org/series/47067/
State : warning
== Summary ==
$ dim checkpatch origin/drm-tip
a85ab6631205 drm/i915: Skip repeated calls to i915_gem_set_wedged()
-:61: WARNING:MEMORY_BARRIER: memory barrier without comment
#61: FILE: drivers/gpu/drm/i915/i915_gem.c:3389:
+ smp_mb__before_atomic();
total: 0 errors, 1 warnings, 0 checks, 74 lines checked
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply
* [Qemu-devel] [PATCH 29/99] intel-iommu: introduce vtd_page_walk_info
From: Michael Roth @ 2018-07-23 20:16 UTC (permalink / raw)
To: qemu-devel; +Cc: qemu-stable, Peter Xu, Michael S . Tsirkin
In-Reply-To: <20180723201748.25573-1-mdroth@linux.vnet.ibm.com>
From: Peter Xu <peterx@redhat.com>
During the recursive page walking of IOVA page tables, some stack
variables are constant variables and never changed during the whole page
walking procedure. Isolate them into a struct so that we don't need to
pass those contants down the stack every time and multiple times.
CC: QEMU Stable <qemu-stable@nongnu.org>
Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
(cherry picked from commit fe215b0cbb8c1f4b4af0a64aa5c02042080dd537)
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
---
hw/i386/intel_iommu.c | 84 ++++++++++++++++++++++++++-----------------
1 file changed, 52 insertions(+), 32 deletions(-)
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 38ccc741f9..e247269659 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -748,9 +748,27 @@ static int vtd_iova_to_slpte(VTDContextEntry *ce, uint64_t iova, bool is_write,
typedef int (*vtd_page_walk_hook)(IOMMUTLBEntry *entry, void *private);
+/**
+ * Constant information used during page walking
+ *
+ * @hook_fn: hook func to be called when detected page
+ * @private: private data to be passed into hook func
+ * @notify_unmap: whether we should notify invalid entries
+ * @aw: maximum address width
+ */
+typedef struct {
+ vtd_page_walk_hook hook_fn;
+ void *private;
+ bool notify_unmap;
+ uint8_t aw;
+} vtd_page_walk_info;
+
static int vtd_page_walk_one(IOMMUTLBEntry *entry, int level,
- vtd_page_walk_hook hook_fn, void *private)
+ vtd_page_walk_info *info)
{
+ vtd_page_walk_hook hook_fn = info->hook_fn;
+ void *private = info->private;
+
assert(hook_fn);
trace_vtd_page_walk_one(level, entry->iova, entry->translated_addr,
entry->addr_mask, entry->perm);
@@ -763,17 +781,13 @@ static int vtd_page_walk_one(IOMMUTLBEntry *entry, int level,
* @addr: base GPA addr to start the walk
* @start: IOVA range start address
* @end: IOVA range end address (start <= addr < end)
- * @hook_fn: hook func to be called when detected page
- * @private: private data to be passed into hook func
* @read: whether parent level has read permission
* @write: whether parent level has write permission
- * @notify_unmap: whether we should notify invalid entries
- * @aw: maximum address width
+ * @info: constant information for the page walk
*/
static int vtd_page_walk_level(dma_addr_t addr, uint64_t start,
- uint64_t end, vtd_page_walk_hook hook_fn,
- void *private, uint32_t level, bool read,
- bool write, bool notify_unmap, uint8_t aw)
+ uint64_t end, uint32_t level, bool read,
+ bool write, vtd_page_walk_info *info)
{
bool read_cur, write_cur, entry_valid;
uint32_t offset;
@@ -823,24 +837,24 @@ static int vtd_page_walk_level(dma_addr_t addr, uint64_t start,
if (vtd_is_last_slpte(slpte, level)) {
/* NOTE: this is only meaningful if entry_valid == true */
- entry.translated_addr = vtd_get_slpte_addr(slpte, aw);
- if (!entry_valid && !notify_unmap) {
+ entry.translated_addr = vtd_get_slpte_addr(slpte, info->aw);
+ if (!entry_valid && !info->notify_unmap) {
trace_vtd_page_walk_skip_perm(iova, iova_next);
goto next;
}
- ret = vtd_page_walk_one(&entry, level, hook_fn, private);
+ ret = vtd_page_walk_one(&entry, level, info);
if (ret < 0) {
return ret;
}
} else {
if (!entry_valid) {
- if (notify_unmap) {
+ if (info->notify_unmap) {
/*
* The whole entry is invalid; unmap it all.
* Translated address is meaningless, zero it.
*/
entry.translated_addr = 0x0;
- ret = vtd_page_walk_one(&entry, level, hook_fn, private);
+ ret = vtd_page_walk_one(&entry, level, info);
if (ret < 0) {
return ret;
}
@@ -849,10 +863,9 @@ static int vtd_page_walk_level(dma_addr_t addr, uint64_t start,
}
goto next;
}
- ret = vtd_page_walk_level(vtd_get_slpte_addr(slpte, aw), iova,
- MIN(iova_next, end), hook_fn, private,
- level - 1, read_cur, write_cur,
- notify_unmap, aw);
+ ret = vtd_page_walk_level(vtd_get_slpte_addr(slpte, info->aw),
+ iova, MIN(iova_next, end), level - 1,
+ read_cur, write_cur, info);
if (ret < 0) {
return ret;
}
@@ -871,28 +884,24 @@ next:
* @ce: context entry to walk upon
* @start: IOVA address to start the walk
* @end: IOVA range end address (start <= addr < end)
- * @hook_fn: the hook that to be called for each detected area
- * @private: private data for the hook function
- * @aw: maximum address width
+ * @info: page walking information struct
*/
static int vtd_page_walk(VTDContextEntry *ce, uint64_t start, uint64_t end,
- vtd_page_walk_hook hook_fn, void *private,
- bool notify_unmap, uint8_t aw)
+ vtd_page_walk_info *info)
{
dma_addr_t addr = vtd_ce_get_slpt_base(ce);
uint32_t level = vtd_ce_get_level(ce);
- if (!vtd_iova_range_check(start, ce, aw)) {
+ if (!vtd_iova_range_check(start, ce, info->aw)) {
return -VTD_FR_ADDR_BEYOND_MGAW;
}
- if (!vtd_iova_range_check(end, ce, aw)) {
+ if (!vtd_iova_range_check(end, ce, info->aw)) {
/* Fix end so that it reaches the maximum */
- end = vtd_iova_limit(ce, aw);
+ end = vtd_iova_limit(ce, info->aw);
}
- return vtd_page_walk_level(addr, start, end, hook_fn, private,
- level, true, true, notify_unmap, aw);
+ return vtd_page_walk_level(addr, start, end, level, true, true, info);
}
/* Map a device to its corresponding domain (context-entry) */
@@ -1449,14 +1458,19 @@ static void vtd_iotlb_page_invalidate_notify(IntelIOMMUState *s,
vtd_as->devfn, &ce);
if (!ret && domain_id == VTD_CONTEXT_ENTRY_DID(ce.hi)) {
if (vtd_as_has_map_notifier(vtd_as)) {
+ vtd_page_walk_info info = {
+ .hook_fn = vtd_page_invalidate_notify_hook,
+ .private = (void *)&vtd_as->iommu,
+ .notify_unmap = true,
+ .aw = s->aw_bits,
+ };
+
/*
* As long as we have MAP notifications registered in
* any of our IOMMU notifiers, we need to sync the
* shadow page table.
*/
- vtd_page_walk(&ce, addr, addr + size,
- vtd_page_invalidate_notify_hook,
- (void *)&vtd_as->iommu, true, s->aw_bits);
+ vtd_page_walk(&ce, addr, addr + size, &info);
} else {
/*
* For UNMAP-only notifiers, we don't need to walk the
@@ -2924,8 +2938,14 @@ static void vtd_iommu_replay(IOMMUMemoryRegion *iommu_mr, IOMMUNotifier *n)
ce.hi, ce.lo);
if (vtd_as_has_map_notifier(vtd_as)) {
/* This is required only for MAP typed notifiers */
- vtd_page_walk(&ce, 0, ~0ULL, vtd_replay_hook, (void *)n, false,
- s->aw_bits);
+ vtd_page_walk_info info = {
+ .hook_fn = vtd_replay_hook,
+ .private = (void *)n,
+ .notify_unmap = false,
+ .aw = s->aw_bits,
+ };
+
+ vtd_page_walk(&ce, 0, ~0ULL, &info);
}
} else {
trace_vtd_replay_ce_invalid(bus_n, PCI_SLOT(vtd_as->devfn),
--
2.17.1
^ permalink raw reply related
* [Qemu-devel] [PATCH 30/99] intel-iommu: pass in address space when page walk
From: Michael Roth @ 2018-07-23 20:16 UTC (permalink / raw)
To: qemu-devel; +Cc: qemu-stable, Peter Xu, Michael S . Tsirkin
In-Reply-To: <20180723201748.25573-1-mdroth@linux.vnet.ibm.com>
From: Peter Xu <peterx@redhat.com>
We pass in the VTDAddressSpace too. It'll be used in the follow up
patches.
CC: QEMU Stable <qemu-stable@nongnu.org>
Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
(cherry picked from commit 2f764fa87d2a81812b313dd6d998e10126292653)
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
---
hw/i386/intel_iommu.c | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index e247269659..a882894f49 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -754,9 +754,11 @@ typedef int (*vtd_page_walk_hook)(IOMMUTLBEntry *entry, void *private);
* @hook_fn: hook func to be called when detected page
* @private: private data to be passed into hook func
* @notify_unmap: whether we should notify invalid entries
+ * @as: VT-d address space of the device
* @aw: maximum address width
*/
typedef struct {
+ VTDAddressSpace *as;
vtd_page_walk_hook hook_fn;
void *private;
bool notify_unmap;
@@ -1463,6 +1465,7 @@ static void vtd_iotlb_page_invalidate_notify(IntelIOMMUState *s,
.private = (void *)&vtd_as->iommu,
.notify_unmap = true,
.aw = s->aw_bits,
+ .as = vtd_as,
};
/*
@@ -2943,6 +2946,7 @@ static void vtd_iommu_replay(IOMMUMemoryRegion *iommu_mr, IOMMUNotifier *n)
.private = (void *)n,
.notify_unmap = false,
.aw = s->aw_bits,
+ .as = vtd_as,
};
vtd_page_walk(&ce, 0, ~0ULL, &info);
--
2.17.1
^ permalink raw reply related
* [Qemu-devel] [PATCH 02/99] device_tree: Increase FDT_MAX_SIZE to 1 MiB
From: Michael Roth @ 2018-07-23 20:16 UTC (permalink / raw)
To: qemu-devel; +Cc: qemu-stable, Geert Uytterhoeven, Peter Maydell
In-Reply-To: <20180723201748.25573-1-mdroth@linux.vnet.ibm.com>
From: Geert Uytterhoeven <geert+renesas@glider.be>
It is not uncommon for a contemporary FDT to be larger than 64 KiB,
leading to failures loading the device tree from sysfs:
qemu-system-aarch64: qemu_fdt_setprop: Couldn't set ...: FDT_ERR_NOSPACE
Hence increase the limit to 1 MiB, like on PPC.
For reference, the largest arm64 DTB created from the Linux sources is
ca. 75 KiB large (100 KiB when built with symbols/fixup support).
Cc: qemu-stable@nongnu.org
Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Message-id: 1523541337-23919-1-git-send-email-geert+renesas@glider.be
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
(cherry picked from commit 14ec3cbd7c1e31dca4d23f028100c8f43e156573)
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
---
device_tree.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/device_tree.c b/device_tree.c
index 19458b32bf..52c3358a55 100644
--- a/device_tree.c
+++ b/device_tree.c
@@ -29,7 +29,7 @@
#include <libfdt.h>
-#define FDT_MAX_SIZE 0x10000
+#define FDT_MAX_SIZE 0x100000
void *create_device_tree(int *sizep)
{
--
2.17.1
^ permalink raw reply related
* [Qemu-devel] [PATCH 28/99] intel-iommu: only do page walk for MAP notifiers
From: Michael Roth @ 2018-07-23 20:16 UTC (permalink / raw)
To: qemu-devel; +Cc: qemu-stable, Peter Xu, Michael S . Tsirkin
In-Reply-To: <20180723201748.25573-1-mdroth@linux.vnet.ibm.com>
From: Peter Xu <peterx@redhat.com>
For UNMAP-only IOMMU notifiers, we don't need to walk the page tables.
Fasten that procedure by skipping the page table walk. That should
boost performance for UNMAP-only notifiers like vhost.
CC: QEMU Stable <qemu-stable@nongnu.org>
Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
(cherry picked from commit 4f8a62a933a79094e44bc1b16b63bb23e62d67b4)
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
---
hw/i386/intel_iommu.c | 44 +++++++++++++++++++++++++++++++----
include/hw/i386/intel_iommu.h | 2 ++
2 files changed, 41 insertions(+), 5 deletions(-)
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 8d4069de9f..38ccc741f9 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -138,6 +138,12 @@ static inline void vtd_iommu_unlock(IntelIOMMUState *s)
qemu_mutex_unlock(&s->iommu_lock);
}
+/* Whether the address space needs to notify new mappings */
+static inline gboolean vtd_as_has_map_notifier(VTDAddressSpace *as)
+{
+ return as->notifier_flags & IOMMU_NOTIFIER_MAP;
+}
+
/* GHashTable functions */
static gboolean vtd_uint64_equal(gconstpointer v1, gconstpointer v2)
{
@@ -1436,14 +1442,36 @@ static void vtd_iotlb_page_invalidate_notify(IntelIOMMUState *s,
VTDAddressSpace *vtd_as;
VTDContextEntry ce;
int ret;
+ hwaddr size = (1 << am) * VTD_PAGE_SIZE;
QLIST_FOREACH(vtd_as, &(s->vtd_as_with_notifiers), next) {
ret = vtd_dev_to_context_entry(s, pci_bus_num(vtd_as->bus),
vtd_as->devfn, &ce);
if (!ret && domain_id == VTD_CONTEXT_ENTRY_DID(ce.hi)) {
- vtd_page_walk(&ce, addr, addr + (1 << am) * VTD_PAGE_SIZE,
- vtd_page_invalidate_notify_hook,
- (void *)&vtd_as->iommu, true, s->aw_bits);
+ if (vtd_as_has_map_notifier(vtd_as)) {
+ /*
+ * As long as we have MAP notifications registered in
+ * any of our IOMMU notifiers, we need to sync the
+ * shadow page table.
+ */
+ vtd_page_walk(&ce, addr, addr + size,
+ vtd_page_invalidate_notify_hook,
+ (void *)&vtd_as->iommu, true, s->aw_bits);
+ } else {
+ /*
+ * For UNMAP-only notifiers, we don't need to walk the
+ * page tables. We just deliver the PSI down to
+ * invalidate caches.
+ */
+ IOMMUTLBEntry entry = {
+ .target_as = &address_space_memory,
+ .iova = addr,
+ .translated_addr = 0,
+ .addr_mask = size - 1,
+ .perm = IOMMU_NONE,
+ };
+ memory_region_notify_iommu(&vtd_as->iommu, entry);
+ }
}
}
}
@@ -2383,6 +2411,9 @@ static void vtd_iommu_notify_flag_changed(IOMMUMemoryRegion *iommu,
exit(1);
}
+ /* Update per-address-space notifier flags */
+ vtd_as->notifier_flags = new;
+
if (old == IOMMU_NOTIFIER_NONE) {
QLIST_INSERT_HEAD(&s->vtd_as_with_notifiers, vtd_as, next);
} else if (new == IOMMU_NOTIFIER_NONE) {
@@ -2891,8 +2922,11 @@ static void vtd_iommu_replay(IOMMUMemoryRegion *iommu_mr, IOMMUNotifier *n)
PCI_FUNC(vtd_as->devfn),
VTD_CONTEXT_ENTRY_DID(ce.hi),
ce.hi, ce.lo);
- vtd_page_walk(&ce, 0, ~0ULL, vtd_replay_hook, (void *)n, false,
- s->aw_bits);
+ if (vtd_as_has_map_notifier(vtd_as)) {
+ /* This is required only for MAP typed notifiers */
+ vtd_page_walk(&ce, 0, ~0ULL, vtd_replay_hook, (void *)n, false,
+ s->aw_bits);
+ }
} else {
trace_vtd_replay_ce_invalid(bus_n, PCI_SLOT(vtd_as->devfn),
PCI_FUNC(vtd_as->devfn));
diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h
index 016e74bedb..156f35e919 100644
--- a/include/hw/i386/intel_iommu.h
+++ b/include/hw/i386/intel_iommu.h
@@ -93,6 +93,8 @@ struct VTDAddressSpace {
IntelIOMMUState *iommu_state;
VTDContextCacheEntry context_cache_entry;
QLIST_ENTRY(VTDAddressSpace) next;
+ /* Superset of notifier flags that this address space has */
+ IOMMUNotifierFlag notifier_flags;
};
struct VTDBus {
--
2.17.1
^ permalink raw reply related
* [Qemu-devel] [PATCH 27/99] intel-iommu: add iommu lock
From: Michael Roth @ 2018-07-23 20:16 UTC (permalink / raw)
To: qemu-devel; +Cc: qemu-stable, Peter Xu, Michael S . Tsirkin
In-Reply-To: <20180723201748.25573-1-mdroth@linux.vnet.ibm.com>
From: Peter Xu <peterx@redhat.com>
SECURITY IMPLICATION: this patch fixes a potential race when multiple
threads access the IOMMU IOTLB cache.
Add a per-iommu big lock to protect IOMMU status. Currently the only
thing to be protected is the IOTLB/context cache, since that can be
accessed even without BQL, e.g., in IO dataplane.
Note that we don't need to protect device page tables since that's fully
controlled by the guest kernel. However there is still possibility that
malicious drivers will program the device to not obey the rule. In that
case QEMU can't really do anything useful, instead the guest itself will
be responsible for all uncertainties.
CC: QEMU Stable <qemu-stable@nongnu.org>
Reported-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
(cherry picked from commit 1d9efa73e12ddf361ea997c2d532cc4afa6674d1)
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
---
hw/i386/intel_iommu.c | 56 +++++++++++++++++++++++++++++------
include/hw/i386/intel_iommu.h | 6 ++++
2 files changed, 53 insertions(+), 9 deletions(-)
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 3df90457f8..8d4069de9f 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -128,6 +128,16 @@ static uint64_t vtd_set_clear_mask_quad(IntelIOMMUState *s, hwaddr addr,
return new_val;
}
+static inline void vtd_iommu_lock(IntelIOMMUState *s)
+{
+ qemu_mutex_lock(&s->iommu_lock);
+}
+
+static inline void vtd_iommu_unlock(IntelIOMMUState *s)
+{
+ qemu_mutex_unlock(&s->iommu_lock);
+}
+
/* GHashTable functions */
static gboolean vtd_uint64_equal(gconstpointer v1, gconstpointer v2)
{
@@ -172,9 +182,9 @@ static gboolean vtd_hash_remove_by_page(gpointer key, gpointer value,
}
/* Reset all the gen of VTDAddressSpace to zero and set the gen of
- * IntelIOMMUState to 1.
+ * IntelIOMMUState to 1. Must be called with IOMMU lock held.
*/
-static void vtd_reset_context_cache(IntelIOMMUState *s)
+static void vtd_reset_context_cache_locked(IntelIOMMUState *s)
{
VTDAddressSpace *vtd_as;
VTDBus *vtd_bus;
@@ -197,12 +207,20 @@ static void vtd_reset_context_cache(IntelIOMMUState *s)
s->context_cache_gen = 1;
}
-static void vtd_reset_iotlb(IntelIOMMUState *s)
+/* Must be called with IOMMU lock held. */
+static void vtd_reset_iotlb_locked(IntelIOMMUState *s)
{
assert(s->iotlb);
g_hash_table_remove_all(s->iotlb);
}
+static void vtd_reset_iotlb(IntelIOMMUState *s)
+{
+ vtd_iommu_lock(s);
+ vtd_reset_iotlb_locked(s);
+ vtd_iommu_unlock(s);
+}
+
static uint64_t vtd_get_iotlb_key(uint64_t gfn, uint16_t source_id,
uint32_t level)
{
@@ -215,6 +233,7 @@ static uint64_t vtd_get_iotlb_gfn(hwaddr addr, uint32_t level)
return (addr & vtd_slpt_level_page_mask(level)) >> VTD_PAGE_SHIFT_4K;
}
+/* Must be called with IOMMU lock held */
static VTDIOTLBEntry *vtd_lookup_iotlb(IntelIOMMUState *s, uint16_t source_id,
hwaddr addr)
{
@@ -235,6 +254,7 @@ out:
return entry;
}
+/* Must be with IOMMU lock held */
static void vtd_update_iotlb(IntelIOMMUState *s, uint16_t source_id,
uint16_t domain_id, hwaddr addr, uint64_t slpte,
uint8_t access_flags, uint32_t level)
@@ -246,7 +266,7 @@ static void vtd_update_iotlb(IntelIOMMUState *s, uint16_t source_id,
trace_vtd_iotlb_page_update(source_id, addr, slpte, domain_id);
if (g_hash_table_size(s->iotlb) >= VTD_IOTLB_MAX_SIZE) {
trace_vtd_iotlb_reset("iotlb exceeds size limit");
- vtd_reset_iotlb(s);
+ vtd_reset_iotlb_locked(s);
}
entry->gfn = gfn;
@@ -1106,7 +1126,7 @@ static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
IntelIOMMUState *s = vtd_as->iommu_state;
VTDContextEntry ce;
uint8_t bus_num = pci_bus_num(bus);
- VTDContextCacheEntry *cc_entry = &vtd_as->context_cache_entry;
+ VTDContextCacheEntry *cc_entry;
uint64_t slpte, page_mask;
uint32_t level;
uint16_t source_id = vtd_make_source_id(bus_num, devfn);
@@ -1123,6 +1143,10 @@ static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
*/
assert(!vtd_is_interrupt_addr(addr));
+ vtd_iommu_lock(s);
+
+ cc_entry = &vtd_as->context_cache_entry;
+
/* Try to fetch slpte form IOTLB */
iotlb_entry = vtd_lookup_iotlb(s, source_id, addr);
if (iotlb_entry) {
@@ -1182,7 +1206,7 @@ static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
* IOMMU region can be swapped back.
*/
vtd_pt_enable_fast_path(s, source_id);
-
+ vtd_iommu_unlock(s);
return true;
}
@@ -1203,6 +1227,7 @@ static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
vtd_update_iotlb(s, source_id, VTD_CONTEXT_ENTRY_DID(ce.hi), addr, slpte,
access_flags, level);
out:
+ vtd_iommu_unlock(s);
entry->iova = addr & page_mask;
entry->translated_addr = vtd_get_slpte_addr(slpte, s->aw_bits) & page_mask;
entry->addr_mask = ~page_mask;
@@ -1210,6 +1235,7 @@ out:
return true;
error:
+ vtd_iommu_unlock(s);
entry->iova = 0;
entry->translated_addr = 0;
entry->addr_mask = 0;
@@ -1258,10 +1284,13 @@ static void vtd_iommu_replay_all(IntelIOMMUState *s)
static void vtd_context_global_invalidate(IntelIOMMUState *s)
{
trace_vtd_inv_desc_cc_global();
+ /* Protects context cache */
+ vtd_iommu_lock(s);
s->context_cache_gen++;
if (s->context_cache_gen == VTD_CONTEXT_CACHE_GEN_MAX) {
- vtd_reset_context_cache(s);
+ vtd_reset_context_cache_locked(s);
}
+ vtd_iommu_unlock(s);
vtd_switch_address_space_all(s);
/*
* From VT-d spec 6.5.2.1, a global context entry invalidation
@@ -1313,7 +1342,9 @@ static void vtd_context_device_invalidate(IntelIOMMUState *s,
if (vtd_as && ((devfn_it & mask) == (devfn & mask))) {
trace_vtd_inv_desc_cc_device(bus_n, VTD_PCI_SLOT(devfn_it),
VTD_PCI_FUNC(devfn_it));
+ vtd_iommu_lock(s);
vtd_as->context_cache_entry.context_cache_gen = 0;
+ vtd_iommu_unlock(s);
/*
* Do switch address space when needed, in case if the
* device passthrough bit is switched.
@@ -1377,8 +1408,10 @@ static void vtd_iotlb_domain_invalidate(IntelIOMMUState *s, uint16_t domain_id)
trace_vtd_inv_desc_iotlb_domain(domain_id);
+ vtd_iommu_lock(s);
g_hash_table_foreach_remove(s->iotlb, vtd_hash_remove_by_domain,
&domain_id);
+ vtd_iommu_unlock(s);
QLIST_FOREACH(vtd_as, &s->vtd_as_with_notifiers, next) {
if (!vtd_dev_to_context_entry(s, pci_bus_num(vtd_as->bus),
@@ -1426,7 +1459,9 @@ static void vtd_iotlb_page_invalidate(IntelIOMMUState *s, uint16_t domain_id,
info.domain_id = domain_id;
info.addr = addr;
info.mask = ~((1 << am) - 1);
+ vtd_iommu_lock(s);
g_hash_table_foreach_remove(s->iotlb, vtd_hash_remove_by_page, &info);
+ vtd_iommu_unlock(s);
vtd_iotlb_page_invalidate_notify(s, domain_id, addr, am);
}
@@ -2929,8 +2964,10 @@ static void vtd_init(IntelIOMMUState *s)
s->cap |= VTD_CAP_CM;
}
- vtd_reset_context_cache(s);
- vtd_reset_iotlb(s);
+ vtd_iommu_lock(s);
+ vtd_reset_context_cache_locked(s);
+ vtd_reset_iotlb_locked(s);
+ vtd_iommu_unlock(s);
/* Define registers with default values and bit semantics */
vtd_define_long(s, DMAR_VER_REG, 0x10UL, 0, 0);
@@ -3070,6 +3107,7 @@ static void vtd_realize(DeviceState *dev, Error **errp)
}
QLIST_INIT(&s->vtd_as_with_notifiers);
+ qemu_mutex_init(&s->iommu_lock);
memset(s->vtd_as_by_bus_num, 0, sizeof(s->vtd_as_by_bus_num));
memory_region_init_io(&s->csrmem, OBJECT(s), &vtd_mem_ops, s,
"intel_iommu", DMAR_REG_SIZE);
diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h
index 032e33bcb2..016e74bedb 100644
--- a/include/hw/i386/intel_iommu.h
+++ b/include/hw/i386/intel_iommu.h
@@ -300,6 +300,12 @@ struct IntelIOMMUState {
OnOffAuto intr_eim; /* Toggle for EIM cabability */
bool buggy_eim; /* Force buggy EIM unless eim=off */
uint8_t aw_bits; /* Host/IOVA address width (in bits) */
+
+ /*
+ * Protects IOMMU states in general. Currently it protects the
+ * per-IOMMU IOTLB cache, and context entry cache in VTDAddressSpace.
+ */
+ QemuMutex iommu_lock;
};
/* Find the VTD Address space associated with the given bus pointer,
--
2.17.1
^ permalink raw reply related
* [Qemu-devel] [PATCH 26/99] intel-iommu: remove IntelIOMMUNotifierNode
From: Michael Roth @ 2018-07-23 20:16 UTC (permalink / raw)
To: qemu-devel; +Cc: qemu-stable, Peter Xu, Michael S . Tsirkin
In-Reply-To: <20180723201748.25573-1-mdroth@linux.vnet.ibm.com>
From: Peter Xu <peterx@redhat.com>
That is not really necessary. Removing that node struct and put the
list entry directly into VTDAddressSpace. It simplfies the code a lot.
Since at it, rename the old notifiers_list into vtd_as_with_notifiers.
CC: QEMU Stable <qemu-stable@nongnu.org>
Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
(cherry picked from commit b4a4ba0d68f50f218ee3957b6638dbee32a5eeef)
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
---
hw/i386/intel_iommu.c | 41 ++++++++++-------------------------
include/hw/i386/intel_iommu.h | 9 ++------
2 files changed, 13 insertions(+), 37 deletions(-)
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index b359efd6f9..3df90457f8 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -1248,10 +1248,10 @@ static void vtd_interrupt_remap_table_setup(IntelIOMMUState *s)
static void vtd_iommu_replay_all(IntelIOMMUState *s)
{
- IntelIOMMUNotifierNode *node;
+ VTDAddressSpace *vtd_as;
- QLIST_FOREACH(node, &s->notifiers_list, next) {
- memory_region_iommu_replay_all(&node->vtd_as->iommu);
+ QLIST_FOREACH(vtd_as, &s->vtd_as_with_notifiers, next) {
+ memory_region_iommu_replay_all(&vtd_as->iommu);
}
}
@@ -1372,7 +1372,6 @@ static void vtd_iotlb_global_invalidate(IntelIOMMUState *s)
static void vtd_iotlb_domain_invalidate(IntelIOMMUState *s, uint16_t domain_id)
{
- IntelIOMMUNotifierNode *node;
VTDContextEntry ce;
VTDAddressSpace *vtd_as;
@@ -1381,8 +1380,7 @@ static void vtd_iotlb_domain_invalidate(IntelIOMMUState *s, uint16_t domain_id)
g_hash_table_foreach_remove(s->iotlb, vtd_hash_remove_by_domain,
&domain_id);
- QLIST_FOREACH(node, &s->notifiers_list, next) {
- vtd_as = node->vtd_as;
+ QLIST_FOREACH(vtd_as, &s->vtd_as_with_notifiers, next) {
if (!vtd_dev_to_context_entry(s, pci_bus_num(vtd_as->bus),
vtd_as->devfn, &ce) &&
domain_id == VTD_CONTEXT_ENTRY_DID(ce.hi)) {
@@ -1402,12 +1400,11 @@ static void vtd_iotlb_page_invalidate_notify(IntelIOMMUState *s,
uint16_t domain_id, hwaddr addr,
uint8_t am)
{
- IntelIOMMUNotifierNode *node;
+ VTDAddressSpace *vtd_as;
VTDContextEntry ce;
int ret;
- QLIST_FOREACH(node, &(s->notifiers_list), next) {
- VTDAddressSpace *vtd_as = node->vtd_as;
+ QLIST_FOREACH(vtd_as, &(s->vtd_as_with_notifiers), next) {
ret = vtd_dev_to_context_entry(s, pci_bus_num(vtd_as->bus),
vtd_as->devfn, &ce);
if (!ret && domain_id == VTD_CONTEXT_ENTRY_DID(ce.hi)) {
@@ -2344,8 +2341,6 @@ static void vtd_iommu_notify_flag_changed(IOMMUMemoryRegion *iommu,
{
VTDAddressSpace *vtd_as = container_of(iommu, VTDAddressSpace, iommu);
IntelIOMMUState *s = vtd_as->iommu_state;
- IntelIOMMUNotifierNode *node = NULL;
- IntelIOMMUNotifierNode *next_node = NULL;
if (!s->caching_mode && new & IOMMU_NOTIFIER_MAP) {
error_report("We need to set caching-mode=1 for intel-iommu to enable "
@@ -2354,21 +2349,9 @@ static void vtd_iommu_notify_flag_changed(IOMMUMemoryRegion *iommu,
}
if (old == IOMMU_NOTIFIER_NONE) {
- node = g_malloc0(sizeof(*node));
- node->vtd_as = vtd_as;
- QLIST_INSERT_HEAD(&s->notifiers_list, node, next);
- return;
- }
-
- /* update notifier node with new flags */
- QLIST_FOREACH_SAFE(node, &s->notifiers_list, next, next_node) {
- if (node->vtd_as == vtd_as) {
- if (new == IOMMU_NOTIFIER_NONE) {
- QLIST_REMOVE(node, next);
- g_free(node);
- }
- return;
- }
+ QLIST_INSERT_HEAD(&s->vtd_as_with_notifiers, vtd_as, next);
+ } else if (new == IOMMU_NOTIFIER_NONE) {
+ QLIST_REMOVE(vtd_as, next);
}
}
@@ -2838,12 +2821,10 @@ static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier *n)
static void vtd_address_space_unmap_all(IntelIOMMUState *s)
{
- IntelIOMMUNotifierNode *node;
VTDAddressSpace *vtd_as;
IOMMUNotifier *n;
- QLIST_FOREACH(node, &s->notifiers_list, next) {
- vtd_as = node->vtd_as;
+ QLIST_FOREACH(vtd_as, &s->vtd_as_with_notifiers, next) {
IOMMU_NOTIFIER_FOREACH(n, &vtd_as->iommu) {
vtd_address_space_unmap(vtd_as, n);
}
@@ -3088,7 +3069,7 @@ static void vtd_realize(DeviceState *dev, Error **errp)
return;
}
- QLIST_INIT(&s->notifiers_list);
+ QLIST_INIT(&s->vtd_as_with_notifiers);
memset(s->vtd_as_by_bus_num, 0, sizeof(s->vtd_as_by_bus_num));
memory_region_init_io(&s->csrmem, OBJECT(s), &vtd_mem_ops, s,
"intel_iommu", DMAR_REG_SIZE);
diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h
index 45ec8919b6..032e33bcb2 100644
--- a/include/hw/i386/intel_iommu.h
+++ b/include/hw/i386/intel_iommu.h
@@ -67,7 +67,6 @@ typedef union VTD_IR_TableEntry VTD_IR_TableEntry;
typedef union VTD_IR_MSIAddress VTD_IR_MSIAddress;
typedef struct VTDIrq VTDIrq;
typedef struct VTD_MSIMessage VTD_MSIMessage;
-typedef struct IntelIOMMUNotifierNode IntelIOMMUNotifierNode;
/* Context-Entry */
struct VTDContextEntry {
@@ -93,6 +92,7 @@ struct VTDAddressSpace {
MemoryRegion iommu_ir; /* Interrupt region: 0xfeeXXXXX */
IntelIOMMUState *iommu_state;
VTDContextCacheEntry context_cache_entry;
+ QLIST_ENTRY(VTDAddressSpace) next;
};
struct VTDBus {
@@ -253,11 +253,6 @@ struct VTD_MSIMessage {
/* When IR is enabled, all MSI/MSI-X data bits should be zero */
#define VTD_IR_MSI_DATA (0)
-struct IntelIOMMUNotifierNode {
- VTDAddressSpace *vtd_as;
- QLIST_ENTRY(IntelIOMMUNotifierNode) next;
-};
-
/* The iommu (DMAR) device state struct */
struct IntelIOMMUState {
X86IOMMUState x86_iommu;
@@ -295,7 +290,7 @@ struct IntelIOMMUState {
GHashTable *vtd_as_by_busptr; /* VTDBus objects indexed by PCIBus* reference */
VTDBus *vtd_as_by_bus_num[VTD_PCI_BUS_MAX]; /* VTDBus objects indexed by bus number */
/* list of registered notifiers */
- QLIST_HEAD(, IntelIOMMUNotifierNode) notifiers_list;
+ QLIST_HEAD(, VTDAddressSpace) vtd_as_with_notifiers;
/* interrupt remapping */
bool intr_enabled; /* Whether guest enabled IR */
--
2.17.1
^ permalink raw reply related
* [Qemu-devel] [PATCH 25/99] intel-iommu: send PSI always even if across PDEs
From: Michael Roth @ 2018-07-23 20:16 UTC (permalink / raw)
To: qemu-devel; +Cc: qemu-stable, Peter Xu, Michael S . Tsirkin
In-Reply-To: <20180723201748.25573-1-mdroth@linux.vnet.ibm.com>
From: Peter Xu <peterx@redhat.com>
SECURITY IMPLICATION: without this patch, any guest with both assigned
device and a vIOMMU might encounter stale IO page mappings even if guest
has already unmapped the page, which may lead to guest memory
corruption. The stale mappings will only be limited to the guest's own
memory range, so it should not affect the host memory or other guests on
the host.
During IOVA page table walking, there is a special case when the PSI
covers one whole PDE (Page Directory Entry, which contains 512 Page
Table Entries) or more. In the past, we skip that entry and we don't
notify the IOMMU notifiers. This is not correct. We should send UNMAP
notification to registered UNMAP notifiers in this case.
For UNMAP only notifiers, this might cause IOTLBs cached in the devices
even if they were already invalid. For MAP/UNMAP notifiers like
vfio-pci, this will cause stale page mappings.
This special case doesn't trigger often, but it is very easy to be
triggered by nested device assignments, since in that case we'll
possibly map the whole L2 guest RAM region into the device's IOVA
address space (several GBs at least), which is far bigger than normal
kernel driver usages of the device (tens of MBs normally).
Without this patch applied to L1 QEMU, nested device assignment to L2
guests will dump some errors like:
qemu-system-x86_64: VFIO_MAP_DMA: -17
qemu-system-x86_64: vfio_dma_map(0x557305420c30, 0xad000, 0x1000,
0x7f89a920d000) = -17 (File exists)
CC: QEMU Stable <qemu-stable@nongnu.org>
Acked-by: Jason Wang <jasowang@redhat.com>
[peterx: rewrite the commit message]
Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
(cherry picked from commit 36d2d52bdb45f5b753a61fdaf0fe7891f1f5b61d)
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
---
hw/i386/intel_iommu.c | 42 ++++++++++++++++++++++++++++++------------
1 file changed, 30 insertions(+), 12 deletions(-)
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index fb31de9416..b359efd6f9 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -722,6 +722,15 @@ static int vtd_iova_to_slpte(VTDContextEntry *ce, uint64_t iova, bool is_write,
typedef int (*vtd_page_walk_hook)(IOMMUTLBEntry *entry, void *private);
+static int vtd_page_walk_one(IOMMUTLBEntry *entry, int level,
+ vtd_page_walk_hook hook_fn, void *private)
+{
+ assert(hook_fn);
+ trace_vtd_page_walk_one(level, entry->iova, entry->translated_addr,
+ entry->addr_mask, entry->perm);
+ return hook_fn(entry, private);
+}
+
/**
* vtd_page_walk_level - walk over specific level for IOVA range
*
@@ -781,28 +790,37 @@ static int vtd_page_walk_level(dma_addr_t addr, uint64_t start,
*/
entry_valid = read_cur | write_cur;
+ entry.target_as = &address_space_memory;
+ entry.iova = iova & subpage_mask;
+ entry.perm = IOMMU_ACCESS_FLAG(read_cur, write_cur);
+ entry.addr_mask = ~subpage_mask;
+
if (vtd_is_last_slpte(slpte, level)) {
- entry.target_as = &address_space_memory;
- entry.iova = iova & subpage_mask;
/* NOTE: this is only meaningful if entry_valid == true */
entry.translated_addr = vtd_get_slpte_addr(slpte, aw);
- entry.addr_mask = ~subpage_mask;
- entry.perm = IOMMU_ACCESS_FLAG(read_cur, write_cur);
if (!entry_valid && !notify_unmap) {
trace_vtd_page_walk_skip_perm(iova, iova_next);
goto next;
}
- trace_vtd_page_walk_one(level, entry.iova, entry.translated_addr,
- entry.addr_mask, entry.perm);
- if (hook_fn) {
- ret = hook_fn(&entry, private);
- if (ret < 0) {
- return ret;
- }
+ ret = vtd_page_walk_one(&entry, level, hook_fn, private);
+ if (ret < 0) {
+ return ret;
}
} else {
if (!entry_valid) {
- trace_vtd_page_walk_skip_perm(iova, iova_next);
+ if (notify_unmap) {
+ /*
+ * The whole entry is invalid; unmap it all.
+ * Translated address is meaningless, zero it.
+ */
+ entry.translated_addr = 0x0;
+ ret = vtd_page_walk_one(&entry, level, hook_fn, private);
+ if (ret < 0) {
+ return ret;
+ }
+ } else {
+ trace_vtd_page_walk_skip_perm(iova, iova_next);
+ }
goto next;
}
ret = vtd_page_walk_level(vtd_get_slpte_addr(slpte, aw), iova,
--
2.17.1
^ permalink raw reply related
* [Qemu-devel] [PATCH 23/99] console: Avoid segfault in screendump
From: Michael Roth @ 2018-07-23 20:16 UTC (permalink / raw)
To: qemu-devel; +Cc: qemu-stable, Michal Privoznik, Gerd Hoffmann
In-Reply-To: <20180723201748.25573-1-mdroth@linux.vnet.ibm.com>
From: Michal Privoznik <mprivozn@redhat.com>
After f771c5440e04626f1 it is possible to select device and
head which to take screendump from. And even though we check if
provided head number falls within range, it may still happen that
the console has no surface yet leading to SIGSEGV:
qemu.git $ ./x86_64-softmmu/qemu-system-x86_64 \
-qmp stdio \
-device virtio-vga,id=video0,max_outputs=4
{"execute":"qmp_capabilities"}
{"execute":"screendump", "arguments":{"filename":"/tmp/screen.ppm", "device":"video0", "head":1}}
Segmentation fault
#0 0x00005628249dda88 in ppm_save (filename=0x56282826cbc0 "/tmp/screen.ppm", ds=0x0, errp=0x7fff52a6fae0) at ui/console.c:304
#1 0x00005628249ddd9b in qmp_screendump (filename=0x56282826cbc0 "/tmp/screen.ppm", has_device=true, device=0x5628276902d0 "video0", has_head=true, head=1, errp=0x7fff52a6fae0) at ui/console.c:375
#2 0x00005628247740df in qmp_marshal_screendump (args=0x562828265e00, ret=0x7fff52a6fb68, errp=0x7fff52a6fb60) at qapi/qapi-commands-ui.c:110
Here, @ds from frame #0 (or @surface from frame #1) is
dereferenced at the very beginning of ppm_save(). And because
it's NULL crash happens.
Signed-off-by: Michal Privoznik <mprivozn@redhat.com>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Message-id: cb05bb1909daa6ba62145c0194aafa05a14ed3d1.1526569138.git.mprivozn@redhat.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
(cherry picked from commit 08d9864fa4e0c616e076ca8b225d39a7ecb189af)
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
---
ui/console.c | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/ui/console.c b/ui/console.c
index 3fb2f4e09f..476bf901a4 100644
--- a/ui/console.c
+++ b/ui/console.c
@@ -370,6 +370,11 @@ void qmp_screendump(const char *filename, bool has_device, const char *device,
graphic_hw_update(con);
surface = qemu_console_surface(con);
+ if (!surface) {
+ error_setg(errp, "no surface");
+ return;
+ }
+
ppm_save(filename, surface, errp);
}
--
2.17.1
^ permalink raw reply related
* [Qemu-devel] [PATCH 24/99] hw/intc/arm_gicv3: Fix APxR<n> register dispatching
From: Michael Roth @ 2018-07-23 20:16 UTC (permalink / raw)
To: qemu-devel; +Cc: qemu-stable, Jan Kiszka, Peter Maydell
In-Reply-To: <20180723201748.25573-1-mdroth@linux.vnet.ibm.com>
From: Jan Kiszka <jan.kiszka@siemens.com>
There was a nasty flip in identifying which register group an access is
targeting. The issue caused spuriously raised priorities of the guest
when handing CPUs over in the Jailhouse hypervisor.
Cc: qemu-stable@nongnu.org
Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
Message-id: 28b927d3-da58-bce4-cc13-bfec7f9b1cb9@siemens.com
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
(cherry picked from commit 887aae10f6150dfdc71c45d7588e8efe6c144019)
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
---
hw/intc/arm_gicv3_cpuif.c | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/hw/intc/arm_gicv3_cpuif.c b/hw/intc/arm_gicv3_cpuif.c
index 26f5eeda94..554be25cb7 100644
--- a/hw/intc/arm_gicv3_cpuif.c
+++ b/hw/intc/arm_gicv3_cpuif.c
@@ -431,7 +431,7 @@ static uint64_t icv_ap_read(CPUARMState *env, const ARMCPRegInfo *ri)
{
GICv3CPUState *cs = icc_cs_from_env(env);
int regno = ri->opc2 & 3;
- int grp = ri->crm & 1 ? GICV3_G0 : GICV3_G1NS;
+ int grp = (ri->crm & 1) ? GICV3_G1NS : GICV3_G0;
uint64_t value = cs->ich_apr[grp][regno];
trace_gicv3_icv_ap_read(ri->crm & 1, regno, gicv3_redist_affid(cs), value);
@@ -443,7 +443,7 @@ static void icv_ap_write(CPUARMState *env, const ARMCPRegInfo *ri,
{
GICv3CPUState *cs = icc_cs_from_env(env);
int regno = ri->opc2 & 3;
- int grp = ri->crm & 1 ? GICV3_G0 : GICV3_G1NS;
+ int grp = (ri->crm & 1) ? GICV3_G1NS : GICV3_G0;
trace_gicv3_icv_ap_write(ri->crm & 1, regno, gicv3_redist_affid(cs), value);
@@ -1465,7 +1465,7 @@ static uint64_t icc_ap_read(CPUARMState *env, const ARMCPRegInfo *ri)
uint64_t value;
int regno = ri->opc2 & 3;
- int grp = ri->crm & 1 ? GICV3_G0 : GICV3_G1;
+ int grp = (ri->crm & 1) ? GICV3_G1 : GICV3_G0;
if (icv_access(env, grp == GICV3_G0 ? HCR_FMO : HCR_IMO)) {
return icv_ap_read(env, ri);
@@ -1487,7 +1487,7 @@ static void icc_ap_write(CPUARMState *env, const ARMCPRegInfo *ri,
GICv3CPUState *cs = icc_cs_from_env(env);
int regno = ri->opc2 & 3;
- int grp = ri->crm & 1 ? GICV3_G0 : GICV3_G1;
+ int grp = (ri->crm & 1) ? GICV3_G1 : GICV3_G0;
if (icv_access(env, grp == GICV3_G0 ? HCR_FMO : HCR_IMO)) {
icv_ap_write(env, ri, value);
@@ -2296,7 +2296,7 @@ static uint64_t ich_ap_read(CPUARMState *env, const ARMCPRegInfo *ri)
{
GICv3CPUState *cs = icc_cs_from_env(env);
int regno = ri->opc2 & 3;
- int grp = ri->crm & 1 ? GICV3_G0 : GICV3_G1NS;
+ int grp = (ri->crm & 1) ? GICV3_G1NS : GICV3_G0;
uint64_t value;
value = cs->ich_apr[grp][regno];
@@ -2309,7 +2309,7 @@ static void ich_ap_write(CPUARMState *env, const ARMCPRegInfo *ri,
{
GICv3CPUState *cs = icc_cs_from_env(env);
int regno = ri->opc2 & 3;
- int grp = ri->crm & 1 ? GICV3_G0 : GICV3_G1NS;
+ int grp = (ri->crm & 1) ? GICV3_G1NS : GICV3_G0;
trace_gicv3_ich_ap_write(ri->crm & 1, regno, gicv3_redist_affid(cs), value);
--
2.17.1
^ permalink raw reply related
* Re: [PATCH] mm/mempool: add missing parameter description
From: David Rientjes @ 2018-07-23 20:19 UTC (permalink / raw)
To: Mike Rapoport; +Cc: Andrew Morton, linux-mm, linux-kernel
In-Reply-To: <1532336274-26228-1-git-send-email-rppt@linux.vnet.ibm.com>
On Mon, 23 Jul 2018, Mike Rapoport wrote:
> The kernel-doc for mempool_init function is missing the description of the
> pool parameter. Add it.
>
> Signed-off-by: Mike Rapoport <rppt@linux.vnet.ibm.com>
Acked-by: David Rientjes <rientjes@google.com>
My b.
^ permalink raw reply
* [Qemu-devel] [PATCH 22/99] s390x/ccw: make sure all ccw devices are properly reset
From: Michael Roth @ 2018-07-23 20:16 UTC (permalink / raw)
To: qemu-devel; +Cc: qemu-stable, Cornelia Huck
In-Reply-To: <20180723201748.25573-1-mdroth@linux.vnet.ibm.com>
From: Cornelia Huck <cohuck@redhat.com>
Thomas reported that the subchannel for a 3270 device that ended up
in a broken state (status pending even though not enabled) did not
get out of that state even after a reboot (which involves a subsytem
reset). The reason for this is that the 3270 device did not define
a reset handler.
Let's fix this by introducing a base reset handler (set up for all
ccw devices) that resets the subchannel and have virtio-ccw call
its virtio-specific reset procedure in addition to that.
CC: qemu-stable@nongnu.org
Reported-by: Thomas Huth <thuth@redhat.com>
Suggested-by: Christian Borntraeger <borntraeger@de.ibm.com>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Tested-by: Thomas Huth <thuth@redhat.com>
Acked-by: Christian Borntraeger <borntraeger@de.ibm.com>
Reviewed-by: Halil Pasic <pasic@linux.ibm.com>
Signed-off-by: Cornelia Huck <cohuck@redhat.com>
(cherry picked from commit 838fb84f83c84f00d15b1bede5e080b495644458)
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
---
hw/s390x/ccw-device.c | 8 ++++++++
hw/s390x/virtio-ccw.c | 9 ++++++---
hw/s390x/virtio-ccw.h | 1 +
3 files changed, 15 insertions(+), 3 deletions(-)
diff --git a/hw/s390x/ccw-device.c b/hw/s390x/ccw-device.c
index f9bfa154d6..7cd73df4aa 100644
--- a/hw/s390x/ccw-device.c
+++ b/hw/s390x/ccw-device.c
@@ -40,6 +40,13 @@ static Property ccw_device_properties[] = {
DEFINE_PROP_END_OF_LIST(),
};
+static void ccw_device_reset(DeviceState *d)
+{
+ CcwDevice *ccw_dev = CCW_DEVICE(d);
+
+ css_reset_sch(ccw_dev->sch);
+}
+
static void ccw_device_class_init(ObjectClass *klass, void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
@@ -48,6 +55,7 @@ static void ccw_device_class_init(ObjectClass *klass, void *data)
k->realize = ccw_device_realize;
k->refill_ids = ccw_device_refill_ids;
dc->props = ccw_device_properties;
+ dc->reset = ccw_device_reset;
}
const VMStateDescription vmstate_ccw_dev = {
diff --git a/hw/s390x/virtio-ccw.c b/hw/s390x/virtio-ccw.c
index 40a33302a7..22df33b509 100644
--- a/hw/s390x/virtio-ccw.c
+++ b/hw/s390x/virtio-ccw.c
@@ -1058,10 +1058,12 @@ static void virtio_ccw_reset(DeviceState *d)
{
VirtioCcwDevice *dev = VIRTIO_CCW_DEVICE(d);
VirtIODevice *vdev = virtio_bus_get_device(&dev->bus);
- CcwDevice *ccw_dev = CCW_DEVICE(d);
+ VirtIOCCWDeviceClass *vdc = VIRTIO_CCW_DEVICE_GET_CLASS(dev);
virtio_ccw_reset_virtio(dev, vdev);
- css_reset_sch(ccw_dev->sch);
+ if (vdc->parent_reset) {
+ vdc->parent_reset(d);
+ }
}
static void virtio_ccw_vmstate_change(DeviceState *d, bool running)
@@ -1715,12 +1717,13 @@ static void virtio_ccw_device_class_init(ObjectClass *klass, void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
CCWDeviceClass *k = CCW_DEVICE_CLASS(dc);
+ VirtIOCCWDeviceClass *vdc = VIRTIO_CCW_DEVICE_CLASS(klass);
k->unplug = virtio_ccw_busdev_unplug;
dc->realize = virtio_ccw_busdev_realize;
dc->unrealize = virtio_ccw_busdev_unrealize;
dc->bus_type = TYPE_VIRTUAL_CSS_BUS;
- dc->reset = virtio_ccw_reset;
+ device_class_set_parent_reset(dc, virtio_ccw_reset, &vdc->parent_reset);
}
static const TypeInfo virtio_ccw_device_info = {
diff --git a/hw/s390x/virtio-ccw.h b/hw/s390x/virtio-ccw.h
index 2fc513001e..3453aa1f98 100644
--- a/hw/s390x/virtio-ccw.h
+++ b/hw/s390x/virtio-ccw.h
@@ -77,6 +77,7 @@ typedef struct VirtIOCCWDeviceClass {
CCWDeviceClass parent_class;
void (*realize)(VirtioCcwDevice *dev, Error **errp);
void (*unrealize)(VirtioCcwDevice *dev, Error **errp);
+ void (*parent_reset)(DeviceState *dev);
} VirtIOCCWDeviceClass;
/* Performance improves when virtqueue kick processing is decoupled from the
--
2.17.1
^ permalink raw reply related
* [Qemu-devel] [PATCH 21/99] virtio-ccw: common reset handler
From: Michael Roth @ 2018-07-23 20:16 UTC (permalink / raw)
To: qemu-devel; +Cc: qemu-stable, Cornelia Huck
In-Reply-To: <20180723201748.25573-1-mdroth@linux.vnet.ibm.com>
From: Cornelia Huck <cohuck@redhat.com>
All the different virtio ccw devices use the same reset handler,
so let's move setting it into the base virtio ccw device class.
CC: qemu-stable@nongnu.org
Reviewed-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Halil Pasic <pasic@linux.ibm.com>
Signed-off-by: Cornelia Huck <cohuck@redhat.com>
(cherry picked from commit 0c53057adb04d254bc09511880670c92ab185fc6)
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
---
hw/s390x/virtio-ccw.c | 13 +------------
1 file changed, 1 insertion(+), 12 deletions(-)
diff --git a/hw/s390x/virtio-ccw.c b/hw/s390x/virtio-ccw.c
index e51fbefd23..40a33302a7 100644
--- a/hw/s390x/virtio-ccw.c
+++ b/hw/s390x/virtio-ccw.c
@@ -1345,7 +1345,6 @@ static void virtio_ccw_net_class_init(ObjectClass *klass, void *data)
k->realize = virtio_ccw_net_realize;
k->unrealize = virtio_ccw_unrealize;
- dc->reset = virtio_ccw_reset;
dc->props = virtio_ccw_net_properties;
set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
}
@@ -1373,7 +1372,6 @@ static void virtio_ccw_blk_class_init(ObjectClass *klass, void *data)
k->realize = virtio_ccw_blk_realize;
k->unrealize = virtio_ccw_unrealize;
- dc->reset = virtio_ccw_reset;
dc->props = virtio_ccw_blk_properties;
set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
}
@@ -1401,7 +1399,6 @@ static void virtio_ccw_serial_class_init(ObjectClass *klass, void *data)
k->realize = virtio_ccw_serial_realize;
k->unrealize = virtio_ccw_unrealize;
- dc->reset = virtio_ccw_reset;
dc->props = virtio_ccw_serial_properties;
set_bit(DEVICE_CATEGORY_INPUT, dc->categories);
}
@@ -1429,7 +1426,6 @@ static void virtio_ccw_balloon_class_init(ObjectClass *klass, void *data)
k->realize = virtio_ccw_balloon_realize;
k->unrealize = virtio_ccw_unrealize;
- dc->reset = virtio_ccw_reset;
dc->props = virtio_ccw_balloon_properties;
set_bit(DEVICE_CATEGORY_MISC, dc->categories);
}
@@ -1457,7 +1453,6 @@ static void virtio_ccw_scsi_class_init(ObjectClass *klass, void *data)
k->realize = virtio_ccw_scsi_realize;
k->unrealize = virtio_ccw_unrealize;
- dc->reset = virtio_ccw_reset;
dc->props = virtio_ccw_scsi_properties;
set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
}
@@ -1484,7 +1479,6 @@ static void vhost_ccw_scsi_class_init(ObjectClass *klass, void *data)
k->realize = vhost_ccw_scsi_realize;
k->unrealize = virtio_ccw_unrealize;
- dc->reset = virtio_ccw_reset;
dc->props = vhost_ccw_scsi_properties;
set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
}
@@ -1521,7 +1515,6 @@ static void virtio_ccw_rng_class_init(ObjectClass *klass, void *data)
k->realize = virtio_ccw_rng_realize;
k->unrealize = virtio_ccw_unrealize;
- dc->reset = virtio_ccw_reset;
dc->props = virtio_ccw_rng_properties;
set_bit(DEVICE_CATEGORY_MISC, dc->categories);
}
@@ -1559,7 +1552,6 @@ static void virtio_ccw_crypto_class_init(ObjectClass *klass, void *data)
k->realize = virtio_ccw_crypto_realize;
k->unrealize = virtio_ccw_unrealize;
- dc->reset = virtio_ccw_reset;
dc->props = virtio_ccw_crypto_properties;
set_bit(DEVICE_CATEGORY_MISC, dc->categories);
}
@@ -1597,7 +1589,6 @@ static void virtio_ccw_gpu_class_init(ObjectClass *klass, void *data)
k->realize = virtio_ccw_gpu_realize;
k->unrealize = virtio_ccw_unrealize;
- dc->reset = virtio_ccw_reset;
dc->props = virtio_ccw_gpu_properties;
dc->hotpluggable = false;
set_bit(DEVICE_CATEGORY_DISPLAY, dc->categories);
@@ -1626,7 +1617,6 @@ static void virtio_ccw_input_class_init(ObjectClass *klass, void *data)
k->realize = virtio_ccw_input_realize;
k->unrealize = virtio_ccw_unrealize;
- dc->reset = virtio_ccw_reset;
dc->props = virtio_ccw_input_properties;
set_bit(DEVICE_CATEGORY_INPUT, dc->categories);
}
@@ -1730,6 +1720,7 @@ static void virtio_ccw_device_class_init(ObjectClass *klass, void *data)
dc->realize = virtio_ccw_busdev_realize;
dc->unrealize = virtio_ccw_busdev_unrealize;
dc->bus_type = TYPE_VIRTUAL_CSS_BUS;
+ dc->reset = virtio_ccw_reset;
}
static const TypeInfo virtio_ccw_device_info = {
@@ -1806,7 +1797,6 @@ static void virtio_ccw_9p_class_init(ObjectClass *klass, void *data)
k->unrealize = virtio_ccw_unrealize;
k->realize = virtio_ccw_9p_realize;
- dc->reset = virtio_ccw_reset;
dc->props = virtio_ccw_9p_properties;
set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
}
@@ -1856,7 +1846,6 @@ static void vhost_vsock_ccw_class_init(ObjectClass *klass, void *data)
k->unrealize = virtio_ccw_unrealize;
set_bit(DEVICE_CATEGORY_MISC, dc->categories);
dc->props = vhost_vsock_ccw_properties;
- dc->reset = virtio_ccw_reset;
}
static void vhost_vsock_ccw_instance_init(Object *obj)
--
2.17.1
^ permalink raw reply related
* [Qemu-devel] [PATCH 20/99] pc-bios/s390-ccw: struct tpi_info must be declared as aligned(4)
From: Michael Roth @ 2018-07-23 20:16 UTC (permalink / raw)
To: qemu-devel; +Cc: qemu-stable, Thomas Huth, Cornelia Huck
In-Reply-To: <20180723201748.25573-1-mdroth@linux.vnet.ibm.com>
From: Thomas Huth <thuth@redhat.com>
I've run into a compilation error today with the current version of GCC 8:
In file included from s390-ccw.h:49,
from main.c:12:
cio.h:128:1: error: alignment 1 of 'struct tpi_info' is less than 4 [-Werror=packed-not-aligned]
} __attribute__ ((packed));
^
cc1: all warnings being treated as errors
Since the struct tpi_info contains an element ("struct subchannel_id schid")
which is marked as aligned(4), we've got to mark the struct tpi_info as
aligned(4), too.
CC: qemu-stable@nongnu.org
Signed-off-by: Thomas Huth <thuth@redhat.com>
Message-Id: <1525774672-11913-1-git-send-email-thuth@redhat.com>
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Acked-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Cornelia Huck <cohuck@redhat.com>
(cherry picked from commit a6e4385dea94850d7b06b0542e7960c1063fdabd)
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
---
pc-bios/s390-ccw/cio.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/pc-bios/s390-ccw/cio.h b/pc-bios/s390-ccw/cio.h
index 55eaeee4b6..1a0795f645 100644
--- a/pc-bios/s390-ccw/cio.h
+++ b/pc-bios/s390-ccw/cio.h
@@ -125,7 +125,7 @@ struct tpi_info {
__u32 reserved3 : 12;
__u32 int_type : 3;
__u32 reserved4 : 12;
-} __attribute__ ((packed));
+} __attribute__ ((packed, aligned(4)));
/* channel command word (type 1) */
struct ccw1 {
--
2.17.1
^ permalink raw reply related
* [Qemu-devel] [PATCH 01/99] tests: fix tpm-crb tpm-tis tests race
From: Michael Roth @ 2018-07-23 20:16 UTC (permalink / raw)
To: qemu-devel
Cc: qemu-stable, Marc-André Lureau, Stefan Berger,
Michael Tokarev
In-Reply-To: <20180723201748.25573-1-mdroth@linux.vnet.ibm.com>
From: Marc-André Lureau <marcandre.lureau@redhat.com>
No need to close the TPM data socket on the emulator end, qemu will
close it after a SHUTDOWN. This avoids a race between close() and
read() in the TPM data thread.
Reported-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Signed-off-by: Stefan Berger <stefanb@linux.vnet.ibm.com>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
(cherry picked from commit 7647d5c6b5e3b3f36a6e0441c81ae3fe797eb233)
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
---
tests/tpm-emu.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tests/tpm-emu.c b/tests/tpm-emu.c
index 4dada76834..8c2bd53cad 100644
--- a/tests/tpm-emu.c
+++ b/tests/tpm-emu.c
@@ -125,7 +125,7 @@ void *tpm_emu_ctrl_thread(void *data)
case CMD_SHUTDOWN: {
ptm_res res = 0;
qio_channel_write(ioc, (char *)&res, sizeof(res), &error_abort);
- qio_channel_close(s->tpm_ioc, &error_abort);
+ /* the tpm data thread is expected to finish now */
g_thread_join(s->emu_tpm_thread);
break;
}
--
2.17.1
^ permalink raw reply related
* Re: [patch 2/6] mm: fix vma_is_anonymous() false-positives
From: Linus Torvalds @ 2018-07-23 19:16 UTC (permalink / raw)
To: Kirill A. Shutemov
Cc: Andrew Morton, stable, Oleg Nesterov, marcel.ziswiler,
Dmitry Vyukov, Andrea Arcangeli, mm-commits
In-Reply-To: <20180723085601.n2xbjasgfuucy63e@black.fi.intel.com>
On Mon, Jul 23, 2018 at 1:55 AM Kirill A. Shutemov
<kirill.shutemov@linux.intel.com> wrote:
>
> I also would rather keep ->vm_ops non-NULL for anonymous VMAs. This way it's
> easier to catch broken drivers: place a WARN() in __vma_link_rb().
>
> With non-NULL vm_ops we can drop all vma->vm_ops checks. There's patch in
> -mm tree doing this[1].
Yeah, that patch is completely broken. See the separate email where I
pointed out how there are drivers that explicitly set vm_ops to NULL
while the vma is still live.
So no. That kind of stuff isn't even an option unless everything else
is cleaned up.
So I really think there's a lot more cleanup here before
'&anon_vm_ops" would make any sense what-so-ever.
But if we had a wrapper function for it, I'd at least find it more
palatable. Something simple like
static inline void vma_set_anonymous(struct vm_area_struct *vma)
{
static const struct vm_operations_struct anon_vm_ops = {};
vma->vm_ops = &anon_vm_ops;
vma->vm_flags |= VM_ANONYMOUS;
}
would look fine to me. And I'd rather have it in a flag than anywhere
else. That's what those flags *are* for.
In fact, I think one cleanup we should do on the way to this is to fix
the current "vm_flags". It's insanely different sizes on 32-bit and
64-bit, because we use 'unsigned long' for it. And it's a big
inconvenience, because we're tight on flags.
So we could make it a u64, which would get us reliably more flags. Or
even make a separate field for some of the more specialized flags, and
have two (separate) flag fields instead. Right now we do some odd
things due to packing (not as bad as the page flags, but not pretty).
Linus
^ permalink raw reply
* [Qemu-devel] [PATCH 19/99] s390x/css: disabled subchannels cannot be status pending
From: Michael Roth @ 2018-07-23 20:16 UTC (permalink / raw)
To: qemu-devel; +Cc: qemu-stable, Cornelia Huck
In-Reply-To: <20180723201748.25573-1-mdroth@linux.vnet.ibm.com>
From: Cornelia Huck <cohuck@redhat.com>
The 3270 code will try to post an attention interrupt when the
3270 emulator (e.g. x3270) attaches. If the guest has not yet
enabled the subchannel for the 3270 device, we will present a spurious
cc 1 (status pending) when it uses msch on it later on, e.g. when
trying to enable the subchannel.
To fix this, just don't do anything in css_conditional_io_interrupt()
if the subchannel is not enabled. The 3270 code will work fine with
that, and the other user of this function (virtio-ccw) never
attempts to post an interrupt for a disabled device to begin with.
CC: qemu-stable@nongnu.org
Reported-by: Thomas Huth <thuth@redhat.com>
Tested-by: Thomas Huth <thuth@redhat.com>
Acked-by: Christian Borntraeger <borntraeger@de.ibm.com>
Acked-by: Halil Pasic <pasic@linux.ibm.com>
Reviewed-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Cornelia Huck <cohuck@redhat.com>
(cherry picked from commit 6e9c893ecd00afd5344c35d0d0ded50eaa0938f6)
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
---
hw/s390x/css.c | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/hw/s390x/css.c b/hw/s390x/css.c
index 301bf1772f..56c3fa8c89 100644
--- a/hw/s390x/css.c
+++ b/hw/s390x/css.c
@@ -616,6 +616,14 @@ void css_inject_io_interrupt(SubchDev *sch)
void css_conditional_io_interrupt(SubchDev *sch)
{
+ /*
+ * If the subchannel is not enabled, it is not made status pending
+ * (see PoP p. 16-17, "Status Control").
+ */
+ if (!(sch->curr_status.pmcw.flags & PMCW_FLAGS_MASK_ENA)) {
+ return;
+ }
+
/*
* If the subchannel is not currently status pending, make it pending
* with alert status.
--
2.17.1
^ permalink raw reply related
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.