* [PATCH v3 1/3] IOMMU: allow MSI message to IRTE propagation to fail
2013-04-12 10:18 [PATCH v3 0/3] x86/IOMMU: multi-vector MSI prerequisites Jan Beulich
@ 2013-04-12 10:22 ` Jan Beulich
2013-04-15 6:38 ` Zhang, Xiantao
2013-04-12 10:23 ` [PATCH v3 2/3] AMD IOMMU: allocate IRTE entries instead of using a static mapping Jan Beulich
` (2 subsequent siblings)
3 siblings, 1 reply; 12+ messages in thread
From: Jan Beulich @ 2013-04-12 10:22 UTC (permalink / raw)
To: xen-devel, Jan Beulich; +Cc: Jacob Shin, xiantao.zhang, suravee.suthikulpanit
[-- Attachment #1: Type: text/plain, Size: 11200 bytes --]
With the need to allocate multiple contiguous IRTEs for multi-vector
MSI, the chance of failure here increases. While on the AMD side
there's no allocation of IRTEs at present at all (and hence no way for
this allocation to fail, which is going to change with a later patch in
this series), VT-d already ignores a possible error here, which this
patch fixes.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v3: Introduce _find_iommu_for_device() to take care of filtering out
the case where an MSI is being set up for the IOMMU itself.
--- a/xen/arch/x86/hpet.c
+++ b/xen/arch/x86/hpet.c
@@ -254,13 +254,22 @@ static void hpet_msi_mask(struct irq_des
ch->msi.msi_attrib.masked = 1;
}
-static void hpet_msi_write(struct hpet_event_channel *ch, struct msi_msg *msg)
+static int hpet_msi_write(struct hpet_event_channel *ch, struct msi_msg *msg)
{
ch->msi.msg = *msg;
+
if ( iommu_intremap )
- iommu_update_ire_from_msi(&ch->msi, msg);
+ {
+ int rc = iommu_update_ire_from_msi(&ch->msi, msg);
+
+ if ( rc )
+ return rc;
+ }
+
hpet_write32(msg->data, HPET_Tn_ROUTE(ch->idx));
hpet_write32(msg->address_lo, HPET_Tn_ROUTE(ch->idx) + 4);
+
+ return 0;
}
static void __maybe_unused
@@ -318,12 +327,12 @@ static hw_irq_controller hpet_msi_type =
.set_affinity = hpet_msi_set_affinity,
};
-static void __hpet_setup_msi_irq(struct irq_desc *desc)
+static int __hpet_setup_msi_irq(struct irq_desc *desc)
{
struct msi_msg msg;
msi_compose_msg(desc, &msg);
- hpet_msi_write(desc->action->dev_id, &msg);
+ return hpet_msi_write(desc->action->dev_id, &msg);
}
static int __init hpet_setup_msi_irq(struct hpet_event_channel *ch)
@@ -347,6 +356,8 @@ static int __init hpet_setup_msi_irq(str
desc->handler = &hpet_msi_type;
ret = request_irq(ch->msi.irq, hpet_interrupt_handler, 0, "HPET", ch);
+ if ( ret >= 0 )
+ ret = __hpet_setup_msi_irq(desc);
if ( ret < 0 )
{
if ( iommu_intremap )
@@ -354,7 +365,6 @@ static int __init hpet_setup_msi_irq(str
return ret;
}
- __hpet_setup_msi_irq(desc);
desc->msi_desc = &ch->msi;
return 0;
--- a/xen/arch/x86/irq.c
+++ b/xen/arch/x86/irq.c
@@ -1938,7 +1938,14 @@ int map_domain_pirq(
if ( desc->handler != &no_irq_type )
dprintk(XENLOG_G_ERR, "dom%d: irq %d in use\n",
d->domain_id, irq);
- setup_msi_handler(desc, msi_desc);
+
+ ret = setup_msi_irq(desc, msi_desc);
+ if ( ret )
+ {
+ spin_unlock_irqrestore(&desc->lock, flags);
+ pci_disable_msi(msi_desc);
+ goto done;
+ }
if ( opt_irq_vector_map == OPT_IRQ_VECTOR_MAP_PERDEV
&& !desc->arch.used_vectors )
@@ -1954,7 +1961,6 @@ int map_domain_pirq(
}
set_domain_irq_pirq(d, irq, info);
- setup_msi_irq(desc);
spin_unlock_irqrestore(&desc->lock, flags);
}
else
--- a/xen/arch/x86/msi.c
+++ b/xen/arch/x86/msi.c
@@ -214,14 +214,18 @@ static void read_msi_msg(struct msi_desc
iommu_read_msi_from_ire(entry, msg);
}
-static void write_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
+static int write_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
{
entry->msg = *msg;
if ( iommu_intremap )
{
+ int rc;
+
ASSERT(msg != &entry->msg);
- iommu_update_ire_from_msi(entry, msg);
+ rc = iommu_update_ire_from_msi(entry, msg);
+ if ( rc )
+ return rc;
}
switch ( entry->msi_attrib.type )
@@ -264,6 +268,8 @@ static void write_msi_msg(struct msi_des
default:
BUG();
}
+
+ return 0;
}
void set_msi_affinity(struct irq_desc *desc, const cpumask_t *mask)
@@ -464,19 +470,15 @@ static struct msi_desc* alloc_msi_entry(
return entry;
}
-void setup_msi_handler(struct irq_desc *desc, struct msi_desc *msidesc)
+int setup_msi_irq(struct irq_desc *desc, struct msi_desc *msidesc)
{
+ struct msi_msg msg;
+
desc->msi_desc = msidesc;
desc->handler = msi_maskable_irq(msidesc) ? &pci_msi_maskable
: &pci_msi_nonmaskable;
-}
-
-void setup_msi_irq(struct irq_desc *desc)
-{
- struct msi_msg msg;
-
msi_compose_msg(desc, &msg);
- write_msi_msg(desc->msi_desc, &msg);
+ return write_msi_msg(msidesc, &msg);
}
int msi_free_irq(struct msi_desc *entry)
--- a/xen/drivers/passthrough/amd/iommu_intr.c
+++ b/xen/drivers/passthrough/amd/iommu_intr.c
@@ -17,6 +17,7 @@
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
+#include <xen/err.h>
#include <xen/sched.h>
#include <xen/hvm/iommu.h>
#include <asm/amd-iommu.h>
@@ -359,25 +360,35 @@ done:
}
}
-void amd_iommu_msi_msg_update_ire(
+static struct amd_iommu *_find_iommu_for_device(int seg, int bdf)
+{
+ struct amd_iommu *iommu = find_iommu_for_device(seg, bdf);
+
+ if ( iommu )
+ return iommu;
+
+ list_for_each_entry ( iommu, &amd_iommu_head, list )
+ if ( iommu->seg == seg && iommu->bdf == bdf )
+ return NULL;
+
+ AMD_IOMMU_DEBUG("No IOMMU for MSI dev = %04x:%02x:%02x.%u\n",
+ seg, PCI_BUS(bdf), PCI_SLOT(bdf), PCI_FUNC(bdf));
+ return ERR_PTR(-EINVAL);
+}
+
+int amd_iommu_msi_msg_update_ire(
struct msi_desc *msi_desc, struct msi_msg *msg)
{
struct pci_dev *pdev = msi_desc->dev;
int bdf, seg;
struct amd_iommu *iommu;
- if ( !iommu_intremap )
- return;
-
bdf = pdev ? PCI_BDF2(pdev->bus, pdev->devfn) : hpet_sbdf.bdf;
seg = pdev ? pdev->seg : hpet_sbdf.seg;
- iommu = find_iommu_for_device(seg, bdf);
- if ( !iommu )
- {
- AMD_IOMMU_DEBUG("Fail to find iommu for MSI device id = %#x\n", bdf);
- return;
- }
+ iommu = _find_iommu_for_device(seg, bdf);
+ if ( IS_ERR_OR_NULL(iommu) )
+ return PTR_ERR(iommu);
if ( msi_desc->remap_index >= 0 )
{
@@ -395,7 +406,7 @@ void amd_iommu_msi_msg_update_ire(
}
if ( !msg )
- return;
+ return 0;
do {
update_intremap_entry_from_msi_msg(iommu, bdf, &msi_desc->remap_index,
@@ -404,6 +415,8 @@ void amd_iommu_msi_msg_update_ire(
break;
bdf += pdev->phantom_stride;
} while ( PCI_SLOT(bdf) == PCI_SLOT(pdev->devfn) );
+
+ return 0;
}
void amd_iommu_read_msi_from_ire(
--- a/xen/drivers/passthrough/iommu.c
+++ b/xen/drivers/passthrough/iommu.c
@@ -548,18 +548,20 @@ void iommu_update_ire_from_apic(
const struct iommu_ops *ops = iommu_get_ops();
ops->update_ire_from_apic(apic, reg, value);
}
-void iommu_update_ire_from_msi(
+
+int iommu_update_ire_from_msi(
struct msi_desc *msi_desc, struct msi_msg *msg)
{
const struct iommu_ops *ops = iommu_get_ops();
- ops->update_ire_from_msi(msi_desc, msg);
+ return iommu_intremap ? ops->update_ire_from_msi(msi_desc, msg) : 0;
}
void iommu_read_msi_from_ire(
struct msi_desc *msi_desc, struct msi_msg *msg)
{
const struct iommu_ops *ops = iommu_get_ops();
- ops->read_msi_from_ire(msi_desc, msg);
+ if ( iommu_intremap )
+ ops->read_msi_from_ire(msi_desc, msg);
}
unsigned int iommu_read_apic_from_ire(unsigned int apic, unsigned int reg)
--- a/xen/drivers/passthrough/vtd/extern.h
+++ b/xen/drivers/passthrough/vtd/extern.h
@@ -90,7 +90,7 @@ void io_apic_write_remap_rte(unsigned in
struct msi_desc;
struct msi_msg;
void msi_msg_read_remap_rte(struct msi_desc *, struct msi_msg *);
-void msi_msg_write_remap_rte(struct msi_desc *, struct msi_msg *);
+int msi_msg_write_remap_rte(struct msi_desc *, struct msi_msg *);
int intel_setup_hpet_msi(struct msi_desc *);
--- a/xen/drivers/passthrough/vtd/intremap.c
+++ b/xen/drivers/passthrough/vtd/intremap.c
@@ -653,7 +653,7 @@ void msi_msg_read_remap_rte(
remap_entry_to_msi_msg(drhd->iommu, msg);
}
-void msi_msg_write_remap_rte(
+int msi_msg_write_remap_rte(
struct msi_desc *msi_desc, struct msi_msg *msg)
{
struct pci_dev *pdev = msi_desc->dev;
@@ -661,8 +661,8 @@ void msi_msg_write_remap_rte(
drhd = pdev ? acpi_find_matched_drhd_unit(pdev)
: hpet_to_drhd(msi_desc->hpet_id);
- if ( drhd )
- msi_msg_to_remap_entry(drhd->iommu, pdev, msi_desc, msg);
+ return drhd ? msi_msg_to_remap_entry(drhd->iommu, pdev, msi_desc, msg)
+ : -EINVAL;
}
int __init intel_setup_hpet_msi(struct msi_desc *msi_desc)
--- a/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h
+++ b/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h
@@ -93,7 +93,7 @@ void *amd_iommu_alloc_intremap_table(voi
int amd_iommu_free_intremap_table(u16 seg, struct ivrs_mappings *);
void amd_iommu_ioapic_update_ire(
unsigned int apic, unsigned int reg, unsigned int value);
-void amd_iommu_msi_msg_update_ire(
+int amd_iommu_msi_msg_update_ire(
struct msi_desc *msi_desc, struct msi_msg *msg);
void amd_iommu_read_msi_from_ire(
struct msi_desc *msi_desc, struct msi_msg *msg);
--- a/xen/include/asm-x86/msi.h
+++ b/xen/include/asm-x86/msi.h
@@ -78,8 +78,7 @@ extern int pci_enable_msi(struct msi_inf
extern void pci_disable_msi(struct msi_desc *desc);
extern int pci_prepare_msix(u16 seg, u8 bus, u8 devfn, bool_t off);
extern void pci_cleanup_msi(struct pci_dev *pdev);
-extern void setup_msi_handler(struct irq_desc *, struct msi_desc *);
-extern void setup_msi_irq(struct irq_desc *);
+extern int setup_msi_irq(struct irq_desc *, struct msi_desc *);
extern void teardown_msi_irq(int irq);
extern int msi_free_vector(struct msi_desc *entry);
extern int pci_restore_msi_state(struct pci_dev *pdev);
--- a/xen/include/xen/iommu.h
+++ b/xen/include/xen/iommu.h
@@ -106,7 +106,7 @@ struct iommu_ops {
u8 devfn, struct pci_dev *);
int (*get_device_group_id)(u16 seg, u8 bus, u8 devfn);
void (*update_ire_from_apic)(unsigned int apic, unsigned int reg, unsigned int value);
- void (*update_ire_from_msi)(struct msi_desc *msi_desc, struct msi_msg *msg);
+ int (*update_ire_from_msi)(struct msi_desc *msi_desc, struct msi_msg *msg);
void (*read_msi_from_ire)(struct msi_desc *msi_desc, struct msi_msg *msg);
unsigned int (*read_apic_from_ire)(unsigned int apic, unsigned int reg);
int (*setup_hpet_msi)(struct msi_desc *);
@@ -120,7 +120,7 @@ struct iommu_ops {
};
void iommu_update_ire_from_apic(unsigned int apic, unsigned int reg, unsigned int value);
-void iommu_update_ire_from_msi(struct msi_desc *msi_desc, struct msi_msg *msg);
+int iommu_update_ire_from_msi(struct msi_desc *msi_desc, struct msi_msg *msg);
void iommu_read_msi_from_ire(struct msi_desc *msi_desc, struct msi_msg *msg);
unsigned int iommu_read_apic_from_ire(unsigned int apic, unsigned int reg);
int iommu_setup_hpet_msi(struct msi_desc *);
[-- Attachment #2: IOMMU-update-ire-from-msi-fail.patch --]
[-- Type: text/plain, Size: 11252 bytes --]
IOMMU: allow MSI message to IRTE propagation to fail
With the need to allocate multiple contiguous IRTEs for multi-vector
MSI, the chance of failure here increases. While on the AMD side
there's no allocation of IRTEs at present at all (and hence no way for
this allocation to fail, which is going to change with a later patch in
this series), VT-d already ignores a possible error here, which this
patch fixes.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v3: Introduce _find_iommu_for_device() to take care of filtering out
the case where an MSI is being set up for the IOMMU itself.
--- a/xen/arch/x86/hpet.c
+++ b/xen/arch/x86/hpet.c
@@ -254,13 +254,22 @@ static void hpet_msi_mask(struct irq_des
ch->msi.msi_attrib.masked = 1;
}
-static void hpet_msi_write(struct hpet_event_channel *ch, struct msi_msg *msg)
+static int hpet_msi_write(struct hpet_event_channel *ch, struct msi_msg *msg)
{
ch->msi.msg = *msg;
+
if ( iommu_intremap )
- iommu_update_ire_from_msi(&ch->msi, msg);
+ {
+ int rc = iommu_update_ire_from_msi(&ch->msi, msg);
+
+ if ( rc )
+ return rc;
+ }
+
hpet_write32(msg->data, HPET_Tn_ROUTE(ch->idx));
hpet_write32(msg->address_lo, HPET_Tn_ROUTE(ch->idx) + 4);
+
+ return 0;
}
static void __maybe_unused
@@ -318,12 +327,12 @@ static hw_irq_controller hpet_msi_type =
.set_affinity = hpet_msi_set_affinity,
};
-static void __hpet_setup_msi_irq(struct irq_desc *desc)
+static int __hpet_setup_msi_irq(struct irq_desc *desc)
{
struct msi_msg msg;
msi_compose_msg(desc, &msg);
- hpet_msi_write(desc->action->dev_id, &msg);
+ return hpet_msi_write(desc->action->dev_id, &msg);
}
static int __init hpet_setup_msi_irq(struct hpet_event_channel *ch)
@@ -347,6 +356,8 @@ static int __init hpet_setup_msi_irq(str
desc->handler = &hpet_msi_type;
ret = request_irq(ch->msi.irq, hpet_interrupt_handler, 0, "HPET", ch);
+ if ( ret >= 0 )
+ ret = __hpet_setup_msi_irq(desc);
if ( ret < 0 )
{
if ( iommu_intremap )
@@ -354,7 +365,6 @@ static int __init hpet_setup_msi_irq(str
return ret;
}
- __hpet_setup_msi_irq(desc);
desc->msi_desc = &ch->msi;
return 0;
--- a/xen/arch/x86/irq.c
+++ b/xen/arch/x86/irq.c
@@ -1938,7 +1938,14 @@ int map_domain_pirq(
if ( desc->handler != &no_irq_type )
dprintk(XENLOG_G_ERR, "dom%d: irq %d in use\n",
d->domain_id, irq);
- setup_msi_handler(desc, msi_desc);
+
+ ret = setup_msi_irq(desc, msi_desc);
+ if ( ret )
+ {
+ spin_unlock_irqrestore(&desc->lock, flags);
+ pci_disable_msi(msi_desc);
+ goto done;
+ }
if ( opt_irq_vector_map == OPT_IRQ_VECTOR_MAP_PERDEV
&& !desc->arch.used_vectors )
@@ -1954,7 +1961,6 @@ int map_domain_pirq(
}
set_domain_irq_pirq(d, irq, info);
- setup_msi_irq(desc);
spin_unlock_irqrestore(&desc->lock, flags);
}
else
--- a/xen/arch/x86/msi.c
+++ b/xen/arch/x86/msi.c
@@ -214,14 +214,18 @@ static void read_msi_msg(struct msi_desc
iommu_read_msi_from_ire(entry, msg);
}
-static void write_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
+static int write_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
{
entry->msg = *msg;
if ( iommu_intremap )
{
+ int rc;
+
ASSERT(msg != &entry->msg);
- iommu_update_ire_from_msi(entry, msg);
+ rc = iommu_update_ire_from_msi(entry, msg);
+ if ( rc )
+ return rc;
}
switch ( entry->msi_attrib.type )
@@ -264,6 +268,8 @@ static void write_msi_msg(struct msi_des
default:
BUG();
}
+
+ return 0;
}
void set_msi_affinity(struct irq_desc *desc, const cpumask_t *mask)
@@ -464,19 +470,15 @@ static struct msi_desc* alloc_msi_entry(
return entry;
}
-void setup_msi_handler(struct irq_desc *desc, struct msi_desc *msidesc)
+int setup_msi_irq(struct irq_desc *desc, struct msi_desc *msidesc)
{
+ struct msi_msg msg;
+
desc->msi_desc = msidesc;
desc->handler = msi_maskable_irq(msidesc) ? &pci_msi_maskable
: &pci_msi_nonmaskable;
-}
-
-void setup_msi_irq(struct irq_desc *desc)
-{
- struct msi_msg msg;
-
msi_compose_msg(desc, &msg);
- write_msi_msg(desc->msi_desc, &msg);
+ return write_msi_msg(msidesc, &msg);
}
int msi_free_irq(struct msi_desc *entry)
--- a/xen/drivers/passthrough/amd/iommu_intr.c
+++ b/xen/drivers/passthrough/amd/iommu_intr.c
@@ -17,6 +17,7 @@
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
+#include <xen/err.h>
#include <xen/sched.h>
#include <xen/hvm/iommu.h>
#include <asm/amd-iommu.h>
@@ -359,25 +360,35 @@ done:
}
}
-void amd_iommu_msi_msg_update_ire(
+static struct amd_iommu *_find_iommu_for_device(int seg, int bdf)
+{
+ struct amd_iommu *iommu = find_iommu_for_device(seg, bdf);
+
+ if ( iommu )
+ return iommu;
+
+ list_for_each_entry ( iommu, &amd_iommu_head, list )
+ if ( iommu->seg == seg && iommu->bdf == bdf )
+ return NULL;
+
+ AMD_IOMMU_DEBUG("No IOMMU for MSI dev = %04x:%02x:%02x.%u\n",
+ seg, PCI_BUS(bdf), PCI_SLOT(bdf), PCI_FUNC(bdf));
+ return ERR_PTR(-EINVAL);
+}
+
+int amd_iommu_msi_msg_update_ire(
struct msi_desc *msi_desc, struct msi_msg *msg)
{
struct pci_dev *pdev = msi_desc->dev;
int bdf, seg;
struct amd_iommu *iommu;
- if ( !iommu_intremap )
- return;
-
bdf = pdev ? PCI_BDF2(pdev->bus, pdev->devfn) : hpet_sbdf.bdf;
seg = pdev ? pdev->seg : hpet_sbdf.seg;
- iommu = find_iommu_for_device(seg, bdf);
- if ( !iommu )
- {
- AMD_IOMMU_DEBUG("Fail to find iommu for MSI device id = %#x\n", bdf);
- return;
- }
+ iommu = _find_iommu_for_device(seg, bdf);
+ if ( IS_ERR_OR_NULL(iommu) )
+ return PTR_ERR(iommu);
if ( msi_desc->remap_index >= 0 )
{
@@ -395,7 +406,7 @@ void amd_iommu_msi_msg_update_ire(
}
if ( !msg )
- return;
+ return 0;
do {
update_intremap_entry_from_msi_msg(iommu, bdf, &msi_desc->remap_index,
@@ -404,6 +415,8 @@ void amd_iommu_msi_msg_update_ire(
break;
bdf += pdev->phantom_stride;
} while ( PCI_SLOT(bdf) == PCI_SLOT(pdev->devfn) );
+
+ return 0;
}
void amd_iommu_read_msi_from_ire(
--- a/xen/drivers/passthrough/iommu.c
+++ b/xen/drivers/passthrough/iommu.c
@@ -548,18 +548,20 @@ void iommu_update_ire_from_apic(
const struct iommu_ops *ops = iommu_get_ops();
ops->update_ire_from_apic(apic, reg, value);
}
-void iommu_update_ire_from_msi(
+
+int iommu_update_ire_from_msi(
struct msi_desc *msi_desc, struct msi_msg *msg)
{
const struct iommu_ops *ops = iommu_get_ops();
- ops->update_ire_from_msi(msi_desc, msg);
+ return iommu_intremap ? ops->update_ire_from_msi(msi_desc, msg) : 0;
}
void iommu_read_msi_from_ire(
struct msi_desc *msi_desc, struct msi_msg *msg)
{
const struct iommu_ops *ops = iommu_get_ops();
- ops->read_msi_from_ire(msi_desc, msg);
+ if ( iommu_intremap )
+ ops->read_msi_from_ire(msi_desc, msg);
}
unsigned int iommu_read_apic_from_ire(unsigned int apic, unsigned int reg)
--- a/xen/drivers/passthrough/vtd/extern.h
+++ b/xen/drivers/passthrough/vtd/extern.h
@@ -90,7 +90,7 @@ void io_apic_write_remap_rte(unsigned in
struct msi_desc;
struct msi_msg;
void msi_msg_read_remap_rte(struct msi_desc *, struct msi_msg *);
-void msi_msg_write_remap_rte(struct msi_desc *, struct msi_msg *);
+int msi_msg_write_remap_rte(struct msi_desc *, struct msi_msg *);
int intel_setup_hpet_msi(struct msi_desc *);
--- a/xen/drivers/passthrough/vtd/intremap.c
+++ b/xen/drivers/passthrough/vtd/intremap.c
@@ -653,7 +653,7 @@ void msi_msg_read_remap_rte(
remap_entry_to_msi_msg(drhd->iommu, msg);
}
-void msi_msg_write_remap_rte(
+int msi_msg_write_remap_rte(
struct msi_desc *msi_desc, struct msi_msg *msg)
{
struct pci_dev *pdev = msi_desc->dev;
@@ -661,8 +661,8 @@ void msi_msg_write_remap_rte(
drhd = pdev ? acpi_find_matched_drhd_unit(pdev)
: hpet_to_drhd(msi_desc->hpet_id);
- if ( drhd )
- msi_msg_to_remap_entry(drhd->iommu, pdev, msi_desc, msg);
+ return drhd ? msi_msg_to_remap_entry(drhd->iommu, pdev, msi_desc, msg)
+ : -EINVAL;
}
int __init intel_setup_hpet_msi(struct msi_desc *msi_desc)
--- a/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h
+++ b/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h
@@ -93,7 +93,7 @@ void *amd_iommu_alloc_intremap_table(voi
int amd_iommu_free_intremap_table(u16 seg, struct ivrs_mappings *);
void amd_iommu_ioapic_update_ire(
unsigned int apic, unsigned int reg, unsigned int value);
-void amd_iommu_msi_msg_update_ire(
+int amd_iommu_msi_msg_update_ire(
struct msi_desc *msi_desc, struct msi_msg *msg);
void amd_iommu_read_msi_from_ire(
struct msi_desc *msi_desc, struct msi_msg *msg);
--- a/xen/include/asm-x86/msi.h
+++ b/xen/include/asm-x86/msi.h
@@ -78,8 +78,7 @@ extern int pci_enable_msi(struct msi_inf
extern void pci_disable_msi(struct msi_desc *desc);
extern int pci_prepare_msix(u16 seg, u8 bus, u8 devfn, bool_t off);
extern void pci_cleanup_msi(struct pci_dev *pdev);
-extern void setup_msi_handler(struct irq_desc *, struct msi_desc *);
-extern void setup_msi_irq(struct irq_desc *);
+extern int setup_msi_irq(struct irq_desc *, struct msi_desc *);
extern void teardown_msi_irq(int irq);
extern int msi_free_vector(struct msi_desc *entry);
extern int pci_restore_msi_state(struct pci_dev *pdev);
--- a/xen/include/xen/iommu.h
+++ b/xen/include/xen/iommu.h
@@ -106,7 +106,7 @@ struct iommu_ops {
u8 devfn, struct pci_dev *);
int (*get_device_group_id)(u16 seg, u8 bus, u8 devfn);
void (*update_ire_from_apic)(unsigned int apic, unsigned int reg, unsigned int value);
- void (*update_ire_from_msi)(struct msi_desc *msi_desc, struct msi_msg *msg);
+ int (*update_ire_from_msi)(struct msi_desc *msi_desc, struct msi_msg *msg);
void (*read_msi_from_ire)(struct msi_desc *msi_desc, struct msi_msg *msg);
unsigned int (*read_apic_from_ire)(unsigned int apic, unsigned int reg);
int (*setup_hpet_msi)(struct msi_desc *);
@@ -120,7 +120,7 @@ struct iommu_ops {
};
void iommu_update_ire_from_apic(unsigned int apic, unsigned int reg, unsigned int value);
-void iommu_update_ire_from_msi(struct msi_desc *msi_desc, struct msi_msg *msg);
+int iommu_update_ire_from_msi(struct msi_desc *msi_desc, struct msi_msg *msg);
void iommu_read_msi_from_ire(struct msi_desc *msi_desc, struct msi_msg *msg);
unsigned int iommu_read_apic_from_ire(unsigned int apic, unsigned int reg);
int iommu_setup_hpet_msi(struct msi_desc *);
[-- Attachment #3: Type: text/plain, Size: 126 bytes --]
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel
^ permalink raw reply [flat|nested] 12+ messages in thread
* [PATCH v3 2/3] AMD IOMMU: allocate IRTE entries instead of using a static mapping
2013-04-12 10:18 [PATCH v3 0/3] x86/IOMMU: multi-vector MSI prerequisites Jan Beulich
2013-04-12 10:22 ` [PATCH v3 1/3] IOMMU: allow MSI message to IRTE propagation to fail Jan Beulich
@ 2013-04-12 10:23 ` Jan Beulich
2013-04-12 10:23 ` [PATCH v3 3/3] AMD IOMMU: untie remap and vector maps Jan Beulich
2013-04-13 1:16 ` [PATCH v3 0/3] x86/IOMMU: multi-vector MSI prerequisites Suravee Suthikulpanit
3 siblings, 0 replies; 12+ messages in thread
From: Jan Beulich @ 2013-04-12 10:23 UTC (permalink / raw)
To: xen-devel; +Cc: Jacob Shin, xiantao.zhang, suravee.suthikulpanit
[-- Attachment #1: Type: text/plain, Size: 23093 bytes --]
For multi-vector MSI, where we surely don't want to have to allocate
contiguous vectors, and where we do want to be able to set the
affinities of the individual vectors separately, we need to stop using
the tuple of vector and delivery mode to determine the IRTE to use, and
instead allocate IRTEs (which imo should have been done from the
beginning).
Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v3: Adjust _find_iommu_for_device() to check for the IOMMU itself being
the subject _before_ looking up the matching IOMMU (since now that
we alter the MSI message, we need to specifically care about
skipping the modification when no remapping is to occur), and use
the function also in the MSI message read path. This assumes that
regardless of whether there is an IVRS mapping for the
corresponding PCI device, no remapping occurs for such MSIs. If
that isn't correct, the "return NULL" in the function would need to
be changed to "return iommu".
---
One thing I surely need confirmation on is whether this
BUG_ON(get_ivrs_mappings(iommu->seg)[req_id].intremap_table !=
get_ivrs_mappings(iommu->seg)[alias_id].intremap_table);
in update_intremap_entry_from_msi_msg() is valid. If it isn't, it's not
clear to me how to properly set up things for affected devices, as we
would need an identical index allocated for two different remap table
instances (which can hardly be expected to work out well).
--- a/xen/drivers/passthrough/amd/iommu_acpi.c
+++ b/xen/drivers/passthrough/amd/iommu_acpi.c
@@ -72,12 +72,15 @@ static void __init add_ivrs_mapping_entr
/* allocate per-device interrupt remapping table */
if ( amd_iommu_perdev_intremap )
ivrs_mappings[alias_id].intremap_table =
- amd_iommu_alloc_intremap_table();
+ amd_iommu_alloc_intremap_table(
+ &ivrs_mappings[alias_id].intremap_inuse);
else
{
if ( shared_intremap_table == NULL )
- shared_intremap_table = amd_iommu_alloc_intremap_table();
+ shared_intremap_table = amd_iommu_alloc_intremap_table(
+ &shared_intremap_inuse);
ivrs_mappings[alias_id].intremap_table = shared_intremap_table;
+ ivrs_mappings[alias_id].intremap_inuse = shared_intremap_inuse;
}
}
/* assgin iommu hardware */
@@ -671,7 +674,7 @@ static u16 __init parse_ivhd_device_spec
if ( IO_APIC_ID(apic) != special->handle )
continue;
- if ( ioapic_sbdf[special->handle].pin_setup )
+ if ( ioapic_sbdf[special->handle].pin_2_idx )
{
if ( ioapic_sbdf[special->handle].bdf == bdf &&
ioapic_sbdf[special->handle].seg == seg )
@@ -691,14 +694,16 @@ static u16 __init parse_ivhd_device_spec
ioapic_sbdf[special->handle].bdf = bdf;
ioapic_sbdf[special->handle].seg = seg;
- ioapic_sbdf[special->handle].pin_setup = xzalloc_array(
- unsigned long, BITS_TO_LONGS(nr_ioapic_entries[apic]));
+ ioapic_sbdf[special->handle].pin_2_idx = xmalloc_array(
+ u16, nr_ioapic_entries[apic]);
if ( nr_ioapic_entries[apic] &&
- !ioapic_sbdf[IO_APIC_ID(apic)].pin_setup )
+ !ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx )
{
printk(XENLOG_ERR "IVHD Error: Out of memory\n");
return 0;
}
+ memset(ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx, -1,
+ nr_ioapic_entries[apic]);
}
break;
}
@@ -926,7 +931,7 @@ static int __init parse_ivrs_table(struc
for ( apic = 0; !error && iommu_intremap && apic < nr_ioapics; ++apic )
{
if ( !nr_ioapic_entries[apic] ||
- ioapic_sbdf[IO_APIC_ID(apic)].pin_setup )
+ ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx )
continue;
printk(XENLOG_ERR "IVHD Error: no information for IO-APIC %#x\n",
@@ -935,13 +940,15 @@ static int __init parse_ivrs_table(struc
error = -ENXIO;
else
{
- ioapic_sbdf[IO_APIC_ID(apic)].pin_setup = xzalloc_array(
- unsigned long, BITS_TO_LONGS(nr_ioapic_entries[apic]));
- if ( !ioapic_sbdf[IO_APIC_ID(apic)].pin_setup )
+ ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx = xmalloc_array(
+ u16, nr_ioapic_entries[apic]);
+ if ( !ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx )
{
printk(XENLOG_ERR "IVHD Error: Out of memory\n");
error = -ENOMEM;
}
+ memset(ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx, -1,
+ nr_ioapic_entries[apic]);
}
}
--- a/xen/drivers/passthrough/amd/iommu_intr.c
+++ b/xen/drivers/passthrough/amd/iommu_intr.c
@@ -31,6 +31,7 @@
struct ioapic_sbdf ioapic_sbdf[MAX_IO_APICS];
struct hpet_sbdf hpet_sbdf;
void *shared_intremap_table;
+unsigned long *shared_intremap_inuse;
static DEFINE_SPINLOCK(shared_intremap_lock);
static spinlock_t* get_intremap_lock(int seg, int req_id)
@@ -46,30 +47,31 @@ static int get_intremap_requestor_id(int
return get_ivrs_mappings(seg)[bdf].dte_requestor_id;
}
-static int get_intremap_offset(u8 vector, u8 dm)
+static unsigned int alloc_intremap_entry(int seg, int bdf)
{
- int offset = 0;
- offset = (dm << INT_REMAP_INDEX_DM_SHIFT) & INT_REMAP_INDEX_DM_MASK;
- offset |= (vector << INT_REMAP_INDEX_VECTOR_SHIFT ) &
- INT_REMAP_INDEX_VECTOR_MASK;
- return offset;
+ unsigned long *inuse = get_ivrs_mappings(seg)[bdf].intremap_inuse;
+ unsigned int slot = find_first_zero_bit(inuse, INTREMAP_ENTRIES);
+
+ if ( slot < INTREMAP_ENTRIES )
+ __set_bit(slot, inuse);
+ return slot;
}
-static u8 *get_intremap_entry(int seg, int bdf, int offset)
+static u32 *get_intremap_entry(int seg, int bdf, int offset)
{
- u8 *table;
+ u32 *table = get_ivrs_mappings(seg)[bdf].intremap_table;
- table = (u8*)get_ivrs_mappings(seg)[bdf].intremap_table;
ASSERT( (table != NULL) && (offset < INTREMAP_ENTRIES) );
- return (u8*) (table + offset);
+ return table + offset;
}
static void free_intremap_entry(int seg, int bdf, int offset)
{
- u32* entry;
- entry = (u32*)get_intremap_entry(seg, bdf, offset);
+ u32 *entry = get_intremap_entry(seg, bdf, offset);
+
memset(entry, 0, sizeof(u32));
+ __clear_bit(offset, get_ivrs_mappings(seg)[bdf].intremap_inuse);
}
static void update_intremap_entry(u32* entry, u8 vector, u8 int_type,
@@ -98,18 +100,24 @@ static void update_intremap_entry(u32* e
INT_REMAP_ENTRY_VECTOR_SHIFT, entry);
}
-static void update_intremap_entry_from_ioapic(
+static void set_rte_index(struct IO_APIC_route_entry *rte, int offset)
+{
+ rte->vector = (u8)offset;
+ rte->delivery_mode = offset >> 8;
+}
+
+static int update_intremap_entry_from_ioapic(
int bdf,
struct amd_iommu *iommu,
- const struct IO_APIC_route_entry *rte,
- const struct IO_APIC_route_entry *old_rte)
+ struct IO_APIC_route_entry *rte,
+ u16 *index)
{
unsigned long flags;
u32* entry;
u8 delivery_mode, dest, vector, dest_mode;
int req_id;
spinlock_t *lock;
- int offset;
+ unsigned int offset;
req_id = get_intremap_requestor_id(iommu->seg, bdf);
lock = get_intremap_lock(iommu->seg, req_id);
@@ -121,16 +129,20 @@ static void update_intremap_entry_from_i
spin_lock_irqsave(lock, flags);
- offset = get_intremap_offset(vector, delivery_mode);
- if ( old_rte )
+ offset = *index;
+ if ( offset >= INTREMAP_ENTRIES )
{
- int old_offset = get_intremap_offset(old_rte->vector,
- old_rte->delivery_mode);
-
- if ( offset != old_offset )
- free_intremap_entry(iommu->seg, bdf, old_offset);
+ offset = alloc_intremap_entry(iommu->seg, req_id);
+ if ( offset >= INTREMAP_ENTRIES )
+ {
+ spin_unlock_irqrestore(lock, flags);
+ rte->mask = 1;
+ return -ENOSPC;
+ }
+ *index = offset;
}
- entry = (u32*)get_intremap_entry(iommu->seg, req_id, offset);
+
+ entry = get_intremap_entry(iommu->seg, req_id, offset);
update_intremap_entry(entry, vector, delivery_mode, dest_mode, dest);
spin_unlock_irqrestore(lock, flags);
@@ -141,6 +153,10 @@ static void update_intremap_entry_from_i
amd_iommu_flush_intremap(iommu, req_id);
spin_unlock_irqrestore(&iommu->lock, flags);
}
+
+ set_rte_index(rte, offset);
+
+ return 0;
}
int __init amd_iommu_setup_ioapic_remapping(void)
@@ -153,7 +169,7 @@ int __init amd_iommu_setup_ioapic_remapp
u16 seg, bdf, req_id;
struct amd_iommu *iommu;
spinlock_t *lock;
- int offset;
+ unsigned int offset;
/* Read ioapic entries and update interrupt remapping table accordingly */
for ( apic = 0; apic < nr_ioapics; apic++ )
@@ -184,19 +200,24 @@ int __init amd_iommu_setup_ioapic_remapp
dest = rte.dest.logical.logical_dest;
spin_lock_irqsave(lock, flags);
- offset = get_intremap_offset(vector, delivery_mode);
- entry = (u32*)get_intremap_entry(iommu->seg, req_id, offset);
+ offset = alloc_intremap_entry(seg, req_id);
+ BUG_ON(offset >= INTREMAP_ENTRIES);
+ ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx[pin] = offset;
+ entry = get_intremap_entry(iommu->seg, req_id, offset);
update_intremap_entry(entry, vector,
delivery_mode, dest_mode, dest);
spin_unlock_irqrestore(lock, flags);
+ set_rte_index(&rte, offset);
+ ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx[pin] = offset;
+ __ioapic_write_entry(apic, pin, 1, rte);
+
if ( iommu->enabled )
{
spin_lock_irqsave(&iommu->lock, flags);
amd_iommu_flush_intremap(iommu, req_id);
spin_unlock_irqrestore(&iommu->lock, flags);
}
- set_bit(pin, ioapic_sbdf[IO_APIC_ID(apic)].pin_setup);
}
}
return 0;
@@ -209,7 +230,7 @@ void amd_iommu_ioapic_update_ire(
struct IO_APIC_route_entry new_rte = { 0 };
unsigned int rte_lo = (reg & 1) ? reg - 1 : reg;
unsigned int pin = (reg - 0x10) / 2;
- int saved_mask, seg, bdf;
+ int saved_mask, seg, bdf, rc;
struct amd_iommu *iommu;
if ( !iommu_intremap )
@@ -247,7 +268,7 @@ void amd_iommu_ioapic_update_ire(
}
if ( new_rte.mask &&
- !test_bit(pin, ioapic_sbdf[IO_APIC_ID(apic)].pin_setup) )
+ ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx[pin] >= INTREMAP_ENTRIES )
{
ASSERT(saved_mask);
__io_apic_write(apic, reg, value);
@@ -262,14 +283,19 @@ void amd_iommu_ioapic_update_ire(
}
/* Update interrupt remapping entry */
- update_intremap_entry_from_ioapic(
- bdf, iommu, &new_rte,
- test_and_set_bit(pin,
- ioapic_sbdf[IO_APIC_ID(apic)].pin_setup) ? &old_rte
- : NULL);
+ rc = update_intremap_entry_from_ioapic(
+ bdf, iommu, &new_rte,
+ &ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx[pin]);
- /* Forward write access to IO-APIC RTE */
- __io_apic_write(apic, reg, value);
+ __io_apic_write(apic, reg, ((u32 *)&new_rte)[reg != rte_lo]);
+
+ if ( rc )
+ {
+ /* Keep the entry masked. */
+ printk(XENLOG_ERR "Remapping IO-APIC %#x pin %u failed (%d)\n",
+ IO_APIC_ID(apic), pin, rc);
+ return;
+ }
/* For lower bits access, return directly to avoid double writes */
if ( reg == rte_lo )
@@ -283,16 +309,41 @@ void amd_iommu_ioapic_update_ire(
}
}
-static void update_intremap_entry_from_msi_msg(
+unsigned int amd_iommu_read_ioapic_from_ire(
+ unsigned int apic, unsigned int reg)
+{
+ unsigned int val = __io_apic_read(apic, reg);
+
+ if ( !(reg & 1) )
+ {
+ unsigned int offset = val & (INTREMAP_ENTRIES - 1);
+ u16 bdf = ioapic_sbdf[IO_APIC_ID(apic)].bdf;
+ u16 seg = ioapic_sbdf[IO_APIC_ID(apic)].seg;
+ u16 req_id = get_intremap_requestor_id(seg, bdf);
+ const u32 *entry = get_intremap_entry(seg, req_id, offset);
+
+ val &= ~(INTREMAP_ENTRIES - 1);
+ val |= get_field_from_reg_u32(*entry,
+ INT_REMAP_ENTRY_INTTYPE_MASK,
+ INT_REMAP_ENTRY_INTTYPE_SHIFT) << 8;
+ val |= get_field_from_reg_u32(*entry,
+ INT_REMAP_ENTRY_VECTOR_MASK,
+ INT_REMAP_ENTRY_VECTOR_SHIFT);
+ }
+
+ return val;
+}
+
+static int update_intremap_entry_from_msi_msg(
struct amd_iommu *iommu, u16 bdf,
- int *remap_index, const struct msi_msg *msg)
+ int *remap_index, const struct msi_msg *msg, u32 *data)
{
unsigned long flags;
u32* entry;
u16 req_id, alias_id;
u8 delivery_mode, dest, vector, dest_mode;
spinlock_t *lock;
- int offset;
+ unsigned int offset;
req_id = get_dma_requestor_id(iommu->seg, bdf);
alias_id = get_intremap_requestor_id(iommu->seg, bdf);
@@ -303,15 +354,6 @@ static void update_intremap_entry_from_m
spin_lock_irqsave(lock, flags);
free_intremap_entry(iommu->seg, req_id, *remap_index);
spin_unlock_irqrestore(lock, flags);
-
- if ( ( req_id != alias_id ) &&
- get_ivrs_mappings(iommu->seg)[alias_id].intremap_table != NULL )
- {
- lock = get_intremap_lock(iommu->seg, alias_id);
- spin_lock_irqsave(lock, flags);
- free_intremap_entry(iommu->seg, alias_id, *remap_index);
- spin_unlock_irqrestore(lock, flags);
- }
goto done;
}
@@ -322,16 +364,24 @@ static void update_intremap_entry_from_m
delivery_mode = (msg->data >> MSI_DATA_DELIVERY_MODE_SHIFT) & 0x1;
vector = (msg->data >> MSI_DATA_VECTOR_SHIFT) & MSI_DATA_VECTOR_MASK;
dest = (msg->address_lo >> MSI_ADDR_DEST_ID_SHIFT) & 0xff;
- offset = get_intremap_offset(vector, delivery_mode);
- if ( *remap_index < 0)
+ offset = *remap_index;
+ if ( offset >= INTREMAP_ENTRIES )
+ {
+ offset = alloc_intremap_entry(iommu->seg, bdf);
+ if ( offset >= INTREMAP_ENTRIES )
+ {
+ spin_unlock_irqrestore(lock, flags);
+ return -ENOSPC;
+ }
*remap_index = offset;
- else
- BUG_ON(*remap_index != offset);
+ }
- entry = (u32*)get_intremap_entry(iommu->seg, req_id, offset);
+ entry = get_intremap_entry(iommu->seg, req_id, offset);
update_intremap_entry(entry, vector, delivery_mode, dest_mode, dest);
spin_unlock_irqrestore(lock, flags);
+ *data = (msg->data & ~(INTREMAP_ENTRIES - 1)) | offset;
+
/*
* In some special cases, a pci-e device(e.g SATA controller in IDE mode)
* will use alias id to index interrupt remapping table.
@@ -343,10 +393,8 @@ static void update_intremap_entry_from_m
if ( ( req_id != alias_id ) &&
get_ivrs_mappings(iommu->seg)[alias_id].intremap_table != NULL )
{
- spin_lock_irqsave(lock, flags);
- entry = (u32*)get_intremap_entry(iommu->seg, alias_id, offset);
- update_intremap_entry(entry, vector, delivery_mode, dest_mode, dest);
- spin_unlock_irqrestore(lock, flags);
+ BUG_ON(get_ivrs_mappings(iommu->seg)[req_id].intremap_table !=
+ get_ivrs_mappings(iommu->seg)[alias_id].intremap_table);
}
done:
@@ -358,19 +406,22 @@ done:
amd_iommu_flush_intremap(iommu, alias_id);
spin_unlock_irqrestore(&iommu->lock, flags);
}
+
+ return 0;
}
static struct amd_iommu *_find_iommu_for_device(int seg, int bdf)
{
- struct amd_iommu *iommu = find_iommu_for_device(seg, bdf);
-
- if ( iommu )
- return iommu;
+ struct amd_iommu *iommu;
list_for_each_entry ( iommu, &amd_iommu_head, list )
if ( iommu->seg == seg && iommu->bdf == bdf )
return NULL;
+ iommu = find_iommu_for_device(seg, bdf);
+ if ( iommu )
+ return iommu;
+
AMD_IOMMU_DEBUG("No IOMMU for MSI dev = %04x:%02x:%02x.%u\n",
seg, PCI_BUS(bdf), PCI_SLOT(bdf), PCI_FUNC(bdf));
return ERR_PTR(-EINVAL);
@@ -380,8 +431,9 @@ int amd_iommu_msi_msg_update_ire(
struct msi_desc *msi_desc, struct msi_msg *msg)
{
struct pci_dev *pdev = msi_desc->dev;
- int bdf, seg;
+ int bdf, seg, rc;
struct amd_iommu *iommu;
+ u32 data;
bdf = pdev ? PCI_BDF2(pdev->bus, pdev->devfn) : hpet_sbdf.bdf;
seg = pdev ? pdev->seg : hpet_sbdf.seg;
@@ -390,11 +442,12 @@ int amd_iommu_msi_msg_update_ire(
if ( IS_ERR_OR_NULL(iommu) )
return PTR_ERR(iommu);
- if ( msi_desc->remap_index >= 0 )
+ if ( msi_desc->remap_index >= 0 && !msg )
{
do {
update_intremap_entry_from_msi_msg(iommu, bdf,
- &msi_desc->remap_index, NULL);
+ &msi_desc->remap_index,
+ NULL, NULL);
if ( !pdev || !pdev->phantom_stride )
break;
bdf += pdev->phantom_stride;
@@ -409,19 +462,39 @@ int amd_iommu_msi_msg_update_ire(
return 0;
do {
- update_intremap_entry_from_msi_msg(iommu, bdf, &msi_desc->remap_index,
- msg);
- if ( !pdev || !pdev->phantom_stride )
+ rc = update_intremap_entry_from_msi_msg(iommu, bdf,
+ &msi_desc->remap_index,
+ msg, &data);
+ if ( rc || !pdev || !pdev->phantom_stride )
break;
bdf += pdev->phantom_stride;
} while ( PCI_SLOT(bdf) == PCI_SLOT(pdev->devfn) );
- return 0;
+ msg->data = data;
+ return rc;
}
void amd_iommu_read_msi_from_ire(
struct msi_desc *msi_desc, struct msi_msg *msg)
{
+ unsigned int offset = msg->data & (INTREMAP_ENTRIES - 1);
+ const struct pci_dev *pdev = msi_desc->dev;
+ u16 bdf = pdev ? PCI_BDF2(pdev->bus, pdev->devfn) : hpet_sbdf.bdf;
+ u16 seg = pdev ? pdev->seg : hpet_sbdf.seg;
+ const u32 *entry;
+
+ if ( IS_ERR_OR_NULL(_find_iommu_for_device(seg, bdf)) )
+ return;
+
+ entry = get_intremap_entry(seg, get_dma_requestor_id(seg, bdf), offset);
+
+ msg->data &= ~(INTREMAP_ENTRIES - 1);
+ msg->data |= get_field_from_reg_u32(*entry,
+ INT_REMAP_ENTRY_INTTYPE_MASK,
+ INT_REMAP_ENTRY_INTTYPE_SHIFT) << 8;
+ msg->data |= get_field_from_reg_u32(*entry,
+ INT_REMAP_ENTRY_VECTOR_MASK,
+ INT_REMAP_ENTRY_VECTOR_SHIFT);
}
int __init amd_iommu_free_intremap_table(
@@ -438,12 +511,14 @@ int __init amd_iommu_free_intremap_table
return 0;
}
-void* __init amd_iommu_alloc_intremap_table(void)
+void* __init amd_iommu_alloc_intremap_table(unsigned long **inuse_map)
{
void *tb;
tb = __alloc_amd_iommu_tables(INTREMAP_TABLE_ORDER);
BUG_ON(tb == NULL);
memset(tb, 0, PAGE_SIZE * (1UL << INTREMAP_TABLE_ORDER));
+ *inuse_map = xzalloc_array(unsigned long, BITS_TO_LONGS(INTREMAP_ENTRIES));
+ BUG_ON(*inuse_map == NULL);
return tb;
}
--- a/xen/drivers/passthrough/amd/pci_amd_iommu.c
+++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c
@@ -622,7 +622,7 @@ const struct iommu_ops amd_iommu_ops = {
.get_device_group_id = amd_iommu_group_id,
.update_ire_from_apic = amd_iommu_ioapic_update_ire,
.update_ire_from_msi = amd_iommu_msi_msg_update_ire,
- .read_apic_from_ire = __io_apic_read,
+ .read_apic_from_ire = amd_iommu_read_ioapic_from_ire,
.read_msi_from_ire = amd_iommu_read_msi_from_ire,
.setup_hpet_msi = amd_setup_hpet_msi,
.suspend = amd_iommu_suspend,
--- a/xen/include/asm-x86/amd-iommu.h
+++ b/xen/include/asm-x86/amd-iommu.h
@@ -119,6 +119,7 @@ struct ivrs_mappings {
/* per device interrupt remapping table */
void *intremap_table;
+ unsigned long *intremap_inuse;
spinlock_t intremap_lock;
/* ivhd device data settings */
--- a/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h
+++ b/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h
@@ -458,10 +458,6 @@
#define MAX_AMD_IOMMUS 32
/* interrupt remapping table */
-#define INT_REMAP_INDEX_DM_MASK 0x1C00
-#define INT_REMAP_INDEX_DM_SHIFT 10
-#define INT_REMAP_INDEX_VECTOR_MASK 0x3FC
-#define INT_REMAP_INDEX_VECTOR_SHIFT 2
#define INT_REMAP_ENTRY_REMAPEN_MASK 0x00000001
#define INT_REMAP_ENTRY_REMAPEN_SHIFT 0
#define INT_REMAP_ENTRY_SUPIOPF_MASK 0x00000002
--- a/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h
+++ b/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h
@@ -89,10 +89,12 @@ struct amd_iommu *find_iommu_for_device(
/* interrupt remapping */
int amd_iommu_setup_ioapic_remapping(void);
-void *amd_iommu_alloc_intremap_table(void);
+void *amd_iommu_alloc_intremap_table(unsigned long **);
int amd_iommu_free_intremap_table(u16 seg, struct ivrs_mappings *);
void amd_iommu_ioapic_update_ire(
unsigned int apic, unsigned int reg, unsigned int value);
+unsigned int amd_iommu_read_ioapic_from_ire(
+ unsigned int apic, unsigned int reg);
int amd_iommu_msi_msg_update_ire(
struct msi_desc *msi_desc, struct msi_msg *msg);
void amd_iommu_read_msi_from_ire(
@@ -101,15 +103,17 @@ int amd_setup_hpet_msi(struct msi_desc *
extern struct ioapic_sbdf {
u16 bdf, seg;
- unsigned long *pin_setup;
+ u16 *pin_2_idx;
} ioapic_sbdf[MAX_IO_APICS];
-extern void *shared_intremap_table;
extern struct hpet_sbdf {
u16 bdf, seg, id;
struct amd_iommu *iommu;
} hpet_sbdf;
+extern void *shared_intremap_table;
+extern unsigned long *shared_intremap_inuse;
+
/* power management support */
void amd_iommu_resume(void);
void amd_iommu_suspend(void);
[-- Attachment #2: AMD-IOMMU-irte-alloc.patch --]
[-- Type: text/plain, Size: 23159 bytes --]
AMD IOMMU: allocate IRTE entries instead of using a static mapping
For multi-vector MSI, where we surely don't want to have to allocate
contiguous vectors and do want to be able to set the affinities of the
individual vectors separately, we need to drop the use of the tuple of
vector and delivery mode to determine the IRTE to use, and instead
allocate IRTEs (which IMO should have been done from the beginning).
Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v3: Adjust _find_iommu_for_device() to check for the IOMMU itself being
the subject _before_ looking up the matching IOMMU (since, now that
we alter the MSI message, we need to take specific care to skip
the modification when no remapping is to occur), and also use
the function in the MSI message read path. This assumes that
regardless of whether there is an IVRS mapping for the
corresponding PCI device, no remapping occurs for such MSIs. If
that isn't correct, the "return NULL" in the function would need to
be changed to "return iommu".
---
One thing I surely need confirmation on is whether this
BUG_ON(get_ivrs_mappings(iommu->seg)[req_id].intremap_table !=
get_ivrs_mappings(iommu->seg)[alias_id].intremap_table);
in update_intremap_entry_from_msi_msg() is valid. If it isn't, it's not
clear to me how to properly set up things for affected devices, as we
would need an identical index allocated for two different remap table
instances (which can hardly be expected to work out well).
--- a/xen/drivers/passthrough/amd/iommu_acpi.c
+++ b/xen/drivers/passthrough/amd/iommu_acpi.c
@@ -72,12 +72,15 @@ static void __init add_ivrs_mapping_entr
/* allocate per-device interrupt remapping table */
if ( amd_iommu_perdev_intremap )
ivrs_mappings[alias_id].intremap_table =
- amd_iommu_alloc_intremap_table();
+ amd_iommu_alloc_intremap_table(
+ &ivrs_mappings[alias_id].intremap_inuse);
else
{
if ( shared_intremap_table == NULL )
- shared_intremap_table = amd_iommu_alloc_intremap_table();
+ shared_intremap_table = amd_iommu_alloc_intremap_table(
+ &shared_intremap_inuse);
ivrs_mappings[alias_id].intremap_table = shared_intremap_table;
+ ivrs_mappings[alias_id].intremap_inuse = shared_intremap_inuse;
}
}
/* assgin iommu hardware */
@@ -671,7 +674,7 @@ static u16 __init parse_ivhd_device_spec
if ( IO_APIC_ID(apic) != special->handle )
continue;
- if ( ioapic_sbdf[special->handle].pin_setup )
+ if ( ioapic_sbdf[special->handle].pin_2_idx )
{
if ( ioapic_sbdf[special->handle].bdf == bdf &&
ioapic_sbdf[special->handle].seg == seg )
@@ -691,14 +694,16 @@ static u16 __init parse_ivhd_device_spec
ioapic_sbdf[special->handle].bdf = bdf;
ioapic_sbdf[special->handle].seg = seg;
- ioapic_sbdf[special->handle].pin_setup = xzalloc_array(
- unsigned long, BITS_TO_LONGS(nr_ioapic_entries[apic]));
+ ioapic_sbdf[special->handle].pin_2_idx = xmalloc_array(
+ u16, nr_ioapic_entries[apic]);
if ( nr_ioapic_entries[apic] &&
- !ioapic_sbdf[IO_APIC_ID(apic)].pin_setup )
+ !ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx )
{
printk(XENLOG_ERR "IVHD Error: Out of memory\n");
return 0;
}
+ memset(ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx, -1,
+ nr_ioapic_entries[apic]);
}
break;
}
@@ -926,7 +931,7 @@ static int __init parse_ivrs_table(struc
for ( apic = 0; !error && iommu_intremap && apic < nr_ioapics; ++apic )
{
if ( !nr_ioapic_entries[apic] ||
- ioapic_sbdf[IO_APIC_ID(apic)].pin_setup )
+ ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx )
continue;
printk(XENLOG_ERR "IVHD Error: no information for IO-APIC %#x\n",
@@ -935,13 +940,15 @@ static int __init parse_ivrs_table(struc
error = -ENXIO;
else
{
- ioapic_sbdf[IO_APIC_ID(apic)].pin_setup = xzalloc_array(
- unsigned long, BITS_TO_LONGS(nr_ioapic_entries[apic]));
- if ( !ioapic_sbdf[IO_APIC_ID(apic)].pin_setup )
+ ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx = xmalloc_array(
+ u16, nr_ioapic_entries[apic]);
+ if ( !ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx )
{
printk(XENLOG_ERR "IVHD Error: Out of memory\n");
error = -ENOMEM;
}
+ memset(ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx, -1,
+ nr_ioapic_entries[apic]);
}
}
--- a/xen/drivers/passthrough/amd/iommu_intr.c
+++ b/xen/drivers/passthrough/amd/iommu_intr.c
@@ -31,6 +31,7 @@
struct ioapic_sbdf ioapic_sbdf[MAX_IO_APICS];
struct hpet_sbdf hpet_sbdf;
void *shared_intremap_table;
+unsigned long *shared_intremap_inuse;
static DEFINE_SPINLOCK(shared_intremap_lock);
static spinlock_t* get_intremap_lock(int seg, int req_id)
@@ -46,30 +47,31 @@ static int get_intremap_requestor_id(int
return get_ivrs_mappings(seg)[bdf].dte_requestor_id;
}
-static int get_intremap_offset(u8 vector, u8 dm)
+static unsigned int alloc_intremap_entry(int seg, int bdf)
{
- int offset = 0;
- offset = (dm << INT_REMAP_INDEX_DM_SHIFT) & INT_REMAP_INDEX_DM_MASK;
- offset |= (vector << INT_REMAP_INDEX_VECTOR_SHIFT ) &
- INT_REMAP_INDEX_VECTOR_MASK;
- return offset;
+ unsigned long *inuse = get_ivrs_mappings(seg)[bdf].intremap_inuse;
+ unsigned int slot = find_first_zero_bit(inuse, INTREMAP_ENTRIES);
+
+ if ( slot < INTREMAP_ENTRIES )
+ __set_bit(slot, inuse);
+ return slot;
}
-static u8 *get_intremap_entry(int seg, int bdf, int offset)
+static u32 *get_intremap_entry(int seg, int bdf, int offset)
{
- u8 *table;
+ u32 *table = get_ivrs_mappings(seg)[bdf].intremap_table;
- table = (u8*)get_ivrs_mappings(seg)[bdf].intremap_table;
ASSERT( (table != NULL) && (offset < INTREMAP_ENTRIES) );
- return (u8*) (table + offset);
+ return table + offset;
}
static void free_intremap_entry(int seg, int bdf, int offset)
{
- u32* entry;
- entry = (u32*)get_intremap_entry(seg, bdf, offset);
+ u32 *entry = get_intremap_entry(seg, bdf, offset);
+
memset(entry, 0, sizeof(u32));
+ __clear_bit(offset, get_ivrs_mappings(seg)[bdf].intremap_inuse);
}
static void update_intremap_entry(u32* entry, u8 vector, u8 int_type,
@@ -98,18 +100,24 @@ static void update_intremap_entry(u32* e
INT_REMAP_ENTRY_VECTOR_SHIFT, entry);
}
-static void update_intremap_entry_from_ioapic(
+static void set_rte_index(struct IO_APIC_route_entry *rte, int offset)
+{
+ rte->vector = (u8)offset;
+ rte->delivery_mode = offset >> 8;
+}
+
+static int update_intremap_entry_from_ioapic(
int bdf,
struct amd_iommu *iommu,
- const struct IO_APIC_route_entry *rte,
- const struct IO_APIC_route_entry *old_rte)
+ struct IO_APIC_route_entry *rte,
+ u16 *index)
{
unsigned long flags;
u32* entry;
u8 delivery_mode, dest, vector, dest_mode;
int req_id;
spinlock_t *lock;
- int offset;
+ unsigned int offset;
req_id = get_intremap_requestor_id(iommu->seg, bdf);
lock = get_intremap_lock(iommu->seg, req_id);
@@ -121,16 +129,20 @@ static void update_intremap_entry_from_i
spin_lock_irqsave(lock, flags);
- offset = get_intremap_offset(vector, delivery_mode);
- if ( old_rte )
+ offset = *index;
+ if ( offset >= INTREMAP_ENTRIES )
{
- int old_offset = get_intremap_offset(old_rte->vector,
- old_rte->delivery_mode);
-
- if ( offset != old_offset )
- free_intremap_entry(iommu->seg, bdf, old_offset);
+ offset = alloc_intremap_entry(iommu->seg, req_id);
+ if ( offset >= INTREMAP_ENTRIES )
+ {
+ spin_unlock_irqrestore(lock, flags);
+ rte->mask = 1;
+ return -ENOSPC;
+ }
+ *index = offset;
}
- entry = (u32*)get_intremap_entry(iommu->seg, req_id, offset);
+
+ entry = get_intremap_entry(iommu->seg, req_id, offset);
update_intremap_entry(entry, vector, delivery_mode, dest_mode, dest);
spin_unlock_irqrestore(lock, flags);
@@ -141,6 +153,10 @@ static void update_intremap_entry_from_i
amd_iommu_flush_intremap(iommu, req_id);
spin_unlock_irqrestore(&iommu->lock, flags);
}
+
+ set_rte_index(rte, offset);
+
+ return 0;
}
int __init amd_iommu_setup_ioapic_remapping(void)
@@ -153,7 +169,7 @@ int __init amd_iommu_setup_ioapic_remapp
u16 seg, bdf, req_id;
struct amd_iommu *iommu;
spinlock_t *lock;
- int offset;
+ unsigned int offset;
/* Read ioapic entries and update interrupt remapping table accordingly */
for ( apic = 0; apic < nr_ioapics; apic++ )
@@ -184,19 +200,24 @@ int __init amd_iommu_setup_ioapic_remapp
dest = rte.dest.logical.logical_dest;
spin_lock_irqsave(lock, flags);
- offset = get_intremap_offset(vector, delivery_mode);
- entry = (u32*)get_intremap_entry(iommu->seg, req_id, offset);
+ offset = alloc_intremap_entry(seg, req_id);
+ BUG_ON(offset >= INTREMAP_ENTRIES);
+ ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx[pin] = offset;
+ entry = get_intremap_entry(iommu->seg, req_id, offset);
update_intremap_entry(entry, vector,
delivery_mode, dest_mode, dest);
spin_unlock_irqrestore(lock, flags);
+ set_rte_index(&rte, offset);
+ ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx[pin] = offset;
+ __ioapic_write_entry(apic, pin, 1, rte);
+
if ( iommu->enabled )
{
spin_lock_irqsave(&iommu->lock, flags);
amd_iommu_flush_intremap(iommu, req_id);
spin_unlock_irqrestore(&iommu->lock, flags);
}
- set_bit(pin, ioapic_sbdf[IO_APIC_ID(apic)].pin_setup);
}
}
return 0;
@@ -209,7 +230,7 @@ void amd_iommu_ioapic_update_ire(
struct IO_APIC_route_entry new_rte = { 0 };
unsigned int rte_lo = (reg & 1) ? reg - 1 : reg;
unsigned int pin = (reg - 0x10) / 2;
- int saved_mask, seg, bdf;
+ int saved_mask, seg, bdf, rc;
struct amd_iommu *iommu;
if ( !iommu_intremap )
@@ -247,7 +268,7 @@ void amd_iommu_ioapic_update_ire(
}
if ( new_rte.mask &&
- !test_bit(pin, ioapic_sbdf[IO_APIC_ID(apic)].pin_setup) )
+ ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx[pin] >= INTREMAP_ENTRIES )
{
ASSERT(saved_mask);
__io_apic_write(apic, reg, value);
@@ -262,14 +283,19 @@ void amd_iommu_ioapic_update_ire(
}
/* Update interrupt remapping entry */
- update_intremap_entry_from_ioapic(
- bdf, iommu, &new_rte,
- test_and_set_bit(pin,
- ioapic_sbdf[IO_APIC_ID(apic)].pin_setup) ? &old_rte
- : NULL);
+ rc = update_intremap_entry_from_ioapic(
+ bdf, iommu, &new_rte,
+ &ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx[pin]);
- /* Forward write access to IO-APIC RTE */
- __io_apic_write(apic, reg, value);
+ __io_apic_write(apic, reg, ((u32 *)&new_rte)[reg != rte_lo]);
+
+ if ( rc )
+ {
+ /* Keep the entry masked. */
+ printk(XENLOG_ERR "Remapping IO-APIC %#x pin %u failed (%d)\n",
+ IO_APIC_ID(apic), pin, rc);
+ return;
+ }
/* For lower bits access, return directly to avoid double writes */
if ( reg == rte_lo )
@@ -283,16 +309,41 @@ void amd_iommu_ioapic_update_ire(
}
}
-static void update_intremap_entry_from_msi_msg(
+unsigned int amd_iommu_read_ioapic_from_ire(
+ unsigned int apic, unsigned int reg)
+{
+ unsigned int val = __io_apic_read(apic, reg);
+
+ if ( !(reg & 1) )
+ {
+ unsigned int offset = val & (INTREMAP_ENTRIES - 1);
+ u16 bdf = ioapic_sbdf[IO_APIC_ID(apic)].bdf;
+ u16 seg = ioapic_sbdf[IO_APIC_ID(apic)].seg;
+ u16 req_id = get_intremap_requestor_id(seg, bdf);
+ const u32 *entry = get_intremap_entry(seg, req_id, offset);
+
+ val &= ~(INTREMAP_ENTRIES - 1);
+ val |= get_field_from_reg_u32(*entry,
+ INT_REMAP_ENTRY_INTTYPE_MASK,
+ INT_REMAP_ENTRY_INTTYPE_SHIFT) << 8;
+ val |= get_field_from_reg_u32(*entry,
+ INT_REMAP_ENTRY_VECTOR_MASK,
+ INT_REMAP_ENTRY_VECTOR_SHIFT);
+ }
+
+ return val;
+}
+
+static int update_intremap_entry_from_msi_msg(
struct amd_iommu *iommu, u16 bdf,
- int *remap_index, const struct msi_msg *msg)
+ int *remap_index, const struct msi_msg *msg, u32 *data)
{
unsigned long flags;
u32* entry;
u16 req_id, alias_id;
u8 delivery_mode, dest, vector, dest_mode;
spinlock_t *lock;
- int offset;
+ unsigned int offset;
req_id = get_dma_requestor_id(iommu->seg, bdf);
alias_id = get_intremap_requestor_id(iommu->seg, bdf);
@@ -303,15 +354,6 @@ static void update_intremap_entry_from_m
spin_lock_irqsave(lock, flags);
free_intremap_entry(iommu->seg, req_id, *remap_index);
spin_unlock_irqrestore(lock, flags);
-
- if ( ( req_id != alias_id ) &&
- get_ivrs_mappings(iommu->seg)[alias_id].intremap_table != NULL )
- {
- lock = get_intremap_lock(iommu->seg, alias_id);
- spin_lock_irqsave(lock, flags);
- free_intremap_entry(iommu->seg, alias_id, *remap_index);
- spin_unlock_irqrestore(lock, flags);
- }
goto done;
}
@@ -322,16 +364,24 @@ static void update_intremap_entry_from_m
delivery_mode = (msg->data >> MSI_DATA_DELIVERY_MODE_SHIFT) & 0x1;
vector = (msg->data >> MSI_DATA_VECTOR_SHIFT) & MSI_DATA_VECTOR_MASK;
dest = (msg->address_lo >> MSI_ADDR_DEST_ID_SHIFT) & 0xff;
- offset = get_intremap_offset(vector, delivery_mode);
- if ( *remap_index < 0)
+ offset = *remap_index;
+ if ( offset >= INTREMAP_ENTRIES )
+ {
+ offset = alloc_intremap_entry(iommu->seg, bdf);
+ if ( offset >= INTREMAP_ENTRIES )
+ {
+ spin_unlock_irqrestore(lock, flags);
+ return -ENOSPC;
+ }
*remap_index = offset;
- else
- BUG_ON(*remap_index != offset);
+ }
- entry = (u32*)get_intremap_entry(iommu->seg, req_id, offset);
+ entry = get_intremap_entry(iommu->seg, req_id, offset);
update_intremap_entry(entry, vector, delivery_mode, dest_mode, dest);
spin_unlock_irqrestore(lock, flags);
+ *data = (msg->data & ~(INTREMAP_ENTRIES - 1)) | offset;
+
/*
* In some special cases, a pci-e device(e.g SATA controller in IDE mode)
* will use alias id to index interrupt remapping table.
@@ -343,10 +393,8 @@ static void update_intremap_entry_from_m
if ( ( req_id != alias_id ) &&
get_ivrs_mappings(iommu->seg)[alias_id].intremap_table != NULL )
{
- spin_lock_irqsave(lock, flags);
- entry = (u32*)get_intremap_entry(iommu->seg, alias_id, offset);
- update_intremap_entry(entry, vector, delivery_mode, dest_mode, dest);
- spin_unlock_irqrestore(lock, flags);
+ BUG_ON(get_ivrs_mappings(iommu->seg)[req_id].intremap_table !=
+ get_ivrs_mappings(iommu->seg)[alias_id].intremap_table);
}
done:
@@ -358,19 +406,22 @@ done:
amd_iommu_flush_intremap(iommu, alias_id);
spin_unlock_irqrestore(&iommu->lock, flags);
}
+
+ return 0;
}
static struct amd_iommu *_find_iommu_for_device(int seg, int bdf)
{
- struct amd_iommu *iommu = find_iommu_for_device(seg, bdf);
-
- if ( iommu )
- return iommu;
+ struct amd_iommu *iommu;
list_for_each_entry ( iommu, &amd_iommu_head, list )
if ( iommu->seg == seg && iommu->bdf == bdf )
return NULL;
+ iommu = find_iommu_for_device(seg, bdf);
+ if ( iommu )
+ return iommu;
+
AMD_IOMMU_DEBUG("No IOMMU for MSI dev = %04x:%02x:%02x.%u\n",
seg, PCI_BUS(bdf), PCI_SLOT(bdf), PCI_FUNC(bdf));
return ERR_PTR(-EINVAL);
@@ -380,8 +431,9 @@ int amd_iommu_msi_msg_update_ire(
struct msi_desc *msi_desc, struct msi_msg *msg)
{
struct pci_dev *pdev = msi_desc->dev;
- int bdf, seg;
+ int bdf, seg, rc;
struct amd_iommu *iommu;
+ u32 data;
bdf = pdev ? PCI_BDF2(pdev->bus, pdev->devfn) : hpet_sbdf.bdf;
seg = pdev ? pdev->seg : hpet_sbdf.seg;
@@ -390,11 +442,12 @@ int amd_iommu_msi_msg_update_ire(
if ( IS_ERR_OR_NULL(iommu) )
return PTR_ERR(iommu);
- if ( msi_desc->remap_index >= 0 )
+ if ( msi_desc->remap_index >= 0 && !msg )
{
do {
update_intremap_entry_from_msi_msg(iommu, bdf,
- &msi_desc->remap_index, NULL);
+ &msi_desc->remap_index,
+ NULL, NULL);
if ( !pdev || !pdev->phantom_stride )
break;
bdf += pdev->phantom_stride;
@@ -409,19 +462,39 @@ int amd_iommu_msi_msg_update_ire(
return 0;
do {
- update_intremap_entry_from_msi_msg(iommu, bdf, &msi_desc->remap_index,
- msg);
- if ( !pdev || !pdev->phantom_stride )
+ rc = update_intremap_entry_from_msi_msg(iommu, bdf,
+ &msi_desc->remap_index,
+ msg, &data);
+ if ( rc || !pdev || !pdev->phantom_stride )
break;
bdf += pdev->phantom_stride;
} while ( PCI_SLOT(bdf) == PCI_SLOT(pdev->devfn) );
- return 0;
+ msg->data = data;
+ return rc;
}
void amd_iommu_read_msi_from_ire(
struct msi_desc *msi_desc, struct msi_msg *msg)
{
+ unsigned int offset = msg->data & (INTREMAP_ENTRIES - 1);
+ const struct pci_dev *pdev = msi_desc->dev;
+ u16 bdf = pdev ? PCI_BDF2(pdev->bus, pdev->devfn) : hpet_sbdf.bdf;
+ u16 seg = pdev ? pdev->seg : hpet_sbdf.seg;
+ const u32 *entry;
+
+ if ( IS_ERR_OR_NULL(_find_iommu_for_device(seg, bdf)) )
+ return;
+
+ entry = get_intremap_entry(seg, get_dma_requestor_id(seg, bdf), offset);
+
+ msg->data &= ~(INTREMAP_ENTRIES - 1);
+ msg->data |= get_field_from_reg_u32(*entry,
+ INT_REMAP_ENTRY_INTTYPE_MASK,
+ INT_REMAP_ENTRY_INTTYPE_SHIFT) << 8;
+ msg->data |= get_field_from_reg_u32(*entry,
+ INT_REMAP_ENTRY_VECTOR_MASK,
+ INT_REMAP_ENTRY_VECTOR_SHIFT);
}
int __init amd_iommu_free_intremap_table(
@@ -438,12 +511,14 @@ int __init amd_iommu_free_intremap_table
return 0;
}
-void* __init amd_iommu_alloc_intremap_table(void)
+void* __init amd_iommu_alloc_intremap_table(unsigned long **inuse_map)
{
void *tb;
tb = __alloc_amd_iommu_tables(INTREMAP_TABLE_ORDER);
BUG_ON(tb == NULL);
memset(tb, 0, PAGE_SIZE * (1UL << INTREMAP_TABLE_ORDER));
+ *inuse_map = xzalloc_array(unsigned long, BITS_TO_LONGS(INTREMAP_ENTRIES));
+ BUG_ON(*inuse_map == NULL);
return tb;
}
--- a/xen/drivers/passthrough/amd/pci_amd_iommu.c
+++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c
@@ -622,7 +622,7 @@ const struct iommu_ops amd_iommu_ops = {
.get_device_group_id = amd_iommu_group_id,
.update_ire_from_apic = amd_iommu_ioapic_update_ire,
.update_ire_from_msi = amd_iommu_msi_msg_update_ire,
- .read_apic_from_ire = __io_apic_read,
+ .read_apic_from_ire = amd_iommu_read_ioapic_from_ire,
.read_msi_from_ire = amd_iommu_read_msi_from_ire,
.setup_hpet_msi = amd_setup_hpet_msi,
.suspend = amd_iommu_suspend,
--- a/xen/include/asm-x86/amd-iommu.h
+++ b/xen/include/asm-x86/amd-iommu.h
@@ -119,6 +119,7 @@ struct ivrs_mappings {
/* per device interrupt remapping table */
void *intremap_table;
+ unsigned long *intremap_inuse;
spinlock_t intremap_lock;
/* ivhd device data settings */
--- a/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h
+++ b/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h
@@ -458,10 +458,6 @@
#define MAX_AMD_IOMMUS 32
/* interrupt remapping table */
-#define INT_REMAP_INDEX_DM_MASK 0x1C00
-#define INT_REMAP_INDEX_DM_SHIFT 10
-#define INT_REMAP_INDEX_VECTOR_MASK 0x3FC
-#define INT_REMAP_INDEX_VECTOR_SHIFT 2
#define INT_REMAP_ENTRY_REMAPEN_MASK 0x00000001
#define INT_REMAP_ENTRY_REMAPEN_SHIFT 0
#define INT_REMAP_ENTRY_SUPIOPF_MASK 0x00000002
--- a/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h
+++ b/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h
@@ -89,10 +89,12 @@ struct amd_iommu *find_iommu_for_device(
/* interrupt remapping */
int amd_iommu_setup_ioapic_remapping(void);
-void *amd_iommu_alloc_intremap_table(void);
+void *amd_iommu_alloc_intremap_table(unsigned long **);
int amd_iommu_free_intremap_table(u16 seg, struct ivrs_mappings *);
void amd_iommu_ioapic_update_ire(
unsigned int apic, unsigned int reg, unsigned int value);
+unsigned int amd_iommu_read_ioapic_from_ire(
+ unsigned int apic, unsigned int reg);
int amd_iommu_msi_msg_update_ire(
struct msi_desc *msi_desc, struct msi_msg *msg);
void amd_iommu_read_msi_from_ire(
@@ -101,15 +103,17 @@ int amd_setup_hpet_msi(struct msi_desc *
extern struct ioapic_sbdf {
u16 bdf, seg;
- unsigned long *pin_setup;
+ u16 *pin_2_idx;
} ioapic_sbdf[MAX_IO_APICS];
-extern void *shared_intremap_table;
extern struct hpet_sbdf {
u16 bdf, seg, id;
struct amd_iommu *iommu;
} hpet_sbdf;
+extern void *shared_intremap_table;
+extern unsigned long *shared_intremap_inuse;
+
/* power management support */
void amd_iommu_resume(void);
void amd_iommu_suspend(void);
[-- Attachment #3: Type: text/plain, Size: 126 bytes --]
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel
^ permalink raw reply [flat|nested] 12+ messages in thread