* [PATCH 1/5] xen/pci: Add xen_[find|register|unregister]_device_domain_owner functions.
2011-04-14 15:30 [PATCH] Provide infrastructure changes for backends (especially pciback) for 2.6.40 Konrad Rzeszutek Wilk
@ 2011-04-14 15:30 ` Konrad Rzeszutek Wilk
2011-04-14 15:30 ` [PATCH 2/5] xen/irq: Check if the PCI device is owned by a domain different than DOMID_SELF Konrad Rzeszutek Wilk
` (5 subsequent siblings)
6 siblings, 0 replies; 8+ messages in thread
From: Konrad Rzeszutek Wilk @ 2011-04-14 15:30 UTC (permalink / raw)
To: xen-devel, linux-kernel; +Cc: Konrad Rzeszutek Wilk
When the Xen PCI backend is told to enable or disable MSI/MSI-X functions,
the initial domain performs these operations. The initial domain needs
to know which domain (guest) is going to use the PCI device so when it
makes the appropriate hypercall to retrieve the MSI/MSI-X vector it will
also assign the PCI device to the appropriate domain (guest).
This boils down to us needing a mechanism to find, set and unset the domain
id that will be using the device.
[v2: EXPORT_SYMBOL -> EXPORT_SYMBOL_GPL.]
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
arch/x86/include/asm/xen/pci.h | 16 +++++++++
arch/x86/pci/xen.c | 73 ++++++++++++++++++++++++++++++++++++++++
2 files changed, 89 insertions(+), 0 deletions(-)
diff --git a/arch/x86/include/asm/xen/pci.h b/arch/x86/include/asm/xen/pci.h
index aa86209..4fbda9a 100644
--- a/arch/x86/include/asm/xen/pci.h
+++ b/arch/x86/include/asm/xen/pci.h
@@ -15,10 +15,26 @@ static inline int pci_xen_hvm_init(void)
#endif
#if defined(CONFIG_XEN_DOM0)
void __init xen_setup_pirqs(void);
+int xen_find_device_domain_owner(struct pci_dev *dev);
+int xen_register_device_domain_owner(struct pci_dev *dev, uint16_t domain);
+int xen_unregister_device_domain_owner(struct pci_dev *dev);
#else
static inline void __init xen_setup_pirqs(void)
{
}
+static inline int xen_find_device_domain_owner(struct pci_dev *dev)
+{
+ return -1;
+}
+static inline int xen_register_device_domain_owner(struct pci_dev *dev,
+ uint16_t domain)
+{
+ return -1;
+}
+static inline int xen_unregister_device_domain_owner(struct pci_dev *dev)
+{
+ return -1;
+}
#endif
#if defined(CONFIG_PCI_MSI)
diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c
index e37b407..6075f2d 100644
--- a/arch/x86/pci/xen.c
+++ b/arch/x86/pci/xen.c
@@ -461,3 +461,76 @@ void __init xen_setup_pirqs(void)
}
}
#endif
+
+struct xen_device_domain_owner {
+ domid_t domain;
+ struct pci_dev *dev;
+ struct list_head list;
+};
+
+static DEFINE_SPINLOCK(dev_domain_list_spinlock);
+static struct list_head dev_domain_list = LIST_HEAD_INIT(dev_domain_list);
+
+static struct xen_device_domain_owner *find_device(struct pci_dev *dev)
+{
+ struct xen_device_domain_owner *owner;
+
+ list_for_each_entry(owner, &dev_domain_list, list) {
+ if (owner->dev == dev)
+ return owner;
+ }
+ return NULL;
+}
+
+int xen_find_device_domain_owner(struct pci_dev *dev)
+{
+ struct xen_device_domain_owner *owner;
+ int domain = -ENODEV;
+
+ spin_lock(&dev_domain_list_spinlock);
+ owner = find_device(dev);
+ if (owner)
+ domain = owner->domain;
+ spin_unlock(&dev_domain_list_spinlock);
+ return domain;
+}
+EXPORT_SYMBOL_GPL(xen_find_device_domain_owner);
+
+int xen_register_device_domain_owner(struct pci_dev *dev, uint16_t domain)
+{
+ struct xen_device_domain_owner *owner;
+
+ owner = kzalloc(sizeof(struct xen_device_domain_owner), GFP_KERNEL);
+ if (!owner)
+ return -ENODEV;
+
+ spin_lock(&dev_domain_list_spinlock);
+ if (find_device(dev)) {
+ spin_unlock(&dev_domain_list_spinlock);
+ kfree(owner);
+ return -EEXIST;
+ }
+ owner->domain = domain;
+ owner->dev = dev;
+ list_add_tail(&owner->list, &dev_domain_list);
+ spin_unlock(&dev_domain_list_spinlock);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(xen_register_device_domain_owner);
+
+int xen_unregister_device_domain_owner(struct pci_dev *dev)
+{
+ struct xen_device_domain_owner *owner;
+
+ spin_lock(&dev_domain_list_spinlock);
+ owner = find_device(dev);
+ if (!owner) {
+ spin_unlock(&dev_domain_list_spinlock);
+ return -ENODEV;
+ }
+ list_del(&owner->list);
+ spin_unlock(&dev_domain_list_spinlock);
+ kfree(owner);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(xen_unregister_device_domain_owner);
--
1.7.1
^ permalink raw reply related [flat|nested] 8+ messages in thread* [PATCH 2/5] xen/irq: Check if the PCI device is owned by a domain different than DOMID_SELF.
2011-04-14 15:30 [PATCH] Provide infrastructure changes for backends (especially pciback) for 2.6.40 Konrad Rzeszutek Wilk
2011-04-14 15:30 ` [PATCH 1/5] xen/pci: Add xen_[find|register|unregister]_device_domain_owner functions Konrad Rzeszutek Wilk
@ 2011-04-14 15:30 ` Konrad Rzeszutek Wilk
2011-04-14 15:30 ` [PATCH 3/5] xen/irq: Add support to check if IRQ line is shared with other domains Konrad Rzeszutek Wilk
` (4 subsequent siblings)
6 siblings, 0 replies; 8+ messages in thread
From: Konrad Rzeszutek Wilk @ 2011-04-14 15:30 UTC (permalink / raw)
To: xen-devel, linux-kernel; +Cc: Konrad Rzeszutek Wilk, Jeremy Fitzhardinge
We check if there is a domain owner for the PCI device. In case of failure
(meaning no domain has registered for this device) we make DOMID_SELF the owner.
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
[v2: deal with rebasing on v2.6.37-1]
[v3: deal with rebasing on stable/irq.cleanup]
[v4: deal with rebasing on stable/irq.ween_of_nr_irqs]
[v5: deal with rebasing on v2.6.39-rc3]
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Acked-by: Xiantao Zhang <xiantao.zhang@intel.com>
---
arch/x86/pci/xen.c | 21 ++++++++++++++++-----
drivers/xen/events.c | 12 ++++++++----
include/xen/events.h | 3 ++-
3 files changed, 26 insertions(+), 10 deletions(-)
diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c
index 6075f2d..393981f 100644
--- a/arch/x86/pci/xen.c
+++ b/arch/x86/pci/xen.c
@@ -108,7 +108,8 @@ static int xen_hvm_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
}
irq = xen_bind_pirq_msi_to_irq(dev, msidesc, pirq, 0,
(type == PCI_CAP_ID_MSIX) ?
- "msi-x" : "msi");
+ "msi-x" : "msi",
+ DOMID_SELF);
if (irq < 0)
goto error;
dev_dbg(&dev->dev,
@@ -148,7 +149,8 @@ static int xen_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
irq = xen_bind_pirq_msi_to_irq(dev, msidesc, v[i], 0,
(type == PCI_CAP_ID_MSIX) ?
"pcifront-msi-x" :
- "pcifront-msi");
+ "pcifront-msi",
+ DOMID_SELF);
if (irq < 0)
goto free;
i++;
@@ -190,9 +192,16 @@ static int xen_initdom_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
list_for_each_entry(msidesc, &dev->msi_list, list) {
struct physdev_map_pirq map_irq;
+ domid_t domid;
+
+ domid = ret = xen_find_device_domain_owner(dev);
+ /* N.B. Casting int's -ENODEV to uint16_t results in 0xFFED,
+ * hence check ret value for < 0. */
+ if (ret < 0)
+ domid = DOMID_SELF;
memset(&map_irq, 0, sizeof(map_irq));
- map_irq.domid = DOMID_SELF;
+ map_irq.domid = domid;
map_irq.type = MAP_PIRQ_TYPE_MSI;
map_irq.index = -1;
map_irq.pirq = -1;
@@ -215,14 +224,16 @@ static int xen_initdom_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
ret = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq);
if (ret) {
- dev_warn(&dev->dev, "xen map irq failed %d\n", ret);
+ dev_warn(&dev->dev, "xen map irq failed %d for %d domain\n",
+ ret, domid);
goto out;
}
ret = xen_bind_pirq_msi_to_irq(dev, msidesc,
map_irq.pirq, map_irq.index,
(type == PCI_CAP_ID_MSIX) ?
- "msi-x" : "msi");
+ "msi-x" : "msi",
+ domid);
if (ret < 0)
goto out;
}
diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index 42d6c93..ac0e228 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -101,6 +101,7 @@ struct irq_info
unsigned short gsi;
unsigned char vector;
unsigned char flags;
+ uint16_t domid;
} pirq;
} u;
};
@@ -184,6 +185,7 @@ static void xen_irq_info_pirq_init(unsigned irq,
unsigned short pirq,
unsigned short gsi,
unsigned short vector,
+ uint16_t domid,
unsigned char flags)
{
struct irq_info *info = info_for_irq(irq);
@@ -193,6 +195,7 @@ static void xen_irq_info_pirq_init(unsigned irq,
info->u.pirq.pirq = pirq;
info->u.pirq.gsi = gsi;
info->u.pirq.vector = vector;
+ info->u.pirq.domid = domid;
info->u.pirq.flags = flags;
}
@@ -655,7 +658,7 @@ int xen_bind_pirq_gsi_to_irq(unsigned gsi,
goto out;
}
- xen_irq_info_pirq_init(irq, 0, pirq, gsi, irq_op.vector,
+ xen_irq_info_pirq_init(irq, 0, pirq, gsi, irq_op.vector, DOMID_SELF,
shareable ? PIRQ_SHAREABLE : 0);
out:
@@ -680,7 +683,8 @@ int xen_allocate_pirq_msi(struct pci_dev *dev, struct msi_desc *msidesc)
}
int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc,
- int pirq, int vector, const char *name)
+ int pirq, int vector, const char *name,
+ domid_t domid)
{
int irq, ret;
@@ -693,7 +697,7 @@ int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc,
irq_set_chip_and_handler_name(irq, &xen_pirq_chip, handle_level_irq,
name);
- xen_irq_info_pirq_init(irq, 0, pirq, 0, vector, 0);
+ xen_irq_info_pirq_init(irq, 0, pirq, 0, vector, domid, 0);
ret = irq_set_msi_desc(irq, msidesc);
if (ret < 0)
goto error_irq;
@@ -722,7 +726,7 @@ int xen_destroy_irq(int irq)
if (xen_initial_domain()) {
unmap_irq.pirq = info->u.pirq.pirq;
- unmap_irq.domid = DOMID_SELF;
+ unmap_irq.domid = info->u.pirq.domid;
rc = HYPERVISOR_physdev_op(PHYSDEVOP_unmap_pirq, &unmap_irq);
if (rc) {
printk(KERN_WARNING "unmap irq failed %d\n", rc);
diff --git a/include/xen/events.h b/include/xen/events.h
index f1b87ad..9aecc0b 100644
--- a/include/xen/events.h
+++ b/include/xen/events.h
@@ -85,7 +85,8 @@ int xen_bind_pirq_gsi_to_irq(unsigned gsi,
int xen_allocate_pirq_msi(struct pci_dev *dev, struct msi_desc *msidesc);
/* Bind an PSI pirq to an irq. */
int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc,
- int pirq, int vector, const char *name);
+ int pirq, int vector, const char *name,
+ domid_t domid);
#endif
/* De-allocates the above mentioned physical interrupt. */
--
1.7.1
^ permalink raw reply related [flat|nested] 8+ messages in thread* [PATCH 3/5] xen/irq: Add support to check if IRQ line is shared with other domains.
2011-04-14 15:30 [PATCH] Provide infrastructure changes for backends (especially pciback) for 2.6.40 Konrad Rzeszutek Wilk
2011-04-14 15:30 ` [PATCH 1/5] xen/pci: Add xen_[find|register|unregister]_device_domain_owner functions Konrad Rzeszutek Wilk
2011-04-14 15:30 ` [PATCH 2/5] xen/irq: Check if the PCI device is owned by a domain different than DOMID_SELF Konrad Rzeszutek Wilk
@ 2011-04-14 15:30 ` Konrad Rzeszutek Wilk
2011-04-14 15:30 ` [PATCH 4/5] xen/irq: Export 'xen_pirq_from_irq' function Konrad Rzeszutek Wilk
` (3 subsequent siblings)
6 siblings, 0 replies; 8+ messages in thread
From: Konrad Rzeszutek Wilk @ 2011-04-14 15:30 UTC (permalink / raw)
To: xen-devel, linux-kernel; +Cc: Konrad Rzeszutek Wilk
We do this via the PHYSDEVOP_irq_status_query support hypervisor call.
We will get a positive value if another domain has bound its
PIRQ to the specified GSI (IRQ line).
[v2: Deal with v2.6.37-rc1 rebase fallout]
[v3: Deal with stable/irq.cleanup fallout]
[v4: xen_ignore_irq->xen_test_irq_shared]
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
drivers/xen/events.c | 12 ++++++++++++
include/xen/events.h | 3 +++
2 files changed, 15 insertions(+), 0 deletions(-)
diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index ac0e228..0ac7a14 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -1508,6 +1508,18 @@ void xen_poll_irq(int irq)
xen_poll_irq_timeout(irq, 0 /* no timeout */);
}
+/* Check whether the IRQ line is shared with other guests. */
+int xen_test_irq_shared(int irq)
+{
+ struct irq_info *info = info_for_irq(irq);
+ struct physdev_irq_status_query irq_status = { .irq = info->u.pirq.pirq };
+
+ if (HYPERVISOR_physdev_op(PHYSDEVOP_irq_status_query, &irq_status))
+ return 0;
+ return !(irq_status.flags & XENIRQSTAT_shared);
+}
+EXPORT_SYMBOL_GPL(xen_test_irq_shared);
+
void xen_irq_resume(void)
{
unsigned int cpu, evtchn;
diff --git a/include/xen/events.h b/include/xen/events.h
index 9aecc0b..932e540 100644
--- a/include/xen/events.h
+++ b/include/xen/events.h
@@ -95,4 +95,7 @@ int xen_destroy_irq(int irq);
/* Return irq from pirq */
int xen_irq_from_pirq(unsigned pirq);
+/* Determine whether to ignore this IRQ if it is passed to a guest. */
+int xen_test_irq_shared(int irq);
+
#endif /* _XEN_EVENTS_H */
--
1.7.1
^ permalink raw reply related [flat|nested] 8+ messages in thread* [PATCH 4/5] xen/irq: Export 'xen_pirq_from_irq' function.
2011-04-14 15:30 [PATCH] Provide infrastructure changes for backends (especially pciback) for 2.6.40 Konrad Rzeszutek Wilk
` (2 preceding siblings ...)
2011-04-14 15:30 ` [PATCH 3/5] xen/irq: Add support to check if IRQ line is shared with other domains Konrad Rzeszutek Wilk
@ 2011-04-14 15:30 ` Konrad Rzeszutek Wilk
2011-04-14 15:30 ` [PATCH 5/5] xen/irq: The Xen hypervisor cleans up the PIRQs if the other domain forgot Konrad Rzeszutek Wilk
` (2 subsequent siblings)
6 siblings, 0 replies; 8+ messages in thread
From: Konrad Rzeszutek Wilk @ 2011-04-14 15:30 UTC (permalink / raw)
To: xen-devel, linux-kernel; +Cc: Konrad Rzeszutek Wilk
We need this to find the real Xen PIRQ value for a device
that requests an MSI or MSI-X. In the past we used
'xen_gsi_from_irq' since that function would return
a Xen PIRQ or GSI depending on the provided IRQ. Now that
we have separated the two, we need to use the correct
function.
[v2: Deal with rebase on stable/irq.cleanup]
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
drivers/xen/events.c | 6 ++++++
include/xen/events.h | 3 +++
2 files changed, 9 insertions(+), 0 deletions(-)
diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index 0ac7a14..e4e8e9a 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -763,6 +763,12 @@ out:
return irq;
}
+
+int xen_pirq_from_irq(unsigned irq)
+{
+ return pirq_from_irq(irq);
+}
+EXPORT_SYMBOL_GPL(xen_pirq_from_irq);
int bind_evtchn_to_irq(unsigned int evtchn)
{
int irq;
diff --git a/include/xen/events.h b/include/xen/events.h
index 932e540..9af21e1 100644
--- a/include/xen/events.h
+++ b/include/xen/events.h
@@ -95,6 +95,9 @@ int xen_destroy_irq(int irq);
/* Return irq from pirq */
int xen_irq_from_pirq(unsigned pirq);
+/* Return the pirq allocated to the irq. */
+int xen_pirq_from_irq(unsigned irq);
+
/* Determine whether to ignore this IRQ if it is passed to a guest. */
int xen_test_irq_shared(int irq);
--
1.7.1
^ permalink raw reply related [flat|nested] 8+ messages in thread* [PATCH 5/5] xen/irq: The Xen hypervisor cleans up the PIRQs if the other domain forgot.
2011-04-14 15:30 [PATCH] Provide infrastructure changes for backends (especially pciback) for 2.6.40 Konrad Rzeszutek Wilk
` (3 preceding siblings ...)
2011-04-14 15:30 ` [PATCH 4/5] xen/irq: Export 'xen_pirq_from_irq' function Konrad Rzeszutek Wilk
@ 2011-04-14 15:30 ` Konrad Rzeszutek Wilk
2011-04-18 15:20 ` [PATCH] Provide infrastructure changes for backends (and blkback) for 2.6.40 Konrad Rzeszutek Wilk
2011-04-18 15:20 ` [PATCH] xen/p2m/m2p/gnttab: Support GNTMAP_host_map in the M2P override Konrad Rzeszutek Wilk
6 siblings, 0 replies; 8+ messages in thread
From: Konrad Rzeszutek Wilk @ 2011-04-14 15:30 UTC (permalink / raw)
To: xen-devel, linux-kernel; +Cc: Konrad Rzeszutek Wilk
And if the other domain forgot to clean up its PIRQs we don't need
to fail the operation. Just take a note of it and continue on.
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
drivers/xen/events.c | 9 ++++++++-
1 files changed, 8 insertions(+), 1 deletions(-)
diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index e4e8e9a..e51f3c5 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -728,7 +728,14 @@ int xen_destroy_irq(int irq)
unmap_irq.pirq = info->u.pirq.pirq;
unmap_irq.domid = info->u.pirq.domid;
rc = HYPERVISOR_physdev_op(PHYSDEVOP_unmap_pirq, &unmap_irq);
- if (rc) {
+ /* If another domain quits without making the pci_disable_msix
+ * call, the Xen hypervisor takes care of freeing the PIRQs
+ * (free_domain_pirqs).
+ */
+ if ((rc == -ESRCH && info->u.pirq.domid != DOMID_SELF))
+ printk(KERN_INFO "domain %d does not have %d anymore\n",
+ info->u.pirq.domid, info->u.pirq.pirq);
+ else if (rc) {
printk(KERN_WARNING "unmap irq failed %d\n", rc);
goto out;
}
--
1.7.1
^ permalink raw reply related [flat|nested] 8+ messages in thread* [PATCH] Provide infrastructure changes for backends (and blkback) for 2.6.40
2011-04-14 15:30 [PATCH] Provide infrastructure changes for backends (especially pciback) for 2.6.40 Konrad Rzeszutek Wilk
` (4 preceding siblings ...)
2011-04-14 15:30 ` [PATCH 5/5] xen/irq: The Xen hypervisor cleans up the PIRQs if the other domain forgot Konrad Rzeszutek Wilk
@ 2011-04-18 15:20 ` Konrad Rzeszutek Wilk
2011-04-18 15:20 ` [PATCH] xen/p2m/m2p/gnttab: Support GNTMAP_host_map in the M2P override Konrad Rzeszutek Wilk
6 siblings, 0 replies; 8+ messages in thread
From: Konrad Rzeszutek Wilk @ 2011-04-18 15:20 UTC (permalink / raw)
To: linux-kernel; +Cc: xen-devel
On Thu, Apr 14, 2011 at 11:30:09AM -0400, Konrad Rzeszutek Wilk wrote:
> I've posted these patches in the past, and fixed some of the issues, and I think
> there were reviewed. These five pathches are proposed for 2.6.40 to to allow the
> Xen-pciback backend module to work.
And also the blkback backend. The attached patch allows the M2P override code
to deal with grants that do not have the GNTMAP_contains_pte flag set.
The change is pretty straightforward. The difficulty is more when we unmap the
grant and we need to call m2p_remove_override and pass the flag whether to clear
the PTE or not. On the map part it was quite easy to check the:
map_ops[i].flags & GNTMAP_contains_pte
but that is not possible on the unmap (there is no .flags parameter on the unmap
grant). For simplicity and the use cases (the backends do not use those API
calls to set up grants, instead they use the m2p_* API calls directly) we will
return -EOPNOTSUPP for !GNTMAP_contains_pte.
In the future we could keep track of the page and whether it had !GNTMAP_contains_pte
and make the decision whether to clear the PTE or not. One way to do this is to keep
a list of the pages that have the GNTMAP_contains_pte flag set (or vice-versa).
^ permalink raw reply [flat|nested] 8+ messages in thread* [PATCH] xen/p2m/m2p/gnttab: Support GNTMAP_host_map in the M2P override.
2011-04-14 15:30 [PATCH] Provide infrastructure changes for backends (especially pciback) for 2.6.40 Konrad Rzeszutek Wilk
` (5 preceding siblings ...)
2011-04-18 15:20 ` [PATCH] Provide infrastructure changes for backends (and blkback) for 2.6.40 Konrad Rzeszutek Wilk
@ 2011-04-18 15:20 ` Konrad Rzeszutek Wilk
6 siblings, 0 replies; 8+ messages in thread
From: Konrad Rzeszutek Wilk @ 2011-04-18 15:20 UTC (permalink / raw)
To: linux-kernel; +Cc: xen-devel, Konrad Rzeszutek Wilk
We supported the M2P (and P2M) override only for the
GNTMAP_contains_pte type mappings, meaning that the grant
operations would "contain the machine address of the PTE to update".
If the flag is unset, then the grant operation
"contains a host virtual address". The latter case means that
the Hypervisor takes care of updating our page table
(specifically the PTE entry) with the guest's MFN. As such we should
not try to do anything with the PTE. Previous to this patch
we would try to clear the PTE which resulted in Xen hypervisor
being upset with us:
(XEN) mm.c:1066:d0 Attempt to implicitly unmap a granted PTE c0100000ccc59067
(XEN) domain_crash called from mm.c:1067
(XEN) Domain 0 (vcpu#0) crashed on cpu#3:
(XEN) ----[ Xen-4.0-110228 x86_64 debug=y Not tainted ]----
and crashing us.
This patch allows us to inhibit the PTE clearing in the PV guest
if the GNTMAP_contains_pte is not set.
On the m2p_remove_override path we provide the same parameter.
Sadly in the grant-table driver we do not have a mechanism to
tell m2p_remove_override whether to clear the PTE or not. Since
the grant-table driver is used by user-space, we can safely assume
that it operates only on PTE's. Hence the implementation for
it to work on !GNTMAP_contains_pte returns -EOPNOTSUPP. In the future
we can implement the support for this. It will require some extra
accounting structure to keep track of the page[i], and the flag.
[v1: Added documentation details, made it return -EOPNOTSUPP instead
of trying to do a half-way implementation]
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
arch/x86/include/asm/xen/page.h | 5 +++--
arch/x86/xen/p2m.c | 10 ++++------
drivers/xen/grant-table.c | 31 ++++++++++++++++++++++++-------
3 files changed, 31 insertions(+), 15 deletions(-)
diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
index c61934f..64a619d 100644
--- a/arch/x86/include/asm/xen/page.h
+++ b/arch/x86/include/asm/xen/page.h
@@ -47,8 +47,9 @@ extern bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn);
extern unsigned long set_phys_range_identity(unsigned long pfn_s,
unsigned long pfn_e);
-extern int m2p_add_override(unsigned long mfn, struct page *page);
-extern int m2p_remove_override(struct page *page);
+extern int m2p_add_override(unsigned long mfn, struct page *page,
+ bool clear_pte);
+extern int m2p_remove_override(struct page *page, bool clear_pte);
extern struct page *m2p_find_override(unsigned long mfn);
extern unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn);
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index 141eb0d..2d2b32a 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -650,7 +650,7 @@ static unsigned long mfn_hash(unsigned long mfn)
}
/* Add an MFN override for a particular page */
-int m2p_add_override(unsigned long mfn, struct page *page)
+int m2p_add_override(unsigned long mfn, struct page *page, bool clear_pte)
{
unsigned long flags;
unsigned long pfn;
@@ -662,7 +662,6 @@ int m2p_add_override(unsigned long mfn, struct page *page)
if (!PageHighMem(page)) {
address = (unsigned long)__va(pfn << PAGE_SHIFT);
ptep = lookup_address(address, &level);
-
if (WARN(ptep == NULL || level != PG_LEVEL_4K,
"m2p_add_override: pfn %lx not mapped", pfn))
return -EINVAL;
@@ -674,10 +673,9 @@ int m2p_add_override(unsigned long mfn, struct page *page)
if (unlikely(!set_phys_to_machine(pfn, FOREIGN_FRAME(mfn))))
return -ENOMEM;
- if (!PageHighMem(page))
+ if (clear_pte && !PageHighMem(page))
/* Just zap old mapping for now */
pte_clear(&init_mm, address, ptep);
-
spin_lock_irqsave(&m2p_override_lock, flags);
list_add(&page->lru, &m2p_overrides[mfn_hash(mfn)]);
spin_unlock_irqrestore(&m2p_override_lock, flags);
@@ -685,7 +683,7 @@ int m2p_add_override(unsigned long mfn, struct page *page)
return 0;
}
-int m2p_remove_override(struct page *page)
+int m2p_remove_override(struct page *page, bool clear_pte)
{
unsigned long flags;
unsigned long mfn;
@@ -713,7 +711,7 @@ int m2p_remove_override(struct page *page)
spin_unlock_irqrestore(&m2p_override_lock, flags);
set_phys_to_machine(pfn, page->index);
- if (!PageHighMem(page))
+ if (clear_pte && !PageHighMem(page))
set_pte_at(&init_mm, address, ptep,
pfn_pte(pfn, PAGE_KERNEL));
/* No tlb flush necessary because the caller already
diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
index 3745a31..fd725cd 100644
--- a/drivers/xen/grant-table.c
+++ b/drivers/xen/grant-table.c
@@ -466,13 +466,30 @@ int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops,
if (map_ops[i].status)
continue;
- /* m2p override only supported for GNTMAP_contains_pte mappings */
- if (!(map_ops[i].flags & GNTMAP_contains_pte))
- continue;
- pte = (pte_t *) (mfn_to_virt(PFN_DOWN(map_ops[i].host_addr)) +
+ if (map_ops[i].flags & GNTMAP_contains_pte) {
+ pte = (pte_t *) (mfn_to_virt(PFN_DOWN(map_ops[i].host_addr)) +
(map_ops[i].host_addr & ~PAGE_MASK));
- mfn = pte_mfn(*pte);
- ret = m2p_add_override(mfn, pages[i]);
+ mfn = pte_mfn(*pte);
+ } else {
+ /* If you really wanted to do this:
+ * mfn = PFN_DOWN(map_ops[i].dev_bus_addr);
+ *
+ * The reason we do not implement it is b/c on the
+ * unmap path (gnttab_unmap_refs) we have no means of
+ * checking whether the page is !GNTMAP_contains_pte.
+ *
+ * That is without some extra data-structure to carry
+ * the struct page, bool clear_pte, and list_head next
+ * tuples and deal with allocation/deallocation, etc.
+ *
+ * The users of this API set the GNTMAP_contains_pte
+ * flag so let's just return not supported until it
+ * becomes necessary to implement.
+ */
+ return -EOPNOTSUPP;
+ }
+ ret = m2p_add_override(mfn, pages[i],
+ map_ops[i].flags & GNTMAP_contains_pte);
if (ret)
return ret;
}
@@ -494,7 +511,7 @@ int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops,
return ret;
for (i = 0; i < count; i++) {
- ret = m2p_remove_override(pages[i]);
+ ret = m2p_remove_override(pages[i], true /* clear the PTE */);
if (ret)
return ret;
}
--
1.7.1
^ permalink raw reply related [flat|nested] 8+ messages in thread