From mboxrd@z Thu Jan 1 00:00:00 1970 From: "Chen, Tiejun" Subject: Re: [v7][RFC][PATCH 06/13] hvmloader/ram: check if guest memory is out of reserved device memory maps Date: Thu, 06 Nov 2014 17:28:42 +0800 Message-ID: <545B3F4A.5070808@intel.com> References: <1414136077-18599-1-git-send-email-tiejun.chen@intel.com> <544DFDB2.2010508@intel.com> <544E29C70200007800042595@mail.emea.novell.com> <544F49F9.3070208@intel.com> <544F78B80200007800042B95@mail.emea.novell.com> <54509A8A.9060606@intel.com> <5450BE27020000780004304A@mail.emea.novell.com> <5451AC56.7010303@intel.com> <54521100020000780004363A@mail.emea.novell.com> <545320F2.5030103@intel.com> <545354500200007800043D94@mail.emea.novell.com> <5457174C.8020400@intel.com> <5457515102000078000443B0@mail.emea.novell.com> <54574D8F.8060407@intel.com> <54575E2D0200007800044443@mail.emea.novell.com> <545767C4.7070806@intel.com> <5457787002000078000445C7@mail.emea.novell.com> <54576DF7.8060408@intel.com> <545784830200007800044627@mail.emea.novell.com> <54585EAA.20904@intel.com> <545894610200007800044A5B@mail.emea.novell.com> <545992A2.8070309@intel.com> <545A57AD02000078000C1037@mail.emea.novell.com> Mime-Version: 1.0 Content-Type: text/plain; charset="us-ascii"; Format="flowed" Content-Transfer-Encoding: 7bit Return-path: In-Reply-To: <545A57AD02000078000C1037@mail.emea.novell.com> List-Unsubscribe: , List-Post: List-Help: List-Subscribe: , Sender: xen-devel-bounces@lists.xen.org Errors-To: xen-devel-bounces@lists.xen.org To: Jan Beulich Cc: yang.z.zhang@intel.com, kevin.tian@intel.com, tim@xen.org, xen-devel@lists.xen.org List-Id: xen-devel@lists.xenproject.org On 2014/11/6 1:00, Jan Beulich wrote: >>>> "Chen, Tiejun" 11/05/14 3:59 AM >>> > > Everything up to here sounded reasonable. > >> Next, we need a new domctl to provide these info, 'pci_rdmforce' and >> 'rdm_force' when parse the config file. Certainly we may need to >> introduce and set two new fields in strcut domain, and since then we >> just use these fields to modify our current existing iommu callback >> inside to support our policy. I mean we just expose those associated >> RMRR entry. I think its easy to implement this since inside Xen we can >> know which entry is owned by which device. So this can benefit us to >> avoid modifying any tools codes and most Xen codes we already addressed. > > Whether a domctl is the right approach I can't really tell with this > somewhat vague description. > So based on our current patch, I generate a draft patch as follows to show my idea. Note I just tried to compile this. diff --git a/tools/libxc/xc_domain.c b/tools/libxc/xc_domain.c index 009e351..ff22228 100644 --- a/tools/libxc/xc_domain.c +++ b/tools/libxc/xc_domain.c @@ -1642,6 +1642,21 @@ int xc_assign_device( return do_domctl(xch, &domctl); } +int xc_domain_device_setrdm(xc_interface *xch, + uint32_t domid, + uint32_t pci_rdmforce, + uint32_t pci_dev_bitmap) +{ + DECLARE_DOMCTL; + + domctl.cmd = XEN_DOMCTL_set_rdm; + domctl.domain = domid; + domctl.u.set_rdm.pci_rdmforce = pci_rdmforce; + domctl.u.set_rdm.pci_dev_bitmap = pci_dev_bitmap; + + return do_domctl(xch, &domctl); +} + int xc_get_device_group( xc_interface *xch, uint32_t domid, diff --git a/tools/libxl/libxl_dm.c b/tools/libxl/libxl_dm.c index 3e191c3..ca8b754 100644 --- a/tools/libxl/libxl_dm.c +++ b/tools/libxl/libxl_dm.c @@ -90,6 +90,19 @@ const char *libxl__domain_device_model(libxl__gc *gc, return dm; } +int libxl__domain_device_setrdm(libxl__gc *gc, + const libxl_domain_build_info *info, + uint32_t dm_domid) +{ + int ret; + libxl_ctx *ctx = libxl__gc_owner(gc); + + ret = xc_domain_device_setrdm(ctx->xch, dm_domid, info->pci_rdmforce, + info->pci_dev_bitmap); + + return ret; +} + const libxl_vnc_info *libxl__dm_vnc(const libxl_domain_config *guest_config) { const libxl_vnc_info *vnc = NULL; diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h index 4361421..06938ee 100644 --- a/tools/libxl/libxl_internal.h +++ b/tools/libxl/libxl_internal.h @@ -1471,6 +1471,11 @@ _hidden int libxl__domain_build(libxl__gc *gc, /* for device model creation */ _hidden const char *libxl__domain_device_model(libxl__gc *gc, const libxl_domain_build_info *info); + +_hidden int libxl__domain_device_setrdm(libxl__gc *gc, + const libxl_domain_build_info *info, + uint32_t domid); + _hidden int libxl__need_xenpv_qemu(libxl__gc *gc, int nr_consoles, libxl__device_console *consoles, int nr_vfbs, libxl_device_vfb *vfbs, diff --git a/tools/libxl/libxl_types.idl b/tools/libxl/libxl_types.idl index ca3f724..ed20823 100644 --- a/tools/libxl/libxl_types.idl +++ b/tools/libxl/libxl_types.idl @@ -398,6 +398,8 @@ libxl_domain_build_info = Struct("domain_build_info",[ ("kernel", string), ("cmdline", string), ("ramdisk", string), + ("pci_rdmforce", uint32), + ("pci_dev_bitmap", uint32), ("u", KeyedUnion(None, libxl_domain_type, "type", [("hvm", Struct(None, [("firmware", string), ("bios", libxl_bios_type), @@ -518,6 +520,7 @@ libxl_device_pci = Struct("device_pci", [ ("power_mgmt", bool), ("permissive", bool), ("seize", bool), + ("rdmforce", bool), ]) libxl_device_vtpm = Struct("device_vtpm", [ diff --git a/tools/libxl/libxlu_pci.c b/tools/libxl/libxlu_pci.c index 26fb143..989eac8 100644 --- a/tools/libxl/libxlu_pci.c +++ b/tools/libxl/libxlu_pci.c @@ -143,6 +143,8 @@ int xlu_pci_parse_bdf(XLU_Config *cfg, libxl_device_pci *pcidev, const char *str pcidev->permissive = atoi(tok); }else if ( !strcmp(optkey, "seize") ) { pcidev->seize = atoi(tok); + }else if ( !strcmp(optkey, "rdmforce") ) { + pcidev->rdmforce = atoi(tok); }else{ XLU__PCI_ERR(cfg, "Unknown PCI BDF option: %s", optkey); } diff --git a/tools/libxl/xl_cmdimpl.c b/tools/libxl/xl_cmdimpl.c index 3c9f146..64a1e51 100644 --- a/tools/libxl/xl_cmdimpl.c +++ b/tools/libxl/xl_cmdimpl.c @@ -904,6 +904,7 @@ static void replace_string(char **str, const char *val) *str = xstrdup(val); } +#define PCI_BDF(b,d,f) ((((b) & 0xff) << 8) | PCI_DEVFN(d,f)) static void parse_config_data(const char *config_source, const char *config_data, int config_len, @@ -919,6 +920,7 @@ static void parse_config_data(const char *config_source, int pci_msitranslate = 0; int pci_permissive = 0; int pci_seize = 0; + int pci_rdmforce = 0; int i, e; libxl_domain_create_info *c_info = &d_config->c_info; @@ -1699,6 +1701,9 @@ skip_vfb: if (!xlu_cfg_get_long (config, "pci_seize", &l, 0)) pci_seize = l; + if (!xlu_cfg_get_long (config, "pci_rdmforce", &l, 0)) + pci_rdmforce = l; + /* To be reworked (automatically enabled) once the auto ballooning * after guest starts is done (with PCI devices passed in). */ if (c_info->type == LIBXL_DOMAIN_TYPE_PV) { @@ -1719,9 +1724,16 @@ skip_vfb: pcidev->power_mgmt = pci_power_mgmt; pcidev->permissive = pci_permissive; pcidev->seize = pci_seize; + pcidev->rdmforce = pci_rdmforce; if (!xlu_pci_parse_bdf(config, pcidev, buf)) + { + /* Just fake this wit a bitmap. */ + b_info.pci_dev_bitmap |= 1 << PCI_BDF(pcidev->bus, pcidev->dev, + pcidev->func); d_config->num_pcidevs++; + } } + b_info.pci_rdmforce = pci_rdmforce; if (d_config->num_pcidevs && c_info->type == LIBXL_DOMAIN_TYPE_PV) libxl_defbool_set(&b_info->u.pv.e820_host, true); } diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c index dc18f1b..3bbd28f 100644 --- a/xen/arch/x86/hvm/vmx/vmx.c +++ b/xen/arch/x86/hvm/vmx/vmx.c @@ -2434,6 +2434,7 @@ static void ept_handle_violation(unsigned long qualification, paddr_t gpa) * handle such a scenario as its own logic. */ ret = iommu_get_reserved_device_memory(p2m_check_reserved_device_memory, + d, &gfn); if ( ret ) { diff --git a/xen/arch/x86/mm/p2m.c b/xen/arch/x86/mm/p2m.c index 113d996..ba489ce 100644 --- a/xen/arch/x86/mm/p2m.c +++ b/xen/arch/x86/mm/p2m.c @@ -691,6 +691,7 @@ guest_physmap_add_entry(struct domain *d, unsigned long gfn, if ( !is_hardware_domain(d) ) { rc = iommu_get_reserved_device_memory(p2m_check_reserved_device_memory, + d, &gfn); /* We always avoid populating reserved device memory. */ if ( rc == 1 ) diff --git a/xen/common/compat/memory.c b/xen/common/compat/memory.c index af613b9..cd99ba7 100644 --- a/xen/common/compat/memory.c +++ b/xen/common/compat/memory.c @@ -315,6 +315,7 @@ int compat_memory_op(unsigned int cmd, XEN_GUEST_HANDLE_PARAM(void) compat) grdm.used_entries = 0; rc = iommu_get_reserved_device_memory(get_reserved_device_memory, + current->domain, &grdm); if ( !rc && grdm.map.nr_entries < grdm.used_entries ) diff --git a/xen/common/mem_access.c b/xen/common/mem_access.c index 4c84f88..e7973ee 100644 --- a/xen/common/mem_access.c +++ b/xen/common/mem_access.c @@ -69,6 +69,7 @@ static int mem_access_check_rdm(struct domain *d, uint64_aligned_t start, { gfn = start + i; rc = iommu_get_reserved_device_memory(p2m_check_reserved_device_memory, + d, &gfn); if ( rc < 0 ) printk(XENLOG_WARNING diff --git a/xen/common/memory.c b/xen/common/memory.c index 2177c56..21db828 100644 --- a/xen/common/memory.c +++ b/xen/common/memory.c @@ -1140,6 +1140,7 @@ long do_memory_op(unsigned long cmd, XEN_GUEST_HANDLE_PARAM(void) arg) grdm.used_entries = 0; rc = iommu_get_reserved_device_memory(get_reserved_device_memory, + current->domain, &grdm); if ( !rc && grdm.map.nr_entries < grdm.used_entries ) diff --git a/xen/drivers/passthrough/iommu.c b/xen/drivers/passthrough/iommu.c index 7c17e8d..7a5c66d 100644 --- a/xen/drivers/passthrough/iommu.c +++ b/xen/drivers/passthrough/iommu.c @@ -344,14 +344,15 @@ void iommu_crash_shutdown(void) iommu_enabled = iommu_intremap = 0; } -int iommu_get_reserved_device_memory(iommu_grdm_t *func, void *ctxt) +int iommu_get_reserved_device_memory(iommu_grdm_t *func, struct domain* d, + void *ctxt) { const struct iommu_ops *ops = iommu_get_ops(); if ( !iommu_enabled || !ops->get_reserved_device_memory ) return 0; - return ops->get_reserved_device_memory(func, ctxt); + return ops->get_reserved_device_memory(func, d, ctxt); } bool_t iommu_has_feature(struct domain *d, enum iommu_feature feature) diff --git a/xen/drivers/passthrough/pci.c b/xen/drivers/passthrough/pci.c index 1eba833..ee689ff 100644 --- a/xen/drivers/passthrough/pci.c +++ b/xen/drivers/passthrough/pci.c @@ -1540,6 +1540,18 @@ int iommu_do_pci_domctl( } break; + case XEN_DOMCTL_set_rdm: + if ( unlikely(d->is_dying) ) + { + ret = -EINVAL; + break; + } + + d->arch.hvm_domain.pci_rdmforce = domctl->u.set_rdm.pci_rdmforce; + d->arch.hvm_domain.pci_dev_bitmap = domctl->u.set_rdm.pci_dev_bitmap; + + break; + case XEN_DOMCTL_assign_device: if ( unlikely(d->is_dying) ) { diff --git a/xen/drivers/passthrough/vtd/dmar.c b/xen/drivers/passthrough/vtd/dmar.c index 546eca9..138840c 100644 --- a/xen/drivers/passthrough/vtd/dmar.c +++ b/xen/drivers/passthrough/vtd/dmar.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include "dmar.h" #include "iommu.h" @@ -921,18 +922,28 @@ int platform_supports_x2apic(void) return cpu_has_x2apic && ((dmar_flags & mask) == ACPI_DMAR_INTR_REMAP); } -int intel_iommu_get_reserved_device_memory(iommu_grdm_t *func, void *ctxt) +int intel_iommu_get_reserved_device_memory(iommu_grdm_t *func, struct domain *d, + void *ctxt) { struct acpi_rmrr_unit *rmrr; int rc = 0; + int i; + u16 bdf; - list_for_each_entry(rmrr, &acpi_rmrr_units, list) + for_each_rmrr_device ( rmrr, bdf, i ) { - rc = func(PFN_DOWN(rmrr->base_address), - PFN_UP(rmrr->end_address) - PFN_DOWN(rmrr->base_address), - ctxt); - if ( rc ) - break; + if ( d->arch.hvm_domain.pci_rdmforce ) + { + if ( d->arch.hvm_domain.pci_dev_bitmap & (uint32_t)(1 << bdf) ) + { + rc = func(PFN_DOWN(rmrr->base_address), + PFN_UP(rmrr->end_address) - + PFN_DOWN(rmrr->base_address), + ctxt); + if ( rc ) + break; + } + } } return rc; diff --git a/xen/drivers/passthrough/vtd/extern.h b/xen/drivers/passthrough/vtd/extern.h index f9ee9b0..df9fed3 100644 --- a/xen/drivers/passthrough/vtd/extern.h +++ b/xen/drivers/passthrough/vtd/extern.h @@ -75,7 +75,8 @@ int domain_context_mapping_one(struct domain *domain, struct iommu *iommu, u8 bus, u8 devfn, const struct pci_dev *); int domain_context_unmap_one(struct domain *domain, struct iommu *iommu, u8 bus, u8 devfn); -int intel_iommu_get_reserved_device_memory(iommu_grdm_t *func, void *ctxt); +int intel_iommu_get_reserved_device_memory(iommu_grdm_t *func, struct domain *d, + void *ctxt); unsigned int io_apic_read_remap_rte(unsigned int apic, unsigned int reg); void io_apic_write_remap_rte(unsigned int apic, diff --git a/xen/include/asm-x86/hvm/domain.h b/xen/include/asm-x86/hvm/domain.h index 2757c7f..5dab8dd 100644 --- a/xen/include/asm-x86/hvm/domain.h +++ b/xen/include/asm-x86/hvm/domain.h @@ -90,6 +90,9 @@ struct hvm_domain { /* Cached CF8 for guest PCI config cycles */ uint32_t pci_cf8; + uint32_t pci_rdmforce; + uint32_t pci_dev_bitmap; + struct pl_time pl_time; struct hvm_io_handler *io_handler; diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h index 58b19e7..8b7cd5b 100644 --- a/xen/include/public/domctl.h +++ b/xen/include/public/domctl.h @@ -484,6 +484,14 @@ struct xen_domctl_assign_device { typedef struct xen_domctl_assign_device xen_domctl_assign_device_t; DEFINE_XEN_GUEST_HANDLE(xen_domctl_assign_device_t); +/* Control whether/how we check and reserve device memory. */ +struct xen_domctl_set_rdm { + uint32_t pci_rdmforce; + uint32_t pci_dev_bitmap; +}; +typedef struct xen_domctl_set_rdm xen_domctl_set_rdm_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_set_rdm_t); + /* Retrieve sibling devices infomation of machine_sbdf */ /* XEN_DOMCTL_get_device_group */ struct xen_domctl_get_device_group { @@ -1056,6 +1064,7 @@ struct xen_domctl { #define XEN_DOMCTL_set_vcpu_msrs 73 #define XEN_DOMCTL_setvnumainfo 74 #define XEN_DOMCTL_psr_cmt_op 75 +#define XEN_DOMCTL_set_rdm 76 #define XEN_DOMCTL_gdbsx_guestmemio 1000 #define XEN_DOMCTL_gdbsx_pausevcpu 1001 #define XEN_DOMCTL_gdbsx_unpausevcpu 1002 @@ -1118,7 +1127,8 @@ struct xen_domctl { struct xen_domctl_gdbsx_domstatus gdbsx_domstatus; struct xen_domctl_vnuma vnuma; struct xen_domctl_psr_cmt_op psr_cmt_op; - uint8_t pad[128]; + struct xen_domctl_set_rdm set_rdm; + uint8_t pad[120]; } u; }; typedef struct xen_domctl xen_domctl_t; diff --git a/xen/include/xen/iommu.h b/xen/include/xen/iommu.h index 409f6f8..adf3d52 100644 --- a/xen/include/xen/iommu.h +++ b/xen/include/xen/iommu.h @@ -158,14 +158,14 @@ struct iommu_ops { void (*crash_shutdown)(void); void (*iotlb_flush)(struct domain *d, unsigned long gfn, unsigned int page_count); void (*iotlb_flush_all)(struct domain *d); - int (*get_reserved_device_memory)(iommu_grdm_t *, void *); + int (*get_reserved_device_memory)(iommu_grdm_t *, struct domain *, void *); void (*dump_p2m_table)(struct domain *d); }; void iommu_suspend(void); void iommu_resume(void); void iommu_crash_shutdown(void); -int iommu_get_reserved_device_memory(iommu_grdm_t *, void *); +int iommu_get_reserved_device_memory(iommu_grdm_t *, struct domain *, void *); void iommu_share_p2m_table(struct domain *d); Thanks Tiejun