From mboxrd@z Thu Jan 1 00:00:00 1970 From: "Chen, Tiejun" Subject: Re: [v7][RFC][PATCH 06/13] hvmloader/ram: check if guest memory is out of reserved device memory maps Date: Fri, 07 Nov 2014 18:27:35 +0800 Message-ID: <545C9E97.4040800@intel.com> References: <1414136077-18599-1-git-send-email-tiejun.chen@intel.com> <544F49F9.3070208@intel.com> <544F78B80200007800042B95@mail.emea.novell.com> <54509A8A.9060606@intel.com> <5450BE27020000780004304A@mail.emea.novell.com> <5451AC56.7010303@intel.com> <54521100020000780004363A@mail.emea.novell.com> <545320F2.5030103@intel.com> <545354500200007800043D94@mail.emea.novell.com> <5457174C.8020400@intel.com> <5457515102000078000443B0@mail.emea.novell.com> <54574D8F.8060407@intel.com> <54575E2D0200007800044443@mail.emea.novell.com> <545767C4.7070806@intel.com> <5457787002000078000445C7@mail.emea.novell.com> <54576DF7.8060408@intel.com> <545784830200007800044627@mail.emea.novell.com> <54585EAA.20904@intel.com> <545894610200007800044A5B@mail.emea.novell.com> <545992A2.8070309@intel.com> <545A57AD02000078000C1037@mail.emea.novell.com> <545B3F4A.5070808@intel.com> <545B562F02000078000453FB@mail.emea.novell.com> Mime-Version: 1.0 Content-Type: text/plain; charset="us-ascii"; Format="flowed" Content-Transfer-Encoding: 7bit Return-path: In-Reply-To: <545B562F02000078000453FB@mail.emea.novell.com> List-Unsubscribe: , List-Post: List-Help: List-Subscribe: , Sender: xen-devel-bounces@lists.xen.org Errors-To: xen-devel-bounces@lists.xen.org To: Jan Beulich Cc: yang.z.zhang@intel.com, kevin.tian@intel.com, tim@xen.org, xen-devel@lists.xen.org List-Id: xen-devel@lists.xenproject.org On 2014/11/6 18:06, Jan Beulich wrote: >>>> On 06.11.14 at 10:28, wrote: >> --- a/xen/include/public/domctl.h >> +++ b/xen/include/public/domctl.h >> @@ -484,6 +484,14 @@ struct xen_domctl_assign_device { >> typedef struct xen_domctl_assign_device xen_domctl_assign_device_t; >> DEFINE_XEN_GUEST_HANDLE(xen_domctl_assign_device_t); >> >> +/* Control whether/how we check and reserve device memory. */ >> +struct xen_domctl_set_rdm { >> + uint32_t pci_rdmforce; >> + uint32_t pci_dev_bitmap; > > So this allows for 32 devices; considering that you index the bitmap Sorry its my fault when I just focus on figuring out a doable way. We need to cover 256 x 32 x 8 = 65536. > by BDF, all this covers are 0000:00:00.0 through 0000:00:03.7. > Hardly enough to cover even a single pass through device (iirc the > range above is fully used by emulated devices). And of course a Are you saying Xen restrict some BDFs specific to emulate some devices? But I don't see these associated codes. > much larger bitmap isn't a good solution either. So I guess I need to reconstruct something new, please see the new draft codes. > > Nor am I really clear what you need the 32 bits of "pci_rdmforce" > for, nor why this field isn't just being named "force". Without a This variable can tell Xen to force check and reserve all RMRR ranges in that case the user is sure he's going to hotplug a device later but indeed, he really don't assign any device while creating a VM. Please see the new draft codes as well. > comment alongside the fields, it's remaining guesswork anyway > when and how this domctl is to be used. Looking at your change > to intel_iommu_get_reserved_device_memory() the field appears > to be simply redundant. > >> --- a/xen/include/xen/iommu.h >> +++ b/xen/include/xen/iommu.h >> @@ -158,14 +158,14 @@ struct iommu_ops { >> void (*crash_shutdown)(void); >> void (*iotlb_flush)(struct domain *d, unsigned long gfn, unsigned int page_count); >> void (*iotlb_flush_all)(struct domain *d); >> - int (*get_reserved_device_memory)(iommu_grdm_t *, void *); >> + int (*get_reserved_device_memory)(iommu_grdm_t *, struct domain *, void *); >> void (*dump_p2m_table)(struct domain *d); >> }; >> >> void iommu_suspend(void); >> void iommu_resume(void); >> void iommu_crash_shutdown(void); >> -int iommu_get_reserved_device_memory(iommu_grdm_t *, void *); >> +int iommu_get_reserved_device_memory(iommu_grdm_t *, struct domain *, void *); > > I don't see why these generic interfaces would need to change; > the only thing that would seem to need changing is the callback > function (and of course the context passed to it). > I'm not 100% sure if we can call current->domain in all scenarios. If you can help me confirm this I'd really like to remove this change :) Now I assume this should be true as follows: diff --git a/tools/libxc/include/xenctrl.h b/tools/libxc/include/xenctrl.h index 30764b4..5957d2e 100644 --- a/tools/libxc/include/xenctrl.h +++ b/tools/libxc/include/xenctrl.h @@ -2036,6 +2036,12 @@ int xc_assign_device(xc_interface *xch, uint32_t domid, uint32_t machine_bdf); +int xc_domain_device_setrdm(xc_interface *xch, + uint32_t domid, + uint32_t num_pcidevs, + uint32_t pci_rdmforce, + struct xen_guest_pcidev_info *pcidevs); + int xc_get_device_group(xc_interface *xch, uint32_t domid, uint32_t machine_bdf, diff --git a/tools/libxc/xc_domain.c b/tools/libxc/xc_domain.c index 009e351..f38b400 100644 --- a/tools/libxc/xc_domain.c +++ b/tools/libxc/xc_domain.c @@ -1642,6 +1642,34 @@ int xc_assign_device( return do_domctl(xch, &domctl); } +int xc_domain_device_setrdm(xc_interface *xch, + uint32_t domid, + uint32_t num_pcidevs, + uint32_t pci_rdmforce, + struct xen_guest_pcidev_info *pcidevs) +{ + int ret; + DECLARE_DOMCTL; + DECLARE_HYPERCALL_BOUNCE(pcidevs, + num_pcidevs*sizeof(xen_guest_pcidev_info_t), + XC_HYPERCALL_BUFFER_BOUNCE_IN); + + if ( xc_hypercall_bounce_pre(xch, pcidevs) ) + return -1; + + domctl.cmd = XEN_DOMCTL_set_rdm; + domctl.domain = domid; + domctl.u.set_rdm.pci_rdmforce = pci_rdmforce; + domctl.u.set_rdm.num_pcidevs = num_pcidevs; + set_xen_guest_handle(domctl.u.set_rdm.pcidevs, pcidevs); + + ret = do_domctl(xch, &domctl); + + xc_hypercall_bounce_post(xch, pcidevs); + + return ret; +} + int xc_get_device_group( xc_interface *xch, uint32_t domid, diff --git a/tools/libxc/xc_domain_restore.c b/tools/libxc/xc_domain_restore.c index d8bd9b3..ac48a82 100644 --- a/tools/libxc/xc_domain_restore.c +++ b/tools/libxc/xc_domain_restore.c @@ -2165,8 +2165,8 @@ int xc_domain_restore(xc_interface *xch, int io_fd, uint32_t dom, if ( !ext_vcpucontext ) goto vcpu_ext_state_restore; - memcpy(&domctl.u.ext_vcpucontext, vcpup, 128); - vcpup += 128; + memcpy(&domctl.u.ext_vcpucontext, vcpup, 112); + vcpup += 112; domctl.cmd = XEN_DOMCTL_set_ext_vcpucontext; domctl.domain = dom; frc = xc_domctl(xch, &domctl); diff --git a/tools/libxl/libxl_create.c b/tools/libxl/libxl_create.c index b1ff5ae..2429416 100644 --- a/tools/libxl/libxl_create.c +++ b/tools/libxl/libxl_create.c @@ -860,6 +860,9 @@ static void initiate_domain_create(libxl__egc *egc, ret = libxl__domain_build_info_setdefault(gc, &d_config->b_info); if (ret) goto error_out; + ret = libxl__domain_device_setrdm(gc, d_config, domid); + if (ret) goto error_out; + if (!sched_params_valid(gc, domid, &d_config->b_info.sched_params)) { LOG(ERROR, "Invalid scheduling parameters\n"); ret = ERROR_INVAL; diff --git a/tools/libxl/libxl_dm.c b/tools/libxl/libxl_dm.c index 3e191c3..9e402d1 100644 --- a/tools/libxl/libxl_dm.c +++ b/tools/libxl/libxl_dm.c @@ -90,6 +90,42 @@ const char *libxl__domain_device_model(libxl__gc *gc, return dm; } +int libxl__domain_device_setrdm(libxl__gc *gc, + libxl_domain_config *d_config, + uint32_t dm_domid) +{ + int i, ret; + libxl_ctx *ctx = libxl__gc_owner(gc); + struct xen_guest_pcidev_info *pcidevs = NULL; + + if ( d_config->num_pcidevs ) + { + pcidevs = malloc(d_config->num_pcidevs*sizeof(xen_guest_pcidev_info_t)); + if ( pcidevs ) + { + for (i = 0; i < d_config->num_pcidevs; i++) + { + pcidevs[i].func = d_config->pcidevs[i].func; + pcidevs[i].dev = d_config->pcidevs[i].dev; + pcidevs[i].bus = d_config->pcidevs[i].bus; + pcidevs[i].domain = d_config->pcidevs[i].domain; + pcidevs[i].rdmforce = d_config->pcidevs[i].rdmforce; + } + } + else + LIBXL__LOG(CTX, LIBXL__LOG_ERROR, + "Can't allocate for pcidevs."); + } + + ret = xc_domain_device_setrdm(ctx->xch, dm_domid, + (uint32_t)d_config->num_pcidevs, + d_config->b_info.pci_rdmforce, + pcidevs); + free(pcidevs); + + return ret; +} + const libxl_vnc_info *libxl__dm_vnc(const libxl_domain_config *guest_config) { const libxl_vnc_info *vnc = NULL; diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h index 4361421..c48a0e6 100644 --- a/tools/libxl/libxl_internal.h +++ b/tools/libxl/libxl_internal.h @@ -1471,6 +1471,11 @@ _hidden int libxl__domain_build(libxl__gc *gc, /* for device model creation */ _hidden const char *libxl__domain_device_model(libxl__gc *gc, const libxl_domain_build_info *info); + +_hidden int libxl__domain_device_setrdm(libxl__gc *gc, + libxl_domain_config *d_config, + uint32_t domid); + _hidden int libxl__need_xenpv_qemu(libxl__gc *gc, int nr_consoles, libxl__device_console *consoles, int nr_vfbs, libxl_device_vfb *vfbs, diff --git a/tools/libxl/libxl_types.idl b/tools/libxl/libxl_types.idl index ca3f724..b700263 100644 --- a/tools/libxl/libxl_types.idl +++ b/tools/libxl/libxl_types.idl @@ -398,6 +398,7 @@ libxl_domain_build_info = Struct("domain_build_info",[ ("kernel", string), ("cmdline", string), ("ramdisk", string), + ("pci_rdmforce", uint32), ("u", KeyedUnion(None, libxl_domain_type, "type", [("hvm", Struct(None, [("firmware", string), ("bios", libxl_bios_type), @@ -518,6 +519,7 @@ libxl_device_pci = Struct("device_pci", [ ("power_mgmt", bool), ("permissive", bool), ("seize", bool), + ("rdmforce", bool), ]) libxl_device_vtpm = Struct("device_vtpm", [ diff --git a/tools/libxl/libxlu_pci.c b/tools/libxl/libxlu_pci.c index 26fb143..989eac8 100644 --- a/tools/libxl/libxlu_pci.c +++ b/tools/libxl/libxlu_pci.c @@ -143,6 +143,8 @@ int xlu_pci_parse_bdf(XLU_Config *cfg, libxl_device_pci *pcidev, const char *str pcidev->permissive = atoi(tok); }else if ( !strcmp(optkey, "seize") ) { pcidev->seize = atoi(tok); + }else if ( !strcmp(optkey, "rdmforce") ) { + pcidev->rdmforce = atoi(tok); }else{ XLU__PCI_ERR(cfg, "Unknown PCI BDF option: %s", optkey); } diff --git a/tools/libxl/xl_cmdimpl.c b/tools/libxl/xl_cmdimpl.c index 3c9f146..436b4f3 100644 --- a/tools/libxl/xl_cmdimpl.c +++ b/tools/libxl/xl_cmdimpl.c @@ -904,6 +904,7 @@ static void replace_string(char **str, const char *val) *str = xstrdup(val); } +#define PCI_BDF(b,d,f) ((((b) & 0xff) << 8) | PCI_DEVFN(d,f)) static void parse_config_data(const char *config_source, const char *config_data, int config_len, @@ -919,6 +920,7 @@ static void parse_config_data(const char *config_source, int pci_msitranslate = 0; int pci_permissive = 0; int pci_seize = 0; + int pci_rdmforce = 0; int i, e; libxl_domain_create_info *c_info = &d_config->c_info; @@ -1699,6 +1701,9 @@ skip_vfb: if (!xlu_cfg_get_long (config, "pci_seize", &l, 0)) pci_seize = l; + if (!xlu_cfg_get_long (config, "pci_rdmforce", &l, 0)) + pci_rdmforce = l; + /* To be reworked (automatically enabled) once the auto ballooning * after guest starts is done (with PCI devices passed in). */ if (c_info->type == LIBXL_DOMAIN_TYPE_PV) { @@ -1719,9 +1724,11 @@ skip_vfb: pcidev->power_mgmt = pci_power_mgmt; pcidev->permissive = pci_permissive; pcidev->seize = pci_seize; + pcidev->rdmforce = pci_rdmforce; if (!xlu_pci_parse_bdf(config, pcidev, buf)) d_config->num_pcidevs++; } + d_config->b_info.pci_rdmforce = pci_rdmforce; if (d_config->num_pcidevs && c_info->type == LIBXL_DOMAIN_TYPE_PV) libxl_defbool_set(&b_info->u.pv.e820_host, true); } diff --git a/xen/drivers/passthrough/pci.c b/xen/drivers/passthrough/pci.c index 1eba833..daf259e 100644 --- a/xen/drivers/passthrough/pci.c +++ b/xen/drivers/passthrough/pci.c @@ -1540,6 +1540,34 @@ int iommu_do_pci_domctl( } break; + case XEN_DOMCTL_set_rdm: + { + struct xen_domctl_set_rdm *xdsr = &domctl->u.set_rdm; + struct xen_guest_pcidev_info *pcidevs; + int i; + + pcidevs = xmalloc_array(xen_guest_pcidev_info_t, + domctl->u.set_rdm.num_pcidevs); + if ( pcidevs == NULL ) + { + return -ENOMEM; + } + + for ( i = 0; i < xdsr->num_pcidevs; ++i ) + { + if ( __copy_from_guest_offset(pcidevs, xdsr->pcidevs, i, 1) ) + { + xfree(pcidevs); + return -EFAULT; + } + } + + d->arch.hvm_domain.pci_rdmforce = domctl->u.set_rdm.pci_rdmforce; + d->arch.hvm_domain.num_pcidevs = domctl->u.set_rdm.num_pcidevs; + d->arch.hvm_domain.pcidevs = pcidevs; + } + break; + case XEN_DOMCTL_assign_device: if ( unlikely(d->is_dying) ) { diff --git a/xen/drivers/passthrough/vtd/dmar.c b/xen/drivers/passthrough/vtd/dmar.c index 546eca9..4b35c04 100644 --- a/xen/drivers/passthrough/vtd/dmar.c +++ b/xen/drivers/passthrough/vtd/dmar.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include "dmar.h" #include "iommu.h" @@ -925,14 +926,37 @@ int intel_iommu_get_reserved_device_memory(iommu_grdm_t *func, void *ctxt) { struct acpi_rmrr_unit *rmrr; int rc = 0; + int i, j; + u16 bdf, pt_bdf; + struct domain *d = current->domain; - list_for_each_entry(rmrr, &acpi_rmrr_units, list) + for_each_rmrr_device ( rmrr, bdf, i ) { - rc = func(PFN_DOWN(rmrr->base_address), - PFN_UP(rmrr->end_address) - PFN_DOWN(rmrr->base_address), - ctxt); - if ( rc ) - break; + if ( d->arch.hvm_domain.pci_rdmforce ) + { + rc = func(PFN_DOWN(rmrr->base_address), + PFN_UP(rmrr->end_address) - + PFN_DOWN(rmrr->base_address), + ctxt); + if ( rc ) + break; + } + else + { + for ( j = 0; j < d->arch.hvm_domain.num_pcidevs; j++ ) + { + pt_bdf = PCI_BDF(d->arch.hvm_domain.pcidevs[j].bus, + d->arch.hvm_domain.pcidevs[j].dev, + d->arch.hvm_domain.pcidevs[j].func); + if ( pt_bdf == bdf ) + rc = func(PFN_DOWN(rmrr->base_address), + PFN_UP(rmrr->end_address) - + PFN_DOWN(rmrr->base_address), + ctxt); + if ( rc ) + break; + } + } } return rc; diff --git a/xen/include/asm-x86/hvm/domain.h b/xen/include/asm-x86/hvm/domain.h index 2757c7f..b18ce40 100644 --- a/xen/include/asm-x86/hvm/domain.h +++ b/xen/include/asm-x86/hvm/domain.h @@ -90,6 +90,10 @@ struct hvm_domain { /* Cached CF8 for guest PCI config cycles */ uint32_t pci_cf8; + uint32_t pci_rdmforce; + uint32_t num_pcidevs; + struct xen_guest_pcidev_info *pcidevs; + struct pl_time pl_time; struct hvm_io_handler *io_handler; diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h index 58b19e7..4e47fac 100644 --- a/xen/include/public/domctl.h +++ b/xen/include/public/domctl.h @@ -484,6 +484,24 @@ struct xen_domctl_assign_device { typedef struct xen_domctl_assign_device xen_domctl_assign_device_t; DEFINE_XEN_GUEST_HANDLE(xen_domctl_assign_device_t); +struct xen_guest_pcidev_info { + uint8_t func; + uint8_t dev; + uint8_t bus; + int domain; + int rdmforce; +}; +typedef struct xen_guest_pcidev_info xen_guest_pcidev_info_t; +DEFINE_XEN_GUEST_HANDLE(xen_guest_pcidev_info_t); +/* Control whether/how we check and reserve device memory. */ +struct xen_domctl_set_rdm { + uint32_t pci_rdmforce; + uint32_t num_pcidevs; + XEN_GUEST_HANDLE(xen_guest_pcidev_info_t) pcidevs; +}; +typedef struct xen_domctl_set_rdm xen_domctl_set_rdm_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_set_rdm_t); + /* Retrieve sibling devices infomation of machine_sbdf */ /* XEN_DOMCTL_get_device_group */ struct xen_domctl_get_device_group { @@ -1056,6 +1074,7 @@ struct xen_domctl { #define XEN_DOMCTL_set_vcpu_msrs 73 #define XEN_DOMCTL_setvnumainfo 74 #define XEN_DOMCTL_psr_cmt_op 75 +#define XEN_DOMCTL_set_rdm 76 #define XEN_DOMCTL_gdbsx_guestmemio 1000 #define XEN_DOMCTL_gdbsx_pausevcpu 1001 #define XEN_DOMCTL_gdbsx_unpausevcpu 1002 @@ -1118,7 +1137,8 @@ struct xen_domctl { struct xen_domctl_gdbsx_domstatus gdbsx_domstatus; struct xen_domctl_vnuma vnuma; struct xen_domctl_psr_cmt_op psr_cmt_op; - uint8_t pad[128]; + struct xen_domctl_set_rdm set_rdm; + uint8_t pad[112]; } u; }; typedef struct xen_domctl xen_domctl_t; Thanks Tiejun