From mboxrd@z Thu Jan 1 00:00:00 1970 From: "Chen, Tiejun" Subject: Re: [v3][PATCH 13/16] tools/libxl: detect and avoid conflicts with RDM Date: Fri, 12 Jun 2015 16:30:47 +0800 Message-ID: <557A98B7.7080805@intel.com> References: <1433985325-16676-1-git-send-email-tiejun.chen@intel.com> <1433985325-16676-14-git-send-email-tiejun.chen@intel.com> Mime-Version: 1.0 Content-Type: text/plain; charset="us-ascii"; Format="flowed" Content-Transfer-Encoding: 7bit Return-path: In-Reply-To: List-Unsubscribe: , List-Post: List-Help: List-Subscribe: , Sender: xen-devel-bounces@lists.xen.org Errors-To: xen-devel-bounces@lists.xen.org To: "Tian, Kevin" , "jbeulich@suse.com" , "tim@xen.org" , "andrew.cooper3@citrix.com" , "Zhang, Yang Z" , "wei.liu2@citrix.com" , "ian.campbell@citrix.com" , "Ian.Jackson@eu.citrix.com" , "stefano.stabellini@citrix.com" Cc: "xen-devel@lists.xen.org" List-Id: xen-devel@lists.xenproject.org On 2015/6/11 18:19, Tian, Kevin wrote: >> From: Chen, Tiejun >> Sent: Thursday, June 11, 2015 9:15 AM >> >> While building a VM, HVM domain builder provides struct hvm_info_table{} >> to help hvmloader. Currently it includes two fields to construct guest >> e820 table by hvmloader, low_mem_pgend and high_mem_pgend. So we should >> check them to fix any conflict with RAM. >> >> RMRR can reside in address space beyond 4G theoretically, but we never >> see this in real world. So in order to avoid breaking highmem layout >> we don't solve highmem conflict. Note this means highmem rmrr could still >> be supported if no conflict. >> >> But in the case of lowmem, RMRR probably scatter the whole RAM space. >> Especially multiple RMRR entries would worsen this to lead a complicated >> memory layout. And then its hard to extend hvm_info_table{} to work >> hvmloader out. So here we're trying to figure out a simple solution to >> avoid breaking existing layout. So when a conflict occurs, >> >> #1. Above a predefined boundary (default 2G) >> - move lowmem_end below reserved region to solve conflict; >> >> #2. Below a predefined boundary (default 2G) >> - Check strict/relaxed policy. >> "strict" policy leads to fail libxl. Note when both policies >> are specified on a given region, 'strict' is always preferred. >> "relaxed" policy issue a warning message and also mask this entry INVALID >> to indicate we shouldn't expose this entry to hvmloader. >> >> Note this predefined boundary can be changes with the parameter >> "rdm_mem_boundary" in .cfg file. >> >> Signed-off-by: Tiejun Chen > > Reviewed-by: Kevin Tian > > One comment though. could you be consistent to use RDM in the code? > RMRR is just an example of RDM... Sure. Thanks Tiejun > > >> --- >> docs/man/xl.cfg.pod.5 | 21 ++++ >> tools/libxc/xc_hvm_build_x86.c | 5 +- >> tools/libxl/libxl.h | 6 + >> tools/libxl/libxl_create.c | 6 +- >> tools/libxl/libxl_dm.c | 255 >> +++++++++++++++++++++++++++++++++++++++++ >> tools/libxl/libxl_dom.c | 11 +- >> tools/libxl/libxl_internal.h | 11 +- >> tools/libxl/libxl_types.idl | 8 ++ >> tools/libxl/xl_cmdimpl.c | 3 + >> 9 files changed, 322 insertions(+), 4 deletions(-) >> >> diff --git a/docs/man/xl.cfg.pod.5 b/docs/man/xl.cfg.pod.5 >> index 638b350..6fd2370 100644 >> --- a/docs/man/xl.cfg.pod.5 >> +++ b/docs/man/xl.cfg.pod.5 >> @@ -767,6 +767,27 @@ to a given device, and "strict" is default here. >> >> Note this would override global B option. >> >> +=item B >> + >> +Number of megabytes to set a boundary for checking rdm conflict. >> + >> +When RDM conflicts with RAM, RDM probably scatter the whole RAM space. >> +Especially multiple RMRR entries would worsen this to lead a complicated >> +memory layout. So here we're trying to figure out a simple solution to >> +avoid breaking existing layout. So when a conflict occurs, >> + >> + #1. Above a predefined boundary >> + - move lowmem_end below reserved region to solve conflict; >> + >> + #2. Below a predefined boundary >> + - Check strict/relaxed policy. >> + "strict" policy leads to fail libxl. Note when both policies >> + are specified on a given region, 'strict' is always preferred. >> + "relaxed" policy issue a warning message and also mask this entry INVALID >> + to indicate we shouldn't expose this entry to hvmloader. >> + >> +Here the default is 2G. >> + >> =back >> >> =back >> diff --git a/tools/libxc/xc_hvm_build_x86.c b/tools/libxc/xc_hvm_build_x86.c >> index 0e98c84..5142578 100644 >> --- a/tools/libxc/xc_hvm_build_x86.c >> +++ b/tools/libxc/xc_hvm_build_x86.c >> @@ -21,6 +21,7 @@ >> #include >> #include >> #include >> +#include >> >> #include "xg_private.h" >> #include "xc_private.h" >> @@ -270,7 +271,7 @@ static int setup_guest(xc_interface *xch, >> >> elf_parse_binary(&elf); >> v_start = 0; >> - v_end = args->mem_size; >> + v_end = args->lowmem_end; >> >> if ( nr_pages > target_pages ) >> memflags |= XENMEMF_populate_on_demand; >> @@ -754,6 +755,8 @@ int xc_hvm_build_target_mem(xc_interface *xch, >> args.mem_size = (uint64_t)memsize << 20; >> args.mem_target = (uint64_t)target << 20; >> args.image_file_name = image_name; >> + if ( args.mmio_size == 0 ) >> + args.mmio_size = HVM_BELOW_4G_MMIO_LENGTH; >> >> return xc_hvm_build(xch, domid, &args); >> } >> diff --git a/tools/libxl/libxl.h b/tools/libxl/libxl.h >> index 0a7913b..a6212fb 100644 >> --- a/tools/libxl/libxl.h >> +++ b/tools/libxl/libxl.h >> @@ -858,6 +858,12 @@ const char *libxl_defbool_to_string(libxl_defbool b); >> #define LIBXL_TIMER_MODE_DEFAULT -1 >> #define LIBXL_MEMKB_DEFAULT ~0ULL >> >> +/* >> + * We'd like to set a memory boundary to determine if we need to check >> + * any overlap with reserved device memory. >> + */ >> +#define LIBXL_RDM_MEM_BOUNDARY_MEMKB_DEFAULT (2048 * 1024) >> + >> #define LIBXL_MS_VM_GENID_LEN 16 >> typedef struct { >> uint8_t bytes[LIBXL_MS_VM_GENID_LEN]; >> diff --git a/tools/libxl/libxl_create.c b/tools/libxl/libxl_create.c >> index 6c8ec63..0438731 100644 >> --- a/tools/libxl/libxl_create.c >> +++ b/tools/libxl/libxl_create.c >> @@ -109,6 +109,10 @@ void libxl__rdm_setdefault(libxl__gc *gc, libxl_domain_build_info >> *b_info) >> { >> if (b_info->rdm.reserve == LIBXL_RDM_RESERVE_FLAG_INVALID) >> b_info->rdm.reserve = LIBXL_RDM_RESERVE_FLAG_RELAXED; >> + >> + if (b_info->rdm_mem_boundary_memkb == LIBXL_MEMKB_DEFAULT) >> + b_info->rdm_mem_boundary_memkb = >> + LIBXL_RDM_MEM_BOUNDARY_MEMKB_DEFAULT; >> } >> >> int libxl__domain_build_info_setdefault(libxl__gc *gc, >> @@ -460,7 +464,7 @@ int libxl__domain_build(libxl__gc *gc, >> >> switch (info->type) { >> case LIBXL_DOMAIN_TYPE_HVM: >> - ret = libxl__build_hvm(gc, domid, info, state); >> + ret = libxl__build_hvm(gc, domid, d_config, state); >> if (ret) >> goto out; >> >> diff --git a/tools/libxl/libxl_dm.c b/tools/libxl/libxl_dm.c >> index 33f9ce6..d908350 100644 >> --- a/tools/libxl/libxl_dm.c >> +++ b/tools/libxl/libxl_dm.c >> @@ -90,6 +90,261 @@ const char *libxl__domain_device_model(libxl__gc *gc, >> return dm; >> } >> >> +static struct xen_reserved_device_memory >> +*xc_device_get_rdm(libxl__gc *gc, >> + uint32_t flag, >> + uint16_t seg, >> + uint8_t bus, >> + uint8_t devfn, >> + unsigned int *nr_entries) >> +{ >> + struct xen_reserved_device_memory *xrdm; >> + int rc; >> + >> + rc = xc_reserved_device_memory_map(CTX->xch, flag, seg, bus, devfn, >> + NULL, nr_entries); >> + assert(rc <= 0); >> + /* "0" means we have no any rdm entry. */ >> + if (!rc) >> + goto out; >> + >> + if (errno == ENOBUFS) { >> + xrdm = malloc(*nr_entries * sizeof(xen_reserved_device_memory_t)); >> + if (!xrdm) { >> + LOG(ERROR, "Could not allocate RDM buffer!\n"); >> + goto out; >> + } >> + rc = xc_reserved_device_memory_map(CTX->xch, flag, seg, bus, devfn, >> + xrdm, nr_entries); >> + if (rc) { >> + LOG(ERROR, "Could not get reserved device memory maps.\n"); >> + *nr_entries = 0; >> + free(xrdm); >> + xrdm = NULL; >> + } >> + } else >> + LOG(ERROR, "Could not get reserved device memory maps.\n"); >> + >> + out: >> + return xrdm; >> +} >> + >> +/* >> + * Check whether there exists rdm hole in the specified memory range. >> + * Returns true if exists, else returns false. >> + */ >> +static bool overlaps_rdm(uint64_t start, uint64_t memsize, >> + uint64_t rdm_start, uint64_t rdm_size) >> +{ >> + return (start + memsize > rdm_start) && (start < rdm_start + rdm_size); >> +} >> + >> +/* >> + * Check reported RDM regions and handle potential gfn conflicts according >> + * to user preferred policy. >> + * >> + * RMRR can reside in address space beyond 4G theoretically, but we never >> + * see this in real world. So in order to avoid breaking highmem layout >> + * we don't solve highmem conflict. Note this means highmem rmrr could still >> + * be supported if no conflict. >> + * >> + * But in the case of lowmem, RMRR probably scatter the whole RAM space. >> + * Especially multiple RMRR entries would worsen this to lead a complicated >> + * memory layout. And then its hard to extend hvm_info_table{} to work >> + * hvmloader out. So here we're trying to figure out a simple solution to >> + * avoid breaking existing layout. So when a conflict occurs, >> + * >> + * #1. Above a predefined boundary (default 2G) >> + * - Move lowmem_end below reserved region to solve conflict; >> + * >> + * #2. Below a predefined boundary (default 2G) >> + * - Check strict/relaxed policy. >> + * "strict" policy leads to fail libxl. Note when both policies >> + * are specified on a given region, 'strict' is always preferred. >> + * "relaxed" policy issue a warning message and also mask this entry >> + * INVALID to indicate we shouldn't expose this entry to hvmloader. >> + */ >> +int libxl__domain_device_construct_rdm(libxl__gc *gc, >> + libxl_domain_config *d_config, >> + uint64_t rdm_mem_boundary, >> + struct xc_hvm_build_args *args) >> +{ >> + int i, j, conflict; >> + struct xen_reserved_device_memory *xrdm = NULL; >> + uint32_t type = d_config->b_info.rdm.type; >> + uint16_t seg; >> + uint8_t bus, devfn; >> + uint64_t rdm_start, rdm_size; >> + uint64_t highmem_end = args->highmem_end ? args->highmem_end : (1ull<<32); >> + >> + /* Might not expose rdm. */ >> + if (type == LIBXL_RDM_RESERVE_TYPE_NONE && !d_config->num_pcidevs) >> + return 0; >> + >> + /* Query all RDM entries in this platform */ >> + if (type == LIBXL_RDM_RESERVE_TYPE_HOST) { >> + unsigned int nr_entries; >> + >> + /* Collect all rdm info if exist. */ >> + xrdm = xc_device_get_rdm(gc, PCI_DEV_RDM_ALL, >> + 0, 0, 0, &nr_entries); >> + if (!nr_entries) >> + return 0; >> + >> + assert(xrdm); >> + >> + d_config->num_rdms = nr_entries; >> + d_config->rdms = libxl__realloc(NOGC, d_config->rdms, >> + d_config->num_rdms * sizeof(libxl_device_rdm)); >> + >> + for (i = 0; i < d_config->num_rdms; i++) { >> + d_config->rdms[i].start = >> + (uint64_t)xrdm[i].start_pfn << XC_PAGE_SHIFT; >> + d_config->rdms[i].size = >> + (uint64_t)xrdm[i].nr_pages << XC_PAGE_SHIFT; >> + d_config->rdms[i].flag = d_config->b_info.rdm.reserve; >> + } >> + >> + free(xrdm); >> + } else >> + d_config->num_rdms = 0; >> + >> + /* Query RDM entries per-device */ >> + for (i = 0; i < d_config->num_pcidevs; i++) { >> + unsigned int nr_entries; >> + bool new = true; >> + >> + seg = d_config->pcidevs[i].domain; >> + bus = d_config->pcidevs[i].bus; >> + devfn = PCI_DEVFN(d_config->pcidevs[i].dev, d_config->pcidevs[i].func); >> + nr_entries = 0; >> + xrdm = xc_device_get_rdm(gc, ~PCI_DEV_RDM_ALL, >> + seg, bus, devfn, &nr_entries); >> + /* No RDM to associated with this device. */ >> + if (!nr_entries) >> + continue; >> + >> + assert(xrdm); >> + >> + /* >> + * Need to check whether this entry is already saved in the array. >> + * This could come from two cases: >> + * >> + * - user may configure to get all RMRRs in this platform, which >> + * is already queried before this point >> + * - or two assigned devices may share one RMRR entry >> + * >> + * different policies may be configured on the same RMRR due to above >> + * two cases. We choose a simple policy to always favor stricter policy >> + */ >> + for (j = 0; j < d_config->num_rdms; j++) { >> + if (d_config->rdms[j].start == >> + (uint64_t)xrdm[0].start_pfn << XC_PAGE_SHIFT) >> + { >> + if (d_config->rdms[j].flag != LIBXL_RDM_RESERVE_FLAG_STRICT) >> + d_config->rdms[j].flag = d_config->pcidevs[i].rdm_reserve; >> + new = false; >> + break; >> + } >> + } >> + >> + if (new) { >> + d_config->num_rdms++; >> + d_config->rdms = libxl__realloc(NOGC, d_config->rdms, >> + d_config->num_rdms * sizeof(libxl_device_rdm)); >> + >> + d_config->rdms[d_config->num_rdms - 1].start = >> + (uint64_t)xrdm[0].start_pfn << XC_PAGE_SHIFT; >> + d_config->rdms[d_config->num_rdms - 1].size = >> + (uint64_t)xrdm[0].nr_pages << XC_PAGE_SHIFT; >> + d_config->rdms[d_config->num_rdms - 1].flag = >> + d_config->pcidevs[i].rdm_reserve; >> + } >> + free(xrdm); >> + } >> + >> + /* >> + * Next step is to check and avoid potential conflict between RDM entries >> + * and guest RAM. To avoid intrusive impact to existing memory layout >> + * {lowmem, mmio, highmem} which is passed around various function blocks, >> + * below conflicts are not handled which are rare and handling them would >> + * lead to a more scattered layout: >> + * - RMRR in highmem area (>4G) >> + * - RMRR lower than a defined memory boundary (e.g. 2G) >> + * Otherwise for conflicts between boundary and 4G, we'll simply move lowmem >> + * end below reserved region to solve conflict. >> + * >> + * If a conflict is detected on a given RMRR entry, an error will be >> + * returned if 'strict' policy is specified. Instead, if 'relaxed' policy >> + * specified, this conflict is treated just as a warning, but we mark this >> + * RMRR entry as INVALID to indicate that this entry shouldn't be exposed >> + * to hvmloader. >> + * >> + * Firstly we should check the case of rdm < 4G because we may need to >> + * expand highmem_end. >> + */ >> + for (i = 0; i < d_config->num_rdms; i++) { >> + rdm_start = d_config->rdms[i].start; >> + rdm_size = d_config->rdms[i].size; >> + conflict = overlaps_rdm(0, args->lowmem_end, rdm_start, rdm_size); >> + >> + if (!conflict) >> + continue; >> + >> + /* Just check if RDM > our memory boundary. */ >> + if (rdm_start > rdm_mem_boundary) { >> + /* >> + * We will move downwards lowmem_end so we have to expand >> + * highmem_end. >> + */ >> + highmem_end += (args->lowmem_end - rdm_start); >> + /* Now move downwards lowmem_end. */ >> + args->lowmem_end = rdm_start; >> + } >> + } >> + >> + /* Sync highmem_end. */ >> + args->highmem_end = highmem_end; >> + >> + /* >> + * Finally we can take same policy to check lowmem(< 2G) and >> + * highmem adjusted above. >> + */ >> + for (i = 0; i < d_config->num_rdms; i++) { >> + rdm_start = d_config->rdms[i].start; >> + rdm_size = d_config->rdms[i].size; >> + /* Does this entry conflict with lowmem? */ >> + conflict = overlaps_rdm(0, args->lowmem_end, >> + rdm_start, rdm_size); >> + /* Does this entry conflict with highmem? */ >> + conflict |= overlaps_rdm((1ULL<<32), >> + args->highmem_end - (1ULL<<32), >> + rdm_start, rdm_size); >> + >> + if (!conflict) >> + continue; >> + >> + if(d_config->rdms[i].flag == LIBXL_RDM_RESERVE_FLAG_STRICT) { >> + LOG(ERROR, "RDM conflict at 0x%lx.\n", d_config->rdms[i].start); >> + goto out; >> + } else { >> + LOG(WARN, "Ignoring RDM conflict at 0x%lx.\n", >> + d_config->rdms[i].start); >> + >> + /* >> + * Then mask this INVALID to indicate we shouldn't expose this >> + * to hvmloader. >> + */ >> + d_config->rdms[i].flag = LIBXL_RDM_RESERVE_FLAG_INVALID; >> + } >> + } >> + >> + return 0; >> + >> + out: >> + return ERROR_FAIL; >> +} >> + >> const libxl_vnc_info *libxl__dm_vnc(const libxl_domain_config *guest_config) >> { >> const libxl_vnc_info *vnc = NULL; >> diff --git a/tools/libxl/libxl_dom.c b/tools/libxl/libxl_dom.c >> index 867172a..1777b32 100644 >> --- a/tools/libxl/libxl_dom.c >> +++ b/tools/libxl/libxl_dom.c >> @@ -914,13 +914,14 @@ out: >> } >> >> int libxl__build_hvm(libxl__gc *gc, uint32_t domid, >> - libxl_domain_build_info *info, >> + libxl_domain_config *d_config, >> libxl__domain_build_state *state) >> { >> libxl_ctx *ctx = libxl__gc_owner(gc); >> struct xc_hvm_build_args args = {}; >> int ret, rc = ERROR_FAIL; >> uint64_t mmio_start, lowmem_end, highmem_end; >> + libxl_domain_build_info *const info = &d_config->b_info; >> >> memset(&args, 0, sizeof(struct xc_hvm_build_args)); >> /* The params from the configuration file are in Mb, which are then >> @@ -958,6 +959,14 @@ int libxl__build_hvm(libxl__gc *gc, uint32_t domid, >> args.highmem_end = highmem_end; >> args.mmio_start = mmio_start; >> >> + ret = libxl__domain_device_construct_rdm(gc, d_config, >> + info->rdm_mem_boundary_memkb*1024, >> + &args); >> + if (ret) { >> + LOG(ERROR, "checking reserved device memory failed"); >> + goto out; >> + } >> + >> if (info->num_vnuma_nodes != 0) { >> int i; >> >> diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h >> index e9ac886..52f3831 100644 >> --- a/tools/libxl/libxl_internal.h >> +++ b/tools/libxl/libxl_internal.h >> @@ -1011,7 +1011,7 @@ _hidden int libxl__build_post(libxl__gc *gc, uint32_t domid, >> _hidden int libxl__build_pv(libxl__gc *gc, uint32_t domid, >> libxl_domain_build_info *info, libxl__domain_build_state *state); >> _hidden int libxl__build_hvm(libxl__gc *gc, uint32_t domid, >> - libxl_domain_build_info *info, >> + libxl_domain_config *d_config, >> libxl__domain_build_state *state); >> >> _hidden int libxl__qemu_traditional_cmd(libxl__gc *gc, uint32_t domid, >> @@ -1519,6 +1519,15 @@ _hidden int libxl__need_xenpv_qemu(libxl__gc *gc, >> int nr_channels, libxl_device_channel *channels); >> >> /* >> + * This function will fix reserved device memory conflict >> + * according to user's configuration. >> + */ >> +_hidden int libxl__domain_device_construct_rdm(libxl__gc *gc, >> + libxl_domain_config *d_config, >> + uint64_t rdm_mem_guard, >> + struct xc_hvm_build_args *args); >> + >> +/* >> * This function will cause the whole libxl process to hang >> * if the device model does not respond. It is deprecated. >> * >> diff --git a/tools/libxl/libxl_types.idl b/tools/libxl/libxl_types.idl >> index 4dfcaf7..b4282a0 100644 >> --- a/tools/libxl/libxl_types.idl >> +++ b/tools/libxl/libxl_types.idl >> @@ -395,6 +395,7 @@ libxl_domain_build_info = Struct("domain_build_info",[ >> ("target_memkb", MemKB), >> ("video_memkb", MemKB), >> ("shadow_memkb", MemKB), >> + ("rdm_mem_boundary_memkb", MemKB), >> ("rtc_timeoffset", uint32), >> ("exec_ssidref", uint32), >> ("exec_ssid_label", string), >> @@ -559,6 +560,12 @@ libxl_device_pci = Struct("device_pci", [ >> ("rdm_reserve", libxl_rdm_reserve_flag), >> ]) >> >> +libxl_device_rdm = Struct("device_rdm", [ >> + ("start", uint64), >> + ("size", uint64), >> + ("flag", libxl_rdm_reserve_flag), >> + ]) >> + >> libxl_device_dtdev = Struct("device_dtdev", [ >> ("path", string), >> ]) >> @@ -589,6 +596,7 @@ libxl_domain_config = Struct("domain_config", [ >> ("disks", Array(libxl_device_disk, "num_disks")), >> ("nics", Array(libxl_device_nic, "num_nics")), >> ("pcidevs", Array(libxl_device_pci, "num_pcidevs")), >> + ("rdms", Array(libxl_device_rdm, "num_rdms")), >> ("dtdevs", Array(libxl_device_dtdev, "num_dtdevs")), >> ("vfbs", Array(libxl_device_vfb, "num_vfbs")), >> ("vkbs", Array(libxl_device_vkb, "num_vkbs")), >> diff --git a/tools/libxl/xl_cmdimpl.c b/tools/libxl/xl_cmdimpl.c >> index 4364ba4..85d74fd 100644 >> --- a/tools/libxl/xl_cmdimpl.c >> +++ b/tools/libxl/xl_cmdimpl.c >> @@ -1374,6 +1374,9 @@ static void parse_config_data(const char *config_source, >> if (!xlu_cfg_get_long (config, "videoram", &l, 0)) >> b_info->video_memkb = l * 1024; >> >> + if (!xlu_cfg_get_long (config, "rdm_mem_boundary", &l, 0)) >> + b_info->rdm_mem_boundary_memkb = l * 1024; >> + >> if (!xlu_cfg_get_long(config, "max_event_channels", &l, 0)) >> b_info->event_channels = l; >> >> -- >> 1.9.1 > >