From mboxrd@z Thu Jan 1 00:00:00 1970 From: Boris Ostrovsky Subject: Re: [PATCH v5 1/1] Add mmio_hole_size Date: Mon, 29 Sep 2014 21:27:18 -0400 Message-ID: <542A06F6.1060205@oracle.com> References: <1410452423-11345-1-git-send-email-dslutz@verizon.com> <1410452423-11345-2-git-send-email-dslutz@verizon.com> Mime-Version: 1.0 Content-Type: text/plain; charset="us-ascii"; Format="flowed" Content-Transfer-Encoding: 7bit Return-path: In-Reply-To: <1410452423-11345-2-git-send-email-dslutz@verizon.com> List-Unsubscribe: , List-Post: List-Help: List-Subscribe: , Sender: xen-devel-bounces@lists.xen.org Errors-To: xen-devel-bounces@lists.xen.org To: Don Slutz , xen-devel@lists.xen.org Cc: Ian Jackson , Ian Campbell , Jan Beulich , Stefano Stabellini List-Id: xen-devel@lists.xenproject.org On 09/11/2014 12:20 PM, Don Slutz wrote: > If you add enough PCI devices then all mmio may not fit below 4G > which may not be the layout the user wanted. This allows you to > increase the below 4G address space that PCI devices can use and > therefore in more cases not have any mmio that is above 4G. > > There are real PCI cards that do not support mmio over 4G, so if you > want to emulate them precisely, you may also need to increase the > space below 4G for them. There are drivers for these cards that also > do not work if they have their mmio space mapped above 4G. > > This allows growing the MMIO hole to the size needed. > > This may help with using pci passthru and HVM. > > Signed-off-by: Don Slutz > --- > v5: > Add LIBXL_HAVE_BUILDINFO_HVM_MMIO_HOLE_SIZE. > v4: > s/pci_hole_min_size/mmio_hole_size/ > s/HVM_BELOW_4G_RAM_END/HVM_BELOW_4G_MMIO_START/ > Add ignore to hvmloader message > Adjust commit message > Dropped min; stopped mmio_hole_size changing in hvmloader > > docs/man/xl.cfg.pod.5 | 12 +++++++ > tools/firmware/hvmloader/config.h | 1 - > > tools/firmware/hvmloader/pci.c | 71 +++++++++++++++++++++++++++------------ > tools/libxc/xc_hvm_build_x86.c | 30 +++++++++++++++-- > tools/libxc/xenguest.h | 11 ++++++ > tools/libxl/libxl.h | 5 +++ > tools/libxl/libxl_create.c | 29 ++++++++++++---- > tools/libxl/libxl_dm.c | 24 +++++++++++-- > tools/libxl/libxl_dom.c | 3 +- > tools/libxl/libxl_types.idl | 1 + > tools/libxl/xl_cmdimpl.c | 13 +++++++ > 11 files changed, 166 insertions(+), 34 deletions(-) > > diff --git a/docs/man/xl.cfg.pod.5 b/docs/man/xl.cfg.pod.5 > index 517ae2f..45f7857 100644 > --- a/docs/man/xl.cfg.pod.5 > +++ b/docs/man/xl.cfg.pod.5 > @@ -1111,6 +1111,18 @@ wallclock (i.e., real) time. > > =back > > +=head3 Memory layout > + > +=over 4 > + > +=item B > + > +Specifies the size the MMIO hole below 4GiB will be. Only valid for > +device_model_version = "qemu-xen". Note: this requires QEMU 2.1.0 > +or later. Can you add text describing restrictions on hole size? Not smaller than 256MB, I think. > + > +=back > + > =head3 Support for Paravirtualisation of HVM Guests > > The following options allow Paravirtualised features (such as devices) > diff --git a/tools/firmware/hvmloader/config.h b/tools/firmware/hvmloader/config.h > index 80bea46..b838cf9 100644 > --- a/tools/firmware/hvmloader/config.h > +++ b/tools/firmware/hvmloader/config.h > @@ -53,7 +53,6 @@ extern struct bios_config ovmf_config; > #define PCI_ISA_IRQ_MASK 0x0c20U /* ISA IRQs 5,10,11 are PCI connected */ > > /* MMIO hole: Hardcoded defaults, which can be dynamically expanded. */ > -#define PCI_MEM_START 0xf0000000 > #define PCI_MEM_END 0xfc000000 > > extern unsigned long pci_mem_start, pci_mem_end; > diff --git a/tools/firmware/hvmloader/pci.c b/tools/firmware/hvmloader/pci.c > index 3712988..e5cd2e3 100644 > --- a/tools/firmware/hvmloader/pci.c > +++ b/tools/firmware/hvmloader/pci.c > @@ -28,9 +28,10 @@ > #include > #include > #include > +#include > #include > > -unsigned long pci_mem_start = PCI_MEM_START; > +unsigned long pci_mem_start = HVM_BELOW_4G_MMIO_START; > unsigned long pci_mem_end = PCI_MEM_END; > uint64_t pci_hi_mem_start = 0, pci_hi_mem_end = 0; > > @@ -59,6 +60,7 @@ void pci_setup(void) > uint64_t bar_sz; > } *bars = (struct bars *)scratch_start; > unsigned int i, nr_bars = 0; > + uint64_t mmio_hole_size = 0; > > const char *s; > /* > @@ -86,6 +88,10 @@ void pci_setup(void) > printf("Relocating guest memory for lowmem MMIO space %s\n", > allow_memory_relocate?"enabled":"disabled"); > > + s = xenstore_read("platform/mmio_hole_size", NULL); > + if ( s ) > + mmio_hole_size = strtoll(s, NULL, 0); > + > /* Program PCI-ISA bridge with appropriate link routes. */ > isa_irq = 0; > for ( link = 0; link < 4; link++ ) > @@ -237,26 +243,49 @@ void pci_setup(void) > pci_writew(devfn, PCI_COMMAND, cmd); > } > > - /* > - * At the moment qemu-xen can't deal with relocated memory regions. > - * It's too close to the release to make a proper fix; for now, > - * only allow the MMIO hole to grow large enough to move guest memory > - * if we're running qemu-traditional. Items that don't fit will be > - * relocated into the 64-bit address space. > - * > - * This loop now does the following: > - * - If allow_memory_relocate, increase the MMIO hole until it's > - * big enough, or until it's 2GiB > - * - If !allow_memory_relocate, increase the MMIO hole until it's > - * big enough, or until it's 2GiB, or until it overlaps guest > - * memory > - */ > - while ( (mmio_total > (pci_mem_end - pci_mem_start)) > - && ((pci_mem_start << 1) != 0) > - && (allow_memory_relocate > - || (((pci_mem_start << 1) >> PAGE_SHIFT) > - >= hvm_info->low_mem_pgend)) ) > - pci_mem_start <<= 1; > + if ( mmio_hole_size ) > + { > + uint64_t max_ram_below_4g = (1ULL << 32) - mmio_hole_size; > + > + if ( max_ram_below_4g > HVM_BELOW_4G_MMIO_START ) > + { > + printf("max_ram_below_4g=0x"PRIllx > + " too big for mmio_hole_size=0x"PRIllx > + " has been ignored.\n", > + PRIllx_arg(max_ram_below_4g), > + PRIllx_arg(mmio_hole_size)); > + } Do you need to check whether the hole is too large? Here and in the toolstack. > + else > + { > + pci_mem_start = max_ram_below_4g; > + printf("pci_mem_start=0x%lx (was 0x%x) for mmio_hole_size=%lu\n", > + pci_mem_start, HVM_BELOW_4G_MMIO_START, > + (long)mmio_hole_size); > + } > + } > + else > + { > + /* > + * At the moment qemu-xen can't deal with relocated memory regions. > + * It's too close to the release to make a proper fix; for now, > + * only allow the MMIO hole to grow large enough to move guest memory > + * if we're running qemu-traditional. Items that don't fit will be > + * relocated into the 64-bit address space. > + * > + * This loop now does the following: > + * - If allow_memory_relocate, increase the MMIO hole until it's > + * big enough, or until it's 2GiB > + * - If !allow_memory_relocate, increase the MMIO hole until it's > + * big enough, or until it's 2GiB, or until it overlaps guest > + * memory > + */ > + while ( (mmio_total > (pci_mem_end - pci_mem_start)) > + && ((pci_mem_start << 1) != 0) > + && (allow_memory_relocate > + || (((pci_mem_start << 1) >> PAGE_SHIFT) > + >= hvm_info->low_mem_pgend)) ) > + pci_mem_start <<= 1; > + } > > if ( mmio_total > (pci_mem_end - pci_mem_start) ) > { > diff --git a/tools/libxc/xc_hvm_build_x86.c b/tools/libxc/xc_hvm_build_x86.c > index c81a25b..94da7fa 100644 > --- a/tools/libxc/xc_hvm_build_x86.c > +++ b/tools/libxc/xc_hvm_build_x86.c > @@ -628,14 +628,40 @@ int xc_hvm_build_target_mem(xc_interface *xch, > int target, > const char *image_name) > { > + return xc_hvm_build_target_mem_with_hole(xch, domid, memsize, target, > + image_name, 0); > +} > + > +int xc_hvm_build_with_hole(xc_interface *xch, uint32_t domid, > + struct xc_hvm_build_args *args, > + uint64_t mmio_hole_size) > +{ > + if (mmio_hole_size) > + { > + uint64_t max_ram_below_4g = (1ULL << 32) - mmio_hole_size; > + > + if (max_ram_below_4g < HVM_BELOW_4G_MMIO_START) "<=", to be consistent with the check in libxl__domain_create_info_setdefault (). > + { > + args->mmio_size = mmio_hole_size; > + } > + } > + return xc_hvm_build(xch, domid, args); > +} > + > +int xc_hvm_build_target_mem_with_hole(xc_interface *xch, > + uint32_t domid, > + int memsize, > + int target, > + const char *image_name, > + uint64_t mmio_hole_size) > +{ > struct xc_hvm_build_args args = {}; > > memset(&args, 0, sizeof(struct xc_hvm_build_args)); > args.mem_size = (uint64_t)memsize << 20; > args.mem_target = (uint64_t)target << 20; > args.image_file_name = image_name; > - > - return xc_hvm_build(xch, domid, &args); > + return xc_hvm_build_with_hole(xch, domid, &args, mmio_hole_size); > } > > /* > diff --git a/tools/libxc/xenguest.h b/tools/libxc/xenguest.h > index 40bbac8..d35f38d 100644 > --- a/tools/libxc/xenguest.h > +++ b/tools/libxc/xenguest.h > @@ -244,12 +244,23 @@ struct xc_hvm_build_args { > int xc_hvm_build(xc_interface *xch, uint32_t domid, > struct xc_hvm_build_args *hvm_args); > > +int xc_hvm_build_with_hole(xc_interface *xch, uint32_t domid, > + struct xc_hvm_build_args *args, > + uint64_t mmio_hole_size); > + > int xc_hvm_build_target_mem(xc_interface *xch, > uint32_t domid, > int memsize, > int target, > const char *image_name); > > +int xc_hvm_build_target_mem_with_hole(xc_interface *xch, > + uint32_t domid, > + int memsize, > + int target, > + const char *image_name, > + uint64_t mmio_hole_size); > + > /* > * Sets *lockfd to -1. > * Has deallocated everything even on error. > diff --git a/tools/libxl/libxl.h b/tools/libxl/libxl.h > index dab3a67..e43d2d2 100644 > --- a/tools/libxl/libxl.h > +++ b/tools/libxl/libxl.h > @@ -138,6 +138,11 @@ > #define LIBXL_HAVE_BUILDINFO_IOMEM_START_GFN 1 > > /* > + * libxl_domain_build_info has the u.hvm.mmio_hole_size field. > + */ > +#define LIBXL_HAVE_BUILDINFO_HVM_MMIO_HOLE_SIZE 1 > + > +/* > * libxl ABI compatibility > * > * The only guarantee which libxl makes regarding ABI compatibility > diff --git a/tools/libxl/libxl_create.c b/tools/libxl/libxl_create.c > index b36c719..7383053 100644 > --- a/tools/libxl/libxl_create.c > +++ b/tools/libxl/libxl_create.c > @@ -23,6 +23,7 @@ > #include > #include > #include > +#include > > int libxl__domain_create_info_setdefault(libxl__gc *gc, > libxl_domain_create_info *c_info) > @@ -428,13 +429,26 @@ int libxl__domain_build(libxl__gc *gc, > vments[4] = "start_time"; > vments[5] = libxl__sprintf(gc, "%lu.%02d", start_time.tv_sec,(int)start_time.tv_usec/10000); > > - localents = libxl__calloc(gc, 7, sizeof(char *)); > - localents[0] = "platform/acpi"; > - localents[1] = libxl_defbool_val(info->u.hvm.acpi) ? "1" : "0"; > - localents[2] = "platform/acpi_s3"; > - localents[3] = libxl_defbool_val(info->u.hvm.acpi_s3) ? "1" : "0"; > - localents[4] = "platform/acpi_s4"; > - localents[5] = libxl_defbool_val(info->u.hvm.acpi_s4) ? "1" : "0"; > + localents = libxl__calloc(gc, 9, sizeof(char *)); > + i = 0; > + localents[i++] = "platform/acpi"; > + localents[i++] = libxl_defbool_val(info->u.hvm.acpi) ? "1" : "0"; > + localents[i++] = "platform/acpi_s3"; > + localents[i++] = libxl_defbool_val(info->u.hvm.acpi_s3) ? "1" : "0"; > + localents[i++] = "platform/acpi_s4"; > + localents[i++] = libxl_defbool_val(info->u.hvm.acpi_s4) ? "1" : "0"; > + if (info->u.hvm.mmio_hole_size) { > + uint64_t max_ram_below_4g = > + (1ULL << 32) - info->u.hvm.mmio_hole_size; > + > + if (max_ram_below_4g <= HVM_BELOW_4G_MMIO_START) { You seem to be making various checks on hole size in multiple places --- here, in libxl__build_device_model_args_new() and in parse_config_data(). (And, to some degree, in xc_hvm_build_with_hole()). Are they all necessary? -boris > + localents[i++] = "platform/mmio_hole_size"; > + localents[i++] = > + libxl__sprintf(gc, "%"PRIu64, > + info->u.hvm.mmio_hole_size); > + } > + } > + assert(i < 9); > > break; > case LIBXL_DOMAIN_TYPE_PV: > @@ -460,6 +474,7 @@ int libxl__domain_build(libxl__gc *gc, > vments[i++] = "image/cmdline"; > vments[i++] = (char *) state->pv_cmdline; > } > + assert(i < 11); > > break; > default: > diff --git a/tools/libxl/libxl_dm.c b/tools/libxl/libxl_dm.c > index 103cbca..099a960 100644 > --- a/tools/libxl/libxl_dm.c > +++ b/tools/libxl/libxl_dm.c > @@ -18,6 +18,7 @@ > #include "libxl_osdeps.h" /* must come before any other headers */ > > #include "libxl_internal.h" > +#include > > static const char *libxl_tapif_script(libxl__gc *gc) > { > @@ -415,6 +416,7 @@ static char ** libxl__build_device_model_args_new(libxl__gc *gc, > const libxl_vnc_info *vnc = libxl__dm_vnc(guest_config); > const libxl_sdl_info *sdl = dm_sdl(guest_config); > const char *keymap = dm_keymap(guest_config); > + char *machinearg; > flexarray_t *dm_args; > int i; > uint64_t ram_size; > @@ -696,10 +698,28 @@ static char ** libxl__build_device_model_args_new(libxl__gc *gc, > /* Switching here to the machine "pc" which does not add > * the xen-platform device instead of the default "xenfv" machine. > */ > - flexarray_append(dm_args, "pc,accel=xen"); > + machinearg = libxl__sprintf(gc, "pc,accel=xen"); > } else { > - flexarray_append(dm_args, "xenfv"); > + machinearg = libxl__sprintf(gc, "xenfv"); > } > + if (b_info->u.hvm.mmio_hole_size) { > + uint64_t max_ram_below_4g = (1ULL << 32) - > + b_info->u.hvm.mmio_hole_size; > + > + if (max_ram_below_4g > HVM_BELOW_4G_MMIO_START) { > + LIBXL__LOG(ctx, LIBXL__LOG_WARNING, > + "max-ram-below-4g=%"PRIu64 > + " (mmio_hole_size=%"PRIu64 > + ") too big > %u ignored.\n", > + max_ram_below_4g, > + b_info->u.hvm.mmio_hole_size, > + HVM_BELOW_4G_MMIO_START); > + } else { > + machinearg = libxl__sprintf(gc, "%s,max-ram-below-4g=%"PRIu64, > + machinearg, max_ram_below_4g); > + } > + } > + flexarray_append(dm_args, machinearg); > for (i = 0; b_info->extra_hvm && b_info->extra_hvm[i] != NULL; i++) > flexarray_append(dm_args, b_info->extra_hvm[i]); > break; > diff --git a/tools/libxl/libxl_dom.c b/tools/libxl/libxl_dom.c > index 0dfdb08..a3a56f5 100644 > --- a/tools/libxl/libxl_dom.c > +++ b/tools/libxl/libxl_dom.c > @@ -728,7 +728,8 @@ int libxl__build_hvm(libxl__gc *gc, uint32_t domid, > goto out; > } > > - ret = xc_hvm_build(ctx->xch, domid, &args); > + ret = xc_hvm_build_with_hole(ctx->xch, domid, &args, > + info->u.hvm.mmio_hole_size); > if (ret) { > LOGEV(ERROR, ret, "hvm building failed"); > goto out; > diff --git a/tools/libxl/libxl_types.idl b/tools/libxl/libxl_types.idl > index f1fcbc3..c912b18 100644 > --- a/tools/libxl/libxl_types.idl > +++ b/tools/libxl/libxl_types.idl > @@ -378,6 +378,7 @@ libxl_domain_build_info = Struct("domain_build_info",[ > ("timeoffset", string), > ("hpet", libxl_defbool), > ("vpt_align", libxl_defbool), > + ("mmio_hole_size", UInt(64, init_val = 0)), > ("timer_mode", libxl_timer_mode), > ("nested_hvm", libxl_defbool), > ("smbios_firmware", string), > diff --git a/tools/libxl/xl_cmdimpl.c b/tools/libxl/xl_cmdimpl.c > index 8a38077..6c74fa9 100644 > --- a/tools/libxl/xl_cmdimpl.c > +++ b/tools/libxl/xl_cmdimpl.c > @@ -34,6 +34,7 @@ > #include > #include > #include > +#include > > #include "libxl.h" > #include "libxl_utils.h" > @@ -1036,6 +1037,18 @@ static void parse_config_data(const char *config_source, > xlu_cfg_get_defbool(config, "hpet", &b_info->u.hvm.hpet, 0); > xlu_cfg_get_defbool(config, "vpt_align", &b_info->u.hvm.vpt_align, 0); > > + if (!xlu_cfg_get_long(config, "mmio_hole_size", &l, 0)) { > + b_info->u.hvm.mmio_hole_size = (uint64_t) l; > + if (b_info->u.hvm.mmio_hole_size < HVM_BELOW_4G_MMIO_LENGTH || > + b_info->u.hvm.mmio_hole_size > HVM_BELOW_4G_MMIO_START) { > + fprintf(stderr, "ERROR: invalid value 0x%"PRIx64" (%"PRIu64 > + ") for \"mmio_hole_size\"\n", > + b_info->u.hvm.mmio_hole_size, > + b_info->u.hvm.mmio_hole_size); > + exit (1); > + } > + } > + > if (!xlu_cfg_get_long(config, "timer_mode", &l, 1)) { > const char *s = libxl_timer_mode_to_string(l); > fprintf(stderr, "WARNING: specifying \"timer_mode\" as an integer is deprecated. "