From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from eggs.gnu.org ([2001:4830:134:3::10]:60884) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1Vl3I9-00036Z-0R for qemu-devel@nongnu.org; Mon, 25 Nov 2013 16:02:06 -0500 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1Vl3I2-000838-VI for qemu-devel@nongnu.org; Mon, 25 Nov 2013 16:02:00 -0500 Received: from mx1.redhat.com ([209.132.183.28]:60518) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1Vl3I2-000832-Mu for qemu-devel@nongnu.org; Mon, 25 Nov 2013 16:01:54 -0500 Received: from int-mx09.intmail.prod.int.phx2.redhat.com (int-mx09.intmail.prod.int.phx2.redhat.com [10.5.11.22]) by mx1.redhat.com (8.14.4/8.14.4) with ESMTP id rAPL1rGg028518 (version=TLSv1/SSLv3 cipher=DHE-RSA-AES256-SHA bits=256 verify=OK) for ; Mon, 25 Nov 2013 16:01:54 -0500 Date: Mon, 25 Nov 2013 23:05:10 +0200 From: "Michael S. Tsirkin" Message-ID: <20131125210510.GL12689@redhat.com> References: <1385401393-14291-1-git-send-email-pbonzini@redhat.com> MIME-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline In-Reply-To: <1385401393-14291-1-git-send-email-pbonzini@redhat.com> Subject: Re: [Qemu-devel] [PATCH rebased for-1.8] i386: pc: align gpa<->hpa on 1GB boundary (v6) List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: Paolo Bonzini Cc: Marcelo Tosatti , qemu-devel@nongnu.org On Mon, Nov 25, 2013 at 06:43:13PM +0100, Paolo Bonzini wrote: > v2: condition enablement of new mapping to new machine types (Paolo) > v3: fix changelog > v4: rebase > v5: ensure alignment of piecetwo on 2MB GPA (Igor) > do not register zero-sized piece-one (Igor) > v6: fix memory leak (Igor) > fix integer overflow (Igor) > > ---- > > Align guest physical address and host physical address > beyond guest 4GB on a 1GB boundary. > > Otherwise 1GB TLBs cannot be cached for the range. 
> > Signed-off-by: Marcelo Tosatti > [Reorganize code, keep same logic. - Paolo] > Signed-off-by: Paolo Bonzini BTW, how about a unit test for this? It could be something along the lines of the ACPI tests: run the BIOS, then probe what it reported. > --- > hw/i386/pc.c | 67 +++++++++++++++++++++++++++++++++++++++++++------ > hw/i386/pc_piix.c | 3 ++ > hw/i386/pc_q35.c | 3 ++ > include/hw/i386/pc.h | 1 + > 4 files changed, 65 insertions(+), 9 deletions(-) > > diff --git a/hw/i386/pc.c b/hw/i386/pc.c > index 6c82ada..485b44d 100644 > --- a/hw/i386/pc.c > +++ b/hw/i386/pc.c > @@ -1148,8 +1148,10 @@ FWCfgState *pc_memory_init(MemoryRegion *system_memory, > { > int linux_boot, i; > MemoryRegion *ram, *option_rom_mr; > - MemoryRegion *ram_below_4g, *ram_above_4g; > + MemoryRegion *ram_below_4g, *ram_above_4g_pieceone, *ram_above_4g_piecetwo; > FWCfgState *fw_cfg; > + uint64_t holesize, pieceonesize, piecetwosize; > + uint64_t memsize, align_offset; > > linux_boot = (kernel_filename != NULL); > > @@ -1157,26 +1159,73 @@ FWCfgState *pc_memory_init(MemoryRegion *system_memory, > * aliases to address portions of it, mostly for backwards compatibility > * with older qemus that used qemu_ram_alloc(). > */ > + memsize = below_4g_mem_size + above_4g_mem_size; > + holesize = 0x100000000ULL - below_4g_mem_size; > + > + /* If 1GB hugepages are used to back guest RAM, we want the > + * physical address 4GB to map to 4GB in the RAM, so that > + * memory beyond 4GB is aligned on a 1GB boundary, at the > + * host physical address space. Thus, the ram block range > + * [holestart, 4GB] is mapped to the last holesize bytes of RAM: > + * > + * 0 h 4G memsize-holesize > + * > + * contiguous-ram-block [xxxxxx][yyy][zzzzz] > + * '-----------. > + * guest-addr-space [xxxxxx] [zzzzz][yyy] > + * > + * This is only done in new-enough machine types, and of course > + * it is only possible if the [zzzzz] block exists at all.
> + */ > + if (guest_info->gb_align && above_4g_mem_size > holesize) { > + /* Round the allocation up to 2 MB to make [zzzzz]'s size > + * aligned, removing the extra from the [yyy] piece. > + */ > + align_offset = ROUND_UP(memsize, 1UL << 21) - memsize; > + piecetwosize = holesize - align_offset; > + } else { > + /* There's no [zzzzz] piece, all memory above 4G starts > + * at below_4g_mem_size in the RAM block. Also no need > + * to align anything. > + */ > + align_offset = 0; > + piecetwosize = above_4g_mem_size; > + } > + > ram = g_malloc(sizeof(*ram)); > - memory_region_init_ram(ram, NULL, "pc.ram", > - below_4g_mem_size + above_4g_mem_size); > + memory_region_init_ram(ram, NULL, "pc.ram", memsize + align_offset); > vmstate_register_ram_global(ram); > *ram_memory = ram; > + > ram_below_4g = g_malloc(sizeof(*ram_below_4g)); > memory_region_init_alias(ram_below_4g, NULL, "ram-below-4g", ram, > 0, below_4g_mem_size); > memory_region_add_subregion(system_memory, 0, ram_below_4g); > + > + pieceonesize = above_4g_mem_size - piecetwosize; > + if (pieceonesize) { > + ram_above_4g_pieceone = g_malloc(sizeof(*ram_above_4g_pieceone)); > + memory_region_init_alias(ram_above_4g_pieceone, NULL, > + "ram-above-4g-pieceone", ram, > + 0x100000000ULL, pieceonesize); > + memory_region_add_subregion(system_memory, 0x100000000ULL, > + ram_above_4g_pieceone); > + } > + if (piecetwosize) { > + ram_above_4g_piecetwo = g_malloc(sizeof(*ram_above_4g_piecetwo)); > + memory_region_init_alias(ram_above_4g_piecetwo, NULL, > + "ram-above-4g-piecetwo", ram, > + below_4g_mem_size, piecetwosize); > + memory_region_add_subregion(system_memory, > + 0x100000000ULL + pieceonesize, > + ram_above_4g_piecetwo); > + } > + > e820_add_entry(0, below_4g_mem_size, E820_RAM); > if (above_4g_mem_size > 0) { > - ram_above_4g = g_malloc(sizeof(*ram_above_4g)); > - memory_region_init_alias(ram_above_4g, NULL, "ram-above-4g", ram, > - below_4g_mem_size, above_4g_mem_size); > - 
memory_region_add_subregion(system_memory, 0x100000000ULL, > - ram_above_4g); > e820_add_entry(0x100000000ULL, above_4g_mem_size, E820_RAM); > } > > - > /* Initialize PC system firmware */ > pc_system_firmware_init(rom_memory, guest_info->isapc_ram_fw); > > diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c > index 36f2495..ca9bd2e 100644 > --- a/hw/i386/pc_piix.c > +++ b/hw/i386/pc_piix.c > @@ -62,6 +62,7 @@ static bool has_pvpanic; > static bool has_pci_info; > static bool has_acpi_build = true; > static bool smbios_type1_defaults = true; > +static bool gb_align = true; > > /* PC hardware initialisation */ > static void pc_init1(QEMUMachineInitArgs *args, > @@ -130,6 +131,7 @@ static void pc_init1(QEMUMachineInitArgs *args, > > guest_info->has_pci_info = has_pci_info; > guest_info->isapc_ram_fw = !pci_enabled; > + guest_info->gb_align = gb_align; > > if (smbios_type1_defaults) { > /* These values are guest ABI, do not change */ > @@ -249,6 +251,7 @@ static void pc_init_pci(QEMUMachineInitArgs *args) > static void pc_compat_1_7(QEMUMachineInitArgs *args) > { > smbios_type1_defaults = false; > + gb_align = false; > } > > static void pc_compat_1_6(QEMUMachineInitArgs *args) > diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c > index 50ca458..89c7720 100644 > --- a/hw/i386/pc_q35.c > +++ b/hw/i386/pc_q35.c > @@ -52,6 +52,7 @@ static bool has_pvpanic; > static bool has_pci_info; > static bool has_acpi_build = true; > static bool smbios_type1_defaults = true; > +static bool gb_align = true; > > /* PC hardware initialisation */ > static void pc_q35_init(QEMUMachineInitArgs *args) > @@ -115,6 +116,7 @@ static void pc_q35_init(QEMUMachineInitArgs *args) > guest_info->has_pci_info = has_pci_info; > guest_info->isapc_ram_fw = false; > guest_info->has_acpi_build = has_acpi_build; > + guest_info->gb_align = gb_align; > > if (smbios_type1_defaults) { > /* These values are guest ABI, do not change */ > @@ -233,6 +235,7 @@ static void pc_q35_init(QEMUMachineInitArgs *args) > 
static void pc_compat_1_7(QEMUMachineInitArgs *args) > { > smbios_type1_defaults = false; > + gb_align = false; > } > > static void pc_compat_1_6(QEMUMachineInitArgs *args) > diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h > index 9af09d3..8047e82 100644 > --- a/include/hw/i386/pc.h > +++ b/include/hw/i386/pc.h > @@ -41,6 +41,7 @@ struct PcGuestInfo { > uint64_t *node_cpu; > FWCfgState *fw_cfg; > bool has_acpi_build; > + bool gb_align; > }; > > /* parallel.c */ > -- > 1.7.1