* Re: [RFC] Extending MTRRs above 4G
2008-09-17 17:51 [RFC] Extending MTRRs above 4G Alex Williamson
2008-09-17 19:47 ` Philip Herron
@ 2008-09-17 21:05 ` Alex Williamson
2008-09-23 9:19 ` Avi Kivity
2008-09-23 9:18 ` Avi Kivity
2 siblings, 1 reply; 5+ messages in thread
From: Alex Williamson @ 2008-09-17 21:05 UTC (permalink / raw)
To: kvm
On Wed, 2008-09-17 at 11:51 -0600, Alex Williamson wrote:
> the patch below is a possible
> approach to continue down this path and enlighten rombios32 about the
> real top of memory, and setup MTRRs appropriately. It doesn't address
> SMBIOS or whatever causes grub to only report upper memory below 4G.
This version also fixes SMBIOS; I haven't found where grub is getting
its idea of upper memory yet. It seems like there's a bug in the SMBIOS
type 19 & 20 entries, in that we were using (size_mb - 1) * 1024. I changed
this to (size_mb * 1024) - 1, which seems to make a lot more sense in
the dmidecode output. If that's wrong, please let me know. Thanks,
Alex
Signed-off-by: Alex Williamson <alex.williamson@hp.com>
--
diff --git a/bios/rombios32.c b/bios/rombios32.c
index 2dc1d25..f0cb070 100755
--- a/bios/rombios32.c
+++ b/bios/rombios32.c
@@ -415,7 +415,8 @@ int smp_cpus;
uint32_t cpuid_signature;
uint32_t cpuid_features;
uint32_t cpuid_ext_features;
-unsigned long ram_size;
+unsigned long low_ram_size;
+uint64_t mem_top;
uint8_t bios_uuid[16];
#ifdef BX_USE_EBDA_TABLES
unsigned long ebda_cur_addr;
@@ -503,12 +504,12 @@ void setup_mtrr(void)
return;
u.val = 0;
for (i = 0; i < 8; ++i)
- if (ram_size >= 65536 * (i + 1))
+ if (low_ram_size >= 65536 * (i + 1))
u.valb[i] = 6;
wrmsr_smp(MSR_MTRRfix64K_00000, u.val);
u.val = 0;
for (i = 0; i < 8; ++i)
- if (ram_size >= 65536 * 8 + 16384 * (i + 1))
+ if (low_ram_size >= 65536 * 8 + 16384 * (i + 1))
u.valb[i] = 6;
wrmsr_smp(MSR_MTRRfix16K_80000, u.val);
wrmsr_smp(MSR_MTRRfix16K_A0000, 0);
@@ -522,9 +523,17 @@ void setup_mtrr(void)
wrmsr_smp(MSR_MTRRfix4K_F8000, 0);
vbase = 0;
--vcnt; /* leave one mtrr for VRAM */
- for (i = 0; i < vcnt && vbase < ram_size; ++i) {
+ for (i = 0; i < vcnt && vbase < low_ram_size; ++i) {
vmask = (1ull << 40) - 1;
- while (vbase + vmask + 1 > ram_size)
+ while (vbase + vmask + 1 > low_ram_size)
+ vmask >>= 1;
+ wrmsr_smp(MTRRphysBase_MSR(i), vbase | 6);
+ wrmsr_smp(MTRRphysMask_MSR(i), (~vmask & 0xfffffff000ull) | 0x800);
+ vbase += vmask + 1;
+ }
+ for (vbase = 1ull << 32; i < vcnt && vbase < mem_top; ++i) {
+ vmask = (1ull << 40) - 1;
+ while (vbase + vmask + 1 > mem_top)
vmask >>= 1;
wrmsr_smp(MTRRphysBase_MSR(i), vbase | 6);
wrmsr_smp(MTRRphysMask_MSR(i), (~vmask & 0xfffffff000ull) | 0x800);
@@ -535,16 +544,26 @@ void setup_mtrr(void)
void ram_probe(void)
{
- if (cmos_readb(0x34) | cmos_readb(0x35))
- ram_size = (cmos_readb(0x34) | (cmos_readb(0x35) << 8)) * 65536 +
- 16 * 1024 * 1024;
- else
- ram_size = (cmos_readb(0x17) | (cmos_readb(0x18) << 8)) * 1024;
+ if (cmos_readb(0x34) | cmos_readb(0x35))
+ low_ram_size = (cmos_readb(0x34) | (cmos_readb(0x35) << 8)) *
+ 65536 + 16 * 1024 * 1024;
+ else
+ low_ram_size = (cmos_readb(0x17) | (cmos_readb(0x18) << 8)) * 1024;
+
+ if (cmos_readb(0x5b) | cmos_readb(0x5c) | cmos_readb(0x5d)) {
+ mem_top = ((uint64_t)cmos_readb(0x5b) << 16) |
+ ((uint64_t)cmos_readb(0x5c) << 24) |
+ ((uint64_t)cmos_readb(0x5d) << 32);
+ mem_top += 1ull << 32;
+ } else
+ mem_top = low_ram_size;
+
#ifdef BX_USE_EBDA_TABLES
ebda_cur_addr = ((*(uint16_t *)(0x40e)) << 4) + 0x380;
#endif
- BX_INFO("ram_size=0x%08lx\n", ram_size);
- setup_mtrr();
+ BX_INFO("low_ram_size=0x%08lx\n", low_ram_size);
+ BX_INFO("top of ram %ldMB\n", mem_top / (1024 * 1024));
+ setup_mtrr();
}
/****************************************************/
@@ -951,7 +970,7 @@ void pci_bios_init(void)
{
pci_bios_io_addr = 0xc000;
pci_bios_mem_addr = 0xf0000000;
- pci_bios_bigmem_addr = ram_size;
+ pci_bios_bigmem_addr = low_ram_size;
if (pci_bios_bigmem_addr < 0x90000000)
pci_bios_bigmem_addr = 0x90000000;
@@ -1021,7 +1040,7 @@ static void mptable_init(void)
int mp_config_table_size;
#ifdef BX_USE_EBDA_TABLES
- mp_config_table = (uint8_t *)(ram_size - ACPI_DATA_SIZE - MPTABLE_MAX_SIZE);
+ mp_config_table = (uint8_t *)(low_ram_size - ACPI_DATA_SIZE - MPTABLE_MAX_SIZE);
#else
bios_table_cur_addr = align(bios_table_cur_addr, 16);
mp_config_table = (uint8_t *)bios_table_cur_addr;
@@ -1409,7 +1428,7 @@ void acpi_bios_init(void)
bios_table_cur_addr += sizeof(*rsdp);
#endif
- addr = base_addr = ram_size - ACPI_DATA_SIZE;
+ addr = base_addr = low_ram_size - ACPI_DATA_SIZE;
rsdt_addr = addr;
rsdt = (void *)(addr);
addr += sizeof(*rsdt);
@@ -1931,7 +1950,7 @@ smbios_type_19_init(void *start, uint32_t memory_size_mb)
p->header.handle = 0x1300;
p->starting_address = 0;
- p->ending_address = (memory_size_mb-1) * 1024;
+ p->ending_address = (memory_size_mb * 1024) - 1;
p->memory_array_handle = 0x1000;
p->partition_width = 1;
@@ -1952,7 +1971,7 @@ smbios_type_20_init(void *start, uint32_t memory_size_mb)
p->header.handle = 0x1400;
p->starting_address = 0;
- p->ending_address = (memory_size_mb-1)*1024;
+ p->ending_address = (memory_size_mb * 1024) - 1;
p->memory_device_handle = 0x1100;
p->memory_array_mapped_address_handle = 0x1300;
p->partition_row_position = 1;
@@ -2003,7 +2022,7 @@ void smbios_init(void)
{
unsigned cpu_num, nr_structs = 0, max_struct_size = 0;
char *start, *p, *q;
- int memsize = ram_size / (1024 * 1024);
+ int memsize = (mem_top - (1ull << 32) + low_ram_size) / (1024 * 1024);
#ifdef BX_USE_EBDA_TABLES
ebda_cur_addr = align(ebda_cur_addr, 16);
@@ -2030,8 +2049,8 @@ void smbios_init(void)
add_struct(smbios_type_4_init(p, cpu_num));
add_struct(smbios_type_16_init(p, memsize));
add_struct(smbios_type_17_init(p, memsize));
- add_struct(smbios_type_19_init(p, memsize));
- add_struct(smbios_type_20_init(p, memsize));
+ add_struct(smbios_type_19_init(p, mem_top / (1024 * 1024)));
+ add_struct(smbios_type_20_init(p, mem_top / (1024 * 1024)));
add_struct(smbios_type_32_init(p));
add_struct(smbios_type_127_init(p));
^ permalink raw reply related [flat|nested] 5+ messages in thread
* Re: [RFC] Extending MTRRs above 4G
2008-09-17 17:51 [RFC] Extending MTRRs above 4G Alex Williamson
2008-09-17 19:47 ` Philip Herron
2008-09-17 21:05 ` Alex Williamson
@ 2008-09-23 9:18 ` Avi Kivity
2 siblings, 0 replies; 5+ messages in thread
From: Avi Kivity @ 2008-09-23 9:18 UTC (permalink / raw)
To: Alex Williamson; +Cc: kvm
Alex Williamson wrote:
> When I try to boot guests using a recent Linux kernel (2.6.26+), memory
> above 3.5G gets thrown away with an error like this:
>
> WARNING: BIOS bug: CPU MTRRs don't cover all of memory, losing 4608MB of RAM.
>
> And it's true, we're only providing MTRRs for memory below 4G. In fact
> rombios32 knows very little, if anything, about memory above 4G, as seen
> by memory reporting in the SMBIOS table.
>
> It looks like the Linux kernel MTRR code does have a bail-out point for
> kvm/qemu, but that was only effective before we started reporting MTRRs.
> On real hardware, I have two systems that do this two different ways.
> The first is an Intel based system, which reports MTRRs to cover the I/O
> space, then defaults the rest of memory to WB. The second is an AMD
> based system which uses MTRRs to cover memory below 4G, then seems to
> have a special AMD MSR to describe the top of memory above 4G. Xen
> appears to mimic the first approach.
>
> Is there any reason that KVM sets the default MTRR type to UC, then only
> sets up MTRRs for the memory below 4G?
The thinking is that if we hotplug a device, its memory must be set to
uncacheable by default.
> the patch below is a possible
> approach to continue down this path and enlighten rombios32 about the
> real top of memory, and setup MTRRs appropriately. It doesn't address
> SMBIOS or whatever causes grub to only report upper memory below 4G.
> Alternatively we could switch to the Intel/Xen system approach, but it
> seems rombios32 needs to understand the extra memory at some point
> anyway. Thoughts? BTW, another benefit to the default WB approach is
> that MTRRs are a limited resource and there will be memory sizes we
> can't fully cover using the approach below.
Yes, especially with the pci hole causing any memory size to require
many MTRRs.
I'd like to switch to default WB + MTRRs covering the pci space, but I'd
like to get a clear understanding of how we handle hotplug. Meanwhile,
I've applied your patch.
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 5+ messages in thread