From: Andrea Arcangeli <andrea@qumranet.com>
To: kvm-devel@lists.sourceforge.net
Subject: [2/3] -reserved-ram for PCI passthrough without VT-d and without paravirt
Date: Mon, 31 Mar 2008 19:07:51 +0200 [thread overview]
Message-ID: <20080331170751.GD12427@duo.random> (raw)
In-Reply-To: <20080331165554.GB12427@duo.random>
This is the kvm-userland.git patch overwriting the ranges in the
virtualized e820 map with /dev/mem. Everything is validated through
/proc/iomem, so should the hardware e820 map be weird, there will be
zero risk of corruption; it will simply fail to start up with a verbose
error.
The bios has to be rebuilt to pass the variable address near 640k
where to stop the virtualized e820 slot as a function of the ram
available in the host, and as a function of the early-reserve for things
like the smp trampoline page that we don't want to pass as available
ram to the guest. Only the page at address zero is magic and it's
mapped as ram in the guest, but it's allocated through regular
anonymous memory as you can see from the first /dev/mem mapping
starting at area+reserved[0]. To rebuild the bios, running "make bios"
before "make install" should do the trick. If you don't rebuild the bios
everything will work fine as long as you don't use pci-passthrough, but
with the old bios pci passthrough will randomly corrupt host memory.
Signed-off-by: Andrea Arcangeli <andrea@qumranet.com>
diff --git a/bios/rombios.c b/bios/rombios.c
index 318de57..f93a6c6 100644
--- a/bios/rombios.c
+++ b/bios/rombios.c
@@ -4251,6 +4251,7 @@ int15_function32(regs, ES, DS, FLAGS)
Bit32u extra_lowbits_memory_size=0;
Bit16u CX,DX;
Bit8u extra_highbits_memory_size=0;
+ Bit32u below_640_end;
BX_DEBUG_INT15("int15 AX=%04x\n",regs.u.r16.ax);
@@ -4305,6 +4306,11 @@ ASM_END
case 0x20: // coded by osmaker aka K.J.
if(regs.u.r32.edx == 0x534D4150)
{
+ below_640_end = inb_cmos(0x16);
+ below_640_end <<= 8;
+ below_640_end |= inb_cmos(0x15);
+ below_640_end *= 1024;
+
extended_memory_size = inb_cmos(0x35);
extended_memory_size <<= 8;
extended_memory_size |= inb_cmos(0x34);
@@ -4334,7 +4340,7 @@ ASM_END
{
case 0:
set_e820_range(ES, regs.u.r16.di,
- 0x0000000L, 0x0009fc00L, 0, 0, 1);
+ 0x0000000L, below_640_end, 0, 0, 1);
regs.u.r32.ebx = 1;
regs.u.r32.eax = 0x534D4150;
regs.u.r32.ecx = 0x14;
@@ -4343,7 +4349,7 @@ ASM_END
break;
case 1:
set_e820_range(ES, regs.u.r16.di,
- 0x0009fc00L, 0x000a0000L, 0, 0, 2);
+ below_640_end, 0x000a0000L, 0, 0, 2);
regs.u.r32.ebx = 2;
regs.u.r32.eax = 0x534D4150;
regs.u.r32.ecx = 0x14;
diff --git a/qemu/hw/pc.c b/qemu/hw/pc.c
index 0d2e6c3..a6b28c8 100644
--- a/qemu/hw/pc.c
+++ b/qemu/hw/pc.c
@@ -198,6 +198,8 @@ static void cmos_init(ram_addr_t ram_size, ram_addr_t above_4g_mem_size,
/* memory size */
val = 640; /* base memory in K */
+ if (reserved_ram)
+ val = reserved[1] / 1024;
rtc_set_memory(s, 0x15, val);
rtc_set_memory(s, 0x16, val >> 8);
diff --git a/qemu/pc-bios/bios.bin b/qemu/pc-bios/bios.bin
index 2e7d3e0..90d626d 100644
Binary files a/qemu/pc-bios/bios.bin and b/qemu/pc-bios/bios.bin differ
diff --git a/qemu/sysemu.h b/qemu/sysemu.h
index c728605..db0dda4 100644
--- a/qemu/sysemu.h
+++ b/qemu/sysemu.h
@@ -103,6 +103,8 @@ extern int autostart;
extern int old_param;
extern int hpagesize;
extern const char *bootp_filename;
+extern int reserved_ram;
+extern int64_t reserved[4];
#ifdef USE_KQEMU
diff --git a/qemu/vl.c b/qemu/vl.c
index 3570388..31adc90 100644
--- a/qemu/vl.c
+++ b/qemu/vl.c
@@ -240,6 +240,8 @@ int time_drift_fix = 0;
unsigned int kvm_shadow_memory = 0;
const char *mem_path = NULL;
int hpagesize = 0;
+int reserved_ram = 0;
+int64_t reserved[4];
const char *cpu_vendor_string;
#ifdef TARGET_ARM
int old_param = 0;
@@ -8313,6 +8315,7 @@ enum {
QEMU_OPTION_tdf,
QEMU_OPTION_kvm_shadow_memory,
QEMU_OPTION_mempath,
+ QEMU_OPTION_reserved_ram,
};
typedef struct QEMUOption {
@@ -8439,6 +8442,7 @@ const QEMUOption qemu_options[] = {
{ "clock", HAS_ARG, QEMU_OPTION_clock },
{ "startdate", HAS_ARG, QEMU_OPTION_startdate },
{ "mem-path", HAS_ARG, QEMU_OPTION_mempath },
+ { "reserved-ram", 0, QEMU_OPTION_reserved_ram },
{ NULL },
};
@@ -8724,6 +8728,80 @@ static int gethugepagesize(void)
return hugepagesize;
}
+/*
+ * Scan /proc/iomem for the first "reserved RAM" range that satisfies the
+ * given constraints.
+ *
+ * _start:   receives the first address of the matching range.
+ * _end:     receives the address one past the last byte of the range
+ *           (the end address listed in /proc/iomem plus one).
+ * below:    if non-zero, the listed end address must be <= below.
+ * above:    if non-zero, the start address must be >= above.
+ * min_size: if non-zero, (listed end - start) must be >= min_size.
+ *
+ * Returns 1 and fills in the two output values on a match, 0 when no
+ * range matches.  Exits with status 1 if /proc/iomem cannot be opened
+ * or read.
+ */
+static int find_reserved_ram(int64_t *_start, int64_t *_end,
+                             unsigned long below, unsigned long above,
+                             unsigned long min_size)
+{
+    static const char needle[] = "reserved RAM\n";
+    char line[256];
+    int found = 0;
+    FILE *f;
+
+    f = fopen("/proc/iomem", "r");
+    if (!f) {
+        perror("open");
+        exit(1);
+    }
+
+    /*
+     * Parse one "start-end : name" line at a time.  This avoids assuming
+     * that the address range occupies a fixed number of characters in
+     * front of the name (addresses wider than 8 hex digits break that),
+     * and it does not truncate listings larger than a single buffer.
+     */
+    while (!found && fgets(line, sizeof(line), f)) {
+        int64_t start, end;
+        char *p;
+
+        /* strtoll skips the indentation used for nested resources. */
+        start = strtoll(line, &p, 16);
+        if (p == line || *p != '-')
+            continue;
+        end = strtoll(p + 1, &p, 16);
+        if (strncmp(p, " : ", 3) != 0 || strcmp(p + 3, needle) != 0)
+            continue;
+
+        if ((!above || start >= above) && (!below || end <= below) &&
+            (!min_size || end - start >= min_size)) {
+            *_start = start;
+            *_end = end + 1;
+            found = 1;
+        }
+    }
+
+    if (!found && ferror(f)) {
+        perror("read");
+        exit(1);
+    }
+
+    fclose(f);
+    return found;
+}
+
+/*
+ * Look up the host "reserved RAM" ranges with find_reserved_ram() and
+ * store them in reserved[]: entries [0]/[1] come from the search limited
+ * to below 640k (minimum size argument 500k), entries [2]/[3] from the
+ * search limited to 1M and above (minimum size argument 1M).
+ *
+ * When both ranges are found, reserved_ram is set to 1 and reserved[1]
+ * is masked with TARGET_PAGE_MASK (presumably aligning it down to a page
+ * boundary -- confirm against the macro definition).
+ *
+ * Prints a message to stderr and exits with status 1 when either range
+ * is missing, when reserved[0] is not 4096, when reserved[2] is not 1M,
+ * or when reserved[3] is smaller than ram_size.
+ *
+ * NOTE(review): ram_size is read at the time this runs; verify that the
+ * caller has already applied any -m setting by then.
+ */
+static void init_reserved_ram(void)
+{
+    const int64_t low_start = 4096;
+    const int64_t high_start = 1024*1024;
+
+    if (!find_reserved_ram(&reserved[0], &reserved[1],
+                           640*1024, 0, 500*1024) ||
+        !find_reserved_ram(&reserved[2], &reserved[3],
+                           0, 1024*1024, 1024*1024)) {
+        fprintf(stderr, "host reserved ram not found\n");
+        exit(1);
+    }
+    reserved_ram = 1;
+
+    /* Both ranges must begin exactly where the guest layout expects. */
+    if (reserved[0] != low_start || reserved[2] != high_start) {
+        fprintf(stderr, "strange host ram layout\n");
+        exit(1);
+    }
+
+    if (reserved[3] < ram_size) {
+        fprintf(stderr, "not enough host reserved ram, decrease -m\n");
+        exit(1);
+    }
+
+    reserved[1] &= TARGET_PAGE_MASK;
+}
+
void *alloc_mem_area(unsigned long memory, const char *path)
{
char *filename;
@@ -8768,10 +8846,43 @@ void *qemu_alloc_physram(unsigned long memory)
{
void *area = NULL;
- if (mem_path)
+ if (!area && mem_path)
area = alloc_mem_area(memory, mem_path);
- if (!area)
+ if (!area) {
area = qemu_vmalloc(memory);
+ if (reserved_ram) {
+ int fd;
+ if (memory < reserved[2]) {
+ printf("memory < reserved[2]\n");
+ return NULL;
+ }
+ fd = open("/dev/mem", O_RDWR);
+ if (fd < 0) {
+ perror("reserved_ram requires access to /dev/mem");
+ return NULL;
+ }
+ if (mmap((char *)area+reserved[0],
+ reserved[1]-reserved[0],
+ PROT_READ|PROT_WRITE, MAP_SHARED|MAP_FIXED,
+ fd, 0) == MAP_FAILED) {
+ perror("reserved_ram mmap failed on /dev/mem");
+ return NULL;
+ }
+ bzero((char *)area+reserved[0], reserved[1]-reserved[0]);
+ if (mmap((char *)area+reserved[2],
+ ram_size-reserved[2],
+ PROT_READ|PROT_WRITE, MAP_SHARED|MAP_FIXED,
+ fd, reserved[2]) == MAP_FAILED) {
+ perror("reserved_ram mmap failed on /dev/mem");
+ return NULL;
+ }
+ bzero((char *)area+reserved[2], ram_size-reserved[2]);
+ if (close(fd) < 0) {
+ perror("/dev/mem");
+ return NULL;
+ }
+ }
+ }
return area;
}
@@ -9389,6 +9500,9 @@ int main(int argc, char **argv)
case QEMU_OPTION_mempath:
mem_path = optarg;
break;
+ case QEMU_OPTION_reserved_ram:
+ init_reserved_ram();
+ break;
case QEMU_OPTION_name:
qemu_name = optarg;
break;
-------------------------------------------------------------------------
Check out the new SourceForge.net Marketplace.
It's the best place to buy or sell services for
just about anything Open Source.
http://ad.doubleclick.net/clk;164216239;13503038;w?http://sf.net/marketplace
next prev parent reply other threads:[~2008-03-31 17:07 UTC|newest]
Thread overview: 8+ messages / expand[flat|nested] mbox.gz Atom feed top
2008-03-31 16:55 [0/3] -reserved-ram for PCI passthrough without VT-d and without paravirt Andrea Arcangeli
2008-03-31 17:02 ` [1/3] " Andrea Arcangeli
2008-03-31 17:07 ` Andrea Arcangeli [this message]
2008-03-31 17:20 ` [3/3] -reserved-ram for PCI passthrough without iommu " Andrea Arcangeli
2008-04-11 12:13 ` [0/3] -reserved-ram for PCI passthrough without VT-d " Amit Shah
2008-04-11 18:36 ` [0/3] -reserved-ram for PCI passthrough without VT-d and without?paravirt Andrea Arcangeli
2008-04-12 7:41 ` Amit Shah
2008-04-12 12:22 ` Andrea Arcangeli
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20080331170751.GD12427@duo.random \
--to=andrea@qumranet.com \
--cc=kvm-devel@lists.sourceforge.net \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox