All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] Elf loader fixes
@ 2006-02-22 11:37 Gerd Hoffmann
  2006-02-22 12:51 ` Jan Beulich
  0 siblings, 1 reply; 13+ messages in thread
From: Gerd Hoffmann @ 2006-02-22 11:37 UTC (permalink / raw)
  To: Xen devel list

[-- Attachment #1: Type: text/plain, Size: 671 bytes --]

  Hi folks,

The Xen ELF kernel loader is quite quirky wrt. physical and virtual
addresses, probably for historical reasons, linux got that wrong too
until very recently (kexec merge in 2.6.14 or so).  The patch below
fixes that.

Changes:
  * Fix linux kernel ELF entry point (also submitted to lkml)
  * Drop LOAD_OFFSET re-#define hack in xen headers.
  * Fix both dom0 and libxc elf loaders.
  * add quick mode so loading old linux kernels doesn't break.

Linux-wise everything should be OK with that, but it might break other
OS'es which also use the ELF loader (in case they create bug-compatible
ELF headers with broken paddr entries ...).

please apply,

  Gerd


[-- Attachment #2: load-offset-1.diff --]
[-- Type: text/x-patch, Size: 7443 bytes --]

diff -r 5abf652c4c52 linux-2.6-xen-sparse/arch/i386/kernel/vmlinux.lds.S
--- a/linux-2.6-xen-sparse/arch/i386/kernel/vmlinux.lds.S	Tue Feb 21 18:36:00 2006
+++ b/linux-2.6-xen-sparse/arch/i386/kernel/vmlinux.lds.S	Wed Feb 22 12:20:06 2006
@@ -10,7 +10,7 @@
 
 OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386")
 OUTPUT_ARCH(i386)
-ENTRY(phys_startup_32)
+ENTRY(startup_32)
 jiffies = jiffies_64;
 SECTIONS
 {
diff -r 5abf652c4c52 linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/page.h
--- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/page.h	Tue Feb 21 18:36:00 2006
+++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/page.h	Wed Feb 22 12:20:06 2006
@@ -288,10 +288,6 @@
 #endif
 #define __KERNEL_START		(__PAGE_OFFSET + __PHYSICAL_START)
 
-#undef LOAD_OFFSET
-#define LOAD_OFFSET		0
-
-
 #define PAGE_OFFSET		((unsigned long)__PAGE_OFFSET)
 #define VMALLOC_RESERVE		((unsigned long)__VMALLOC_RESERVE)
 #define MAXMEM			(HYPERVISOR_VIRT_START-__PAGE_OFFSET-__VMALLOC_RESERVE)
diff -r 5abf652c4c52 tools/libxc/xc_load_elf.c
--- a/tools/libxc/xc_load_elf.c	Tue Feb 21 18:36:00 2006
+++ b/tools/libxc/xc_load_elf.c	Wed Feb 22 12:20:06 2006
@@ -138,10 +138,10 @@
         phdr = (Elf_Phdr *)(image + ehdr->e_phoff + (h*ehdr->e_phentsize));
         if ( !is_loadable_phdr(phdr) )
             continue;
-        if ( phdr->p_paddr < kernstart )
-            kernstart = phdr->p_paddr;
-        if ( (phdr->p_paddr + phdr->p_memsz) > kernend )
-            kernend = phdr->p_paddr + phdr->p_memsz;
+        if ( phdr->p_vaddr < kernstart )
+            kernstart = phdr->p_vaddr;
+        if ( (phdr->p_vaddr + phdr->p_memsz) > kernend )
+            kernend = phdr->p_vaddr + phdr->p_memsz;
     }
 
     if ( (kernstart > kernend) || 
@@ -189,7 +189,18 @@
         
         for ( done = 0; done < phdr->p_filesz; done += chunksz )
         {
-            pa = (phdr->p_paddr + done) - dsi->v_start;
+            if (phdr->p_paddr == phdr->p_vaddr) {
+                /*
+                 * Bug compatibility alert: In older linux kernels
+                 * p_paddr is broken, it doesn't contain the physical
+                 * address but instead is identical to p_vaddr.  Thus
+                 * we can't use it directly, instead we'll guess it
+                 * using dsi->v_start.
+                 */
+                pa = (phdr->p_vaddr + done) - dsi->v_start;
+            } else {
+                pa = (phdr->p_paddr + done);
+            }
             va = xc_map_foreign_range(
                 xch, dom, PAGE_SIZE, PROT_WRITE, parray[pa>>PAGE_SHIFT]);
             chunksz = phdr->p_filesz - done;
@@ -202,7 +213,12 @@
 
         for ( ; done < phdr->p_memsz; done += chunksz )
         {
-            pa = (phdr->p_paddr + done) - dsi->v_start;
+            if (phdr->p_paddr == phdr->p_vaddr) {
+                /* bug compatibility alert, see above */
+                pa = (phdr->p_vaddr + done) - dsi->v_start;
+            } else {
+                pa = (phdr->p_paddr + done);
+            }
             va = xc_map_foreign_range(
                 xch, dom, PAGE_SIZE, PROT_WRITE, parray[pa>>PAGE_SHIFT]);
             chunksz = phdr->p_memsz - done;
diff -r 5abf652c4c52 xen/arch/x86/domain.c
--- a/xen/arch/x86/domain.c	Tue Feb 21 18:36:00 2006
+++ b/xen/arch/x86/domain.c	Wed Feb 22 12:20:06 2006
@@ -346,7 +346,7 @@
     struct vcpu *v, struct vcpu_guest_context *c)
 {
     struct domain *d = v->domain;
-    unsigned long phys_basetab;
+    unsigned long phys_basetab = 0;
     int i, rc;
 
     /*
diff -r 5abf652c4c52 xen/common/elf.c
--- a/xen/common/elf.c	Tue Feb 21 18:36:00 2006
+++ b/xen/common/elf.c	Wed Feb 22 12:20:06 2006
@@ -23,7 +23,8 @@
     Elf_Ehdr *ehdr = (Elf_Ehdr *)dsi->image_addr;
     Elf_Phdr *phdr;
     Elf_Shdr *shdr;
-    unsigned long kernstart = ~0UL, kernend=0UL;
+    unsigned long v_kernstart = ~0UL, v_kernend=0UL;
+    unsigned long p_kernstart = ~0UL, p_kernend=0UL;
     char *shstrtab, *guestinfo=NULL, *p;
     char *elfbase = (char *)dsi->image_addr;
     int h;
@@ -87,21 +88,31 @@
         phdr = (Elf_Phdr *)(elfbase + ehdr->e_phoff + (h*ehdr->e_phentsize));
         if ( !is_loadable_phdr(phdr) )
             continue;
-        if ( phdr->p_paddr < kernstart )
-            kernstart = phdr->p_paddr;
-        if ( (phdr->p_paddr + phdr->p_memsz) > kernend )
-            kernend = phdr->p_paddr + phdr->p_memsz;
-    }
-
-    if ( (kernstart > kernend) || 
-         (ehdr->e_entry < kernstart) || 
-         (ehdr->e_entry > kernend) )
+        printk("%s: phdr: vaddr %08lx paddr %08lx filesz %08lx\n",
+               __FUNCTION__,
+               (unsigned long)phdr->p_vaddr,
+               (unsigned long)phdr->p_paddr,
+               (unsigned long)phdr->p_filesz);
+        if ( phdr->p_vaddr < v_kernstart )
+            v_kernstart = phdr->p_vaddr;
+        if ( (phdr->p_vaddr + phdr->p_memsz) > v_kernend )
+            v_kernend = phdr->p_vaddr + phdr->p_memsz;
+        if ( phdr->p_paddr < p_kernstart )
+            p_kernstart = phdr->p_paddr;
+        if ( (phdr->p_paddr + phdr->p_memsz) > p_kernend )
+            p_kernend = phdr->p_paddr + phdr->p_memsz;
+    }
+
+    if ( (v_kernstart > v_kernend) || 
+         (p_kernstart > p_kernend) || 
+         (ehdr->e_entry < v_kernstart) || 
+         (ehdr->e_entry > v_kernend) )
     {
         printk("Malformed ELF image.\n");
         return -EINVAL;
     }
 
-    dsi->v_start = kernstart;
+    dsi->v_start = v_kernstart;
 
     if ( guestinfo != NULL )
     {
@@ -112,10 +123,26 @@
             dsi->load_symtab = 1;
     }
 
-    dsi->v_kernstart = kernstart;
-    dsi->v_kernend   = kernend;
+    dsi->v_kernstart = v_kernstart;
+    dsi->v_kernend   = v_kernend;
     dsi->v_kernentry = ehdr->e_entry;
     dsi->v_end       = dsi->v_kernend;
+
+    if (p_kernstart == v_kernstart) {
+        /*
+         * Bug compatibility alert: In older linux kernels
+         * p_paddr is broken, it doesn't contain the physical
+         * address but instead is identical to p_vaddr.  Thus
+         * we can't use it directly, instead we'll guess it
+         * using dsi->v_start.
+         */
+        printk("%s: linux kernel paddr quirk\n", __FUNCTION__);
+        dsi->p_kernstart = v_kernstart - dsi->v_start;
+        dsi->p_kernend   = v_kernend   - dsi->v_start;
+    } else {
+        dsi->p_kernstart = p_kernstart;
+        dsi->p_kernend   = p_kernend;
+    }
 
     loadelfsymtab(dsi, 0);
 
@@ -135,10 +162,10 @@
         if ( !is_loadable_phdr(phdr) )
             continue;
         if ( phdr->p_filesz != 0 )
-            memcpy((char *)phdr->p_paddr, elfbase + phdr->p_offset, 
+            memcpy((char *)phdr->p_vaddr, elfbase + phdr->p_offset, 
                    phdr->p_filesz);
         if ( phdr->p_memsz > phdr->p_filesz )
-            memset((char *)phdr->p_paddr + phdr->p_filesz, 0, 
+            memset((char *)phdr->p_vaddr + phdr->p_filesz, 0, 
                    phdr->p_memsz - phdr->p_filesz);
     }
 
diff -r 5abf652c4c52 xen/include/xen/sched.h
--- a/xen/include/xen/sched.h	Tue Feb 21 18:36:00 2006
+++ b/xen/include/xen/sched.h	Wed Feb 22 12:20:06 2006
@@ -165,6 +165,8 @@
     unsigned long v_end;
     unsigned long v_kernstart;
     unsigned long v_kernend;
+    unsigned long p_kernstart;
+    unsigned long p_kernend;
     unsigned long v_kernentry;
     /* Initialised by loader: Private. */
     unsigned int  load_symtab;

[-- Attachment #3: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] Elf loader fixes
  2006-02-22 11:37 [PATCH] Elf loader fixes Gerd Hoffmann
@ 2006-02-22 12:51 ` Jan Beulich
  2006-02-22 13:33   ` Gerd Hoffmann
  2006-02-22 15:12   ` Gerd Hoffmann
  0 siblings, 2 replies; 13+ messages in thread
From: Jan Beulich @ 2006-02-22 12:51 UTC (permalink / raw)
  To: Gerd Hoffmann; +Cc: Xen devel list

I'm afraid this won't work for 64-bits, because of the way the vsyscall page gets set up there. If you dump the program
headers you'll see that the respective segment is at an address close to the top of the address space, which would make
the image appear to need nearly 2 Gb of memory (making the load fail if there's less than that available to the
domain).

Also, I'm unclear why you need to make persistent the physical address information for the dom0 case, and hence why the
bug compatibility hack is needed there.

Jan

>>> Gerd Hoffmann <kraxel@suse.de> 22.02.06 12:37:26 >>>
Hi folks,

The Xen ELF kernel loader is quite quirky wrt. physical and virtual
addresses, probably for historical reasons, linux got that wrong too
until very recently (kexec merge in 2.6.14 or so).  The patch below
fixes that.

Changes:
  * Fix linux kernel ELF entry point (also submitted to lkml)
  * Drop LOAD_OFFSET re-#define hack in xen headers.
  * Fix both dom0 and libxc elf loaders.
  * add quick mode so loading old linux kernels doesn't break.

Linux-wise everything should be OK with that, but it might break other
OS'es which also use the ELF loader (in case they create bug-compatible
ELF headers with broken paddr entries ...).

please apply,

  Gerd

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] Elf loader fixes
  2006-02-22 12:51 ` Jan Beulich
@ 2006-02-22 13:33   ` Gerd Hoffmann
  2006-02-22 15:12   ` Gerd Hoffmann
  1 sibling, 0 replies; 13+ messages in thread
From: Gerd Hoffmann @ 2006-02-22 13:33 UTC (permalink / raw)
  To: Jan Beulich; +Cc: Xen devel list

Jan Beulich wrote:
> I'm afraid this won't work for 64-bits, because of the way the vsyscall page gets set up there. If you dump the program
> headers you'll see that the respective segment is at an address close to the top of the address space, which would make
> the image appear to need nearly 2 Gb of memory (making the load fail if there's less than that available to the
> domain).

Yep, will not work work as-is, the domain builder needs more fixes for that.

> Also, I'm unclear why you need to make persistent the physical address information for the dom0 case, and hence why the
> bug compatibility hack is needed there.

Well, the libxc loader needs it to be able to load both old kernels and
kernels with correct physical addresses in the phdr.

The dom0 loader doesn't really need it, at least on x32, right now it
doesn't look at the physical addresses anyway.  Instead it depends on
the VIRT_BASE hack to get it right.  It's sort of documentation only ;)

For x64 it will certainly be needed to get the memory size calculations
right (use physical not virtual addresses) and fix the issue you've
outlined above that way.

cheers,

  Gerd

-- 
Gerd 'just married' Hoffmann <kraxel@suse.de>
I'm the hacker formerly known as Gerd Knorr.
http://www.suse.de/~kraxel/just-married.jpeg

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] Elf loader fixes
  2006-02-22 12:51 ` Jan Beulich
  2006-02-22 13:33   ` Gerd Hoffmann
@ 2006-02-22 15:12   ` Gerd Hoffmann
  2006-02-22 15:30     ` Jan Beulich
  1 sibling, 1 reply; 13+ messages in thread
From: Gerd Hoffmann @ 2006-02-22 15:12 UTC (permalink / raw)
  To: Jan Beulich; +Cc: Xen devel list

Jan Beulich wrote:
> I'm afraid this won't work for 64-bits, because of the way the vsyscall page gets set up there. If you dump the program
> headers you'll see that the respective segment is at an address close to the top of the address space, which would make
> the image appear to need nearly 2 Gb of memory (making the load fail if there's less than that available to the
> domain).

Hmm, who sets up the vsyscall mapping right now?  The linux kernel I
guess?  The elf kernel loader certainly doesn't look like it cares much
about virtual address (other than VIRT_BASE) ...

cheers,

  Gerd

-- 
Gerd 'just married' Hoffmann <kraxel@suse.de>
I'm the hacker formerly known as Gerd Knorr.
http://www.suse.de/~kraxel/just-married.jpeg

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] Elf loader fixes
  2006-02-22 15:12   ` Gerd Hoffmann
@ 2006-02-22 15:30     ` Jan Beulich
  2006-02-22 16:10       ` Gerd Hoffmann
  0 siblings, 1 reply; 13+ messages in thread
From: Jan Beulich @ 2006-02-22 15:30 UTC (permalink / raw)
  To: Gerd Hoffmann; +Cc: Xen devel list

>>> Gerd Hoffmann <kraxel@suse.de> 22.02.06 16:12:25 >>>
>Jan Beulich wrote:
>> I'm afraid this won't work for 64-bits, because of the way the vsyscall page gets set up there. If you dump the
program
>> headers you'll see that the respective segment is at an address close to the top of the address space, which would
make
>> the image appear to need nearly 2 Gb of memory (making the load fail if there's less than that available to the
>> domain).
>
>Hmm, who sets up the vsyscall mapping right now?  The linux kernel I
>guess?

Yes.

>The elf kernel loader certainly doesn't look like it cares much
>about virtual address (other than VIRT_BASE) ...

Correct. And the same applies to boot loaders.

Jan

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] Elf loader fixes
  2006-02-22 15:30     ` Jan Beulich
@ 2006-02-22 16:10       ` Gerd Hoffmann
  2006-02-22 16:11         ` Ronald G Minnich
  2006-02-22 16:25         ` Jacob Gorm Hansen
  0 siblings, 2 replies; 13+ messages in thread
From: Gerd Hoffmann @ 2006-02-22 16:10 UTC (permalink / raw)
  To: Jan Beulich; +Cc: Xen devel list

>> The elf kernel loader certainly doesn't look like it cares much
>> about virtual address (other than VIRT_BASE) ...
> 
> Correct. And the same applies to boot loaders.

Well, the difference between xen and other boot loaders is that xen
boots the kernel with paging already enabled ...

Right now it's completely broken: the loader works only with virtual
addresses in the paddr header field.  It can't stay that way, especially
if we'll want to have kexec working with xenlinux kernels some day ;)

As I see things there are basically two ways to fixup this: Either
create a simple linear mapping using paddr + VIRT_BASE, which would be
pretty close to the current behaviour (and other boot loaders). Or do a
complete virtual memory setup, which probably can't be done without
major changes in the domain builder ...

cheers,

  Gerd

-- 
Gerd 'just married' Hoffmann <kraxel@suse.de>
I'm the hacker formerly known as Gerd Knorr.
http://www.suse.de/~kraxel/just-married.jpeg

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] Elf loader fixes
  2006-02-22 16:10       ` Gerd Hoffmann
@ 2006-02-22 16:11         ` Ronald G Minnich
  2006-02-22 16:25         ` Jacob Gorm Hansen
  1 sibling, 0 replies; 13+ messages in thread
From: Ronald G Minnich @ 2006-02-22 16:11 UTC (permalink / raw)
  To: Gerd Hoffmann; +Cc: Xen devel list, Jan Beulich

Gerd Hoffmann wrote:
>>>The elf kernel loader certainly doesn't look like it cares much
>>>about virtual address (other than VIRT_BASE) ...
>>
>>Correct. And the same applies to boot loaders.
> 
> 
> Well, the difference between xen and other boot loaders is that xen
> boots the kernel with paging already enabled ...

actually, there have historically been a ton of boot loaders that boot 
with paging already enabled: just go back to the early Suns; part of the 
task of SunOS was to move the firmware's page tables to kernel space and 
use them until it could build its own.

I hit this when I ported sunos to a machine with firmware that did not 
set up page tables.

it's been very common, in the non-PC world, to have paging turned on 
very early.

Just a historical comment of no value to this discussion :-)

ron

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] Elf loader fixes
  2006-02-22 16:10       ` Gerd Hoffmann
  2006-02-22 16:11         ` Ronald G Minnich
@ 2006-02-22 16:25         ` Jacob Gorm Hansen
  1 sibling, 0 replies; 13+ messages in thread
From: Jacob Gorm Hansen @ 2006-02-22 16:25 UTC (permalink / raw)
  To: Gerd Hoffmann; +Cc: Xen devel list, Jan Beulich

On 2/22/06, Gerd Hoffmann <kraxel@suse.de> wrote:

> As I see things there are basically two ways to fixup this: Either
> create a simple linear mapping using paddr + VIRT_BASE, which would be
> pretty close to the current behaviour (and other boot loaders). Or do a
> complete virtual memory setup, which probably can't be done without
> major changes in the domain builder ...

hi,

as previously mentioned here and at the XenSummit, I have a simple yet
functioning domU bootloader which defers ELF-decoding to domU-space.
It currently only works when the input is a ramdisk image, created
with a small tool called 'pack. It used to work from a TCP-connection
(provided by the fabulous UIP which is still in my tree), hence the
weird state-machine look of the code.

I suppose taking this approach could solve all our domU-building woes
for good. It should also provide a more elegant solution to the
attestation problems the Intel guys were talking about and trying to
solve with their two-stage domain building proposal.

Clone my tree at http://www.distlab.dk/hg/index.cgi/xen-gfx.hg and
have a look at the extras/mstrap subdir. The 'pack' tool is in
tools/migrate.

You will need Jam to build, or you have to roll your own Makefile. The
tree also contains my very experimental 'Blink' display system as
demoed at the Summit in tools/gfx, and my self-migration patch to
XenLinux which at the moment is mostly useful for self-checkpointing
to disk (see tools/minimig.c). Your mileage may vary.

Jacob

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] Elf loader fixes
       [not found] <E1F6x4v-0001lS-2n@host-192-168-0-1-bcn-london>
@ 2006-02-22 18:17 ` Joe Bonasera
  2006-02-23 11:15   ` Gerd Hoffmann
  0 siblings, 1 reply; 13+ messages in thread
From: Joe Bonasera @ 2006-02-22 18:17 UTC (permalink / raw)
  To: xen-devel


This is a good start as the PHYS vs. VIRT stuff in the ELF loader
is all a bit overloaded. However, I believe these changes aren't
quite complete and for example would break the released OpenSolaris Xen
client.  It has multiple PT_LOAD sections in the Elf file,
some with p_vaddr == p_paddr on purpose and some which don't. We rely on a
"boot loader" (ie grub or domain builder) that only cares about p_paddr.
The identity mapped PT_LOAD section contains the OS entry point and
has the code to remap the other of the sections to the final VA by
creating/installing new page table entries.

For example, I think the xc_load_elf.c change:

@@ -189,7 +189,18 @@

          for ( done = 0; done < phdr->p_filesz; done += chunksz )
          {
-            pa = (phdr->p_paddr + done) - dsi->v_start;
+            if (phdr->p_paddr == phdr->p_vaddr) {
+                /*
+                 * Bug compatibility alert: In older linux kernels
+                 * p_paddr is broken, it doesn't contain the physical
+                 * address but instead is identical to p_vaddr.  Thus
+                 * we can't use it directly, instead we'll guess it
+                 * using dsi->v_start.
+                 */
+                pa = (phdr->p_vaddr + done) - dsi->v_start;
+            } else {
+                pa = (phdr->p_paddr + done);
+            }
              va = xc_map_foreign_range(
                  xch, dom, PAGE_SIZE, PROT_WRITE, parray[pa>>PAGE_SHIFT]);
              chunksz = phdr->p_filesz - done;

needs to have the line:
	pa = (phdr->p_paddr + done);
be more like:
	pa = (phdr->p_paddr + done) - kernstart;

or better yet add a dsi->p_start and dsi->p_end to use. The same
applies to your change to xen/common/elf.c for dom0.

To save you downloading OpenSolaris. Here's sample values from
the domU/dom0 ELF image:

In the xenguest section we currently specify VIRT_BASE=0x40000000, as
there was no PHYS_BASE=. In the flavor of your other changes, I'd expect
you could add PHYS_BASE= and OpenSolaris would change to use that.

   e_entry:            0x40800000

Program Header[0]:
     p_vaddr:      0x40800000      p_flags:    [ PF_X  PF_W  PF_R ]
     p_paddr:      0x40800000      p_type:     [ PT_LOAD ]
     p_filesz:     0xe95c          p_memsz:    0xe95c
     p_offset:     0xd4            p_align:    0

Program Header[1]:
     p_vaddr:      0xfb400000      p_flags:    [ PF_X  PF_R ]
     p_paddr:      0x40000000      p_type:     [ PT_LOAD ]
     p_filesz:     0x2aa362        p_memsz:    0x2aa362
     p_offset:     0xea40          p_align:    0

Program Header[2]:
     p_vaddr:      0xfb800000      p_flags:    [ PF_X  PF_W  PF_R ]
     p_paddr:      0x40400000      p_type:     [ PT_LOAD ]
     p_filesz:     0x16515         p_memsz:    0x94a44
     p_offset:     0x2b8dc0        p_align:    0

Here's sample values we use for the 64 bit Xen OS image:

   e_entry:            0x40800000

Program Header[0]:
     p_vaddr:      0x40800000      p_flags:    [ PF_X  PF_W  PF_R ]
     p_paddr:      0x40800000      p_type:     [ PT_LOAD ]
     p_filesz:     0xed28          p_memsz:    0xed28
     p_offset:     0x190           p_align:    0

Program Header[1]:
     p_vaddr:      0xfffffffffb800000  p_flags:    [ PF_X  PF_R ]
     p_paddr:      0x40000000      p_type:     [ PT_LOAD ]
     p_filesz:     0x39adca        p_memsz:    0x39adca
     p_offset:     0xeec0          p_align:    0

Program Header[2]:
     p_vaddr:      0xfffffffffbc00000  p_flags:    [ PF_X  PF_W  PF_R ]
     p_paddr:      0x40400000      p_type:     [ PT_LOAD ]
     p_filesz:     0x20fe9         p_memsz:    0xe36c0
     p_offset:     0x3a9cc0        p_align:    0


Joe

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] Elf loader fixes
  2006-02-22 18:17 ` Joe Bonasera
@ 2006-02-23 11:15   ` Gerd Hoffmann
  2006-03-01  9:48     ` Christian Limpach
  0 siblings, 1 reply; 13+ messages in thread
From: Gerd Hoffmann @ 2006-02-23 11:15 UTC (permalink / raw)
  To: Joe Bonasera; +Cc: xen-devel

[-- Attachment #1: Type: text/plain, Size: 798 bytes --]

> some with p_vaddr == p_paddr on purpose and some which don't. We rely on a
> "boot loader" (ie grub or domain builder) that only cares about p_paddr.

Same goes for the x86_64 linux kernel with the vsyscall page ...

I've settled for a slightly different approach now.  To keep behaviour
as close as possible to classic i386 boot loaders I'll check paddr only
and use the virt_base value from the __xen_guest section to shift the
addresses.  For bug compatibility with old linux kernels I compare paddr
+ virt_base.  New patch below, this time tested both 32 and 64 bit
(linux only though), I think it should be ok for OpenSolaris too ;)

cheers,

  Gerd

-- 
Gerd 'just married' Hoffmann <kraxel@suse.de>
I'm the hacker formerly known as Gerd Knorr.
http://www.suse.de/~kraxel/just-married.jpeg

[-- Attachment #2: load-offset-4.diff --]
[-- Type: text/x-patch, Size: 8522 bytes --]

diff -r 175ad739d8bc linux-2.6-xen-sparse/arch/i386/kernel/vmlinux.lds.S
--- a/linux-2.6-xen-sparse/arch/i386/kernel/vmlinux.lds.S	Wed Feb 22 20:52:30 2006
+++ b/linux-2.6-xen-sparse/arch/i386/kernel/vmlinux.lds.S	Thu Feb 23 10:31:01 2006
@@ -10,7 +10,7 @@
 
 OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386")
 OUTPUT_ARCH(i386)
-ENTRY(phys_startup_32)
+ENTRY(startup_32)
 jiffies = jiffies_64;
 SECTIONS
 {
diff -r 175ad739d8bc linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/page.h
--- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/page.h	Wed Feb 22 20:52:30 2006
+++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/page.h	Thu Feb 23 10:31:01 2006
@@ -288,10 +288,6 @@
 #endif
 #define __KERNEL_START		(__PAGE_OFFSET + __PHYSICAL_START)
 
-#undef LOAD_OFFSET
-#define LOAD_OFFSET		0
-
-
 #define PAGE_OFFSET		((unsigned long)__PAGE_OFFSET)
 #define VMALLOC_RESERVE		((unsigned long)__VMALLOC_RESERVE)
 #define MAXMEM			(HYPERVISOR_VIRT_START-__PAGE_OFFSET-__VMALLOC_RESERVE)
diff -r 175ad739d8bc linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/page.h
--- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/page.h	Wed Feb 22 20:52:30 2006
+++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/page.h	Thu Feb 23 10:31:01 2006
@@ -259,9 +259,6 @@
 #define __PAGE_OFFSET           0xffff880000000000
 #endif /* !__ASSEMBLY__ */
 
-#undef LOAD_OFFSET
-#define LOAD_OFFSET		0
-
 /* to align the pointer to the (next) page boundary */
 #define PAGE_ALIGN(addr)	(((addr)+PAGE_SIZE-1)&PAGE_MASK)
 
diff -r 175ad739d8bc tools/libxc/xc_load_elf.c
--- a/tools/libxc/xc_load_elf.c	Wed Feb 22 20:52:30 2006
+++ b/tools/libxc/xc_load_elf.c	Thu Feb 23 10:31:01 2006
@@ -60,6 +60,7 @@
     Elf_Phdr *phdr;
     Elf_Shdr *shdr;
     unsigned long kernstart = ~0UL, kernend=0UL;
+    unsigned long sstart, send;
     char *shstrtab, *guestinfo=NULL, *p;
     int h;
 
@@ -117,6 +118,8 @@
         }
         if ( (strstr(guestinfo, "PAE=yes") != NULL) )
             dsi->pae_kernel = 1;
+        if ( (p = strstr(guestinfo, "VIRT_BASE=")) != NULL )
+            dsi->virt_base = strtoul(p+10, &p, 0);
 
         break;
     }
@@ -138,10 +141,27 @@
         phdr = (Elf_Phdr *)(image + ehdr->e_phoff + (h*ehdr->e_phentsize));
         if ( !is_loadable_phdr(phdr) )
             continue;
-        if ( phdr->p_paddr < kernstart )
-            kernstart = phdr->p_paddr;
-        if ( (phdr->p_paddr + phdr->p_memsz) > kernend )
-            kernend = phdr->p_paddr + phdr->p_memsz;
+        sstart = phdr->p_paddr;
+        send   = phdr->p_paddr + phdr->p_memsz;
+        /*
+         * bug comparibility alert: old linux kernels used to have
+         * virtual addresses in the paddr headers, whereas newer ones
+         * (since kexec merge, around 2.6.14) correctly use physical
+         * addresses.
+         *
+         * As we want to be able to boot both kinds of kernels we'll
+         * do some guesswork here: If paddr is greater than virt_base
+         * we assume it is a old kernel and use it as-is.  Otherwise
+         * we'll add virt_base to get the correct address.
+         */
+        if (sstart < dsi->virt_base) {
+            sstart += dsi->virt_base;
+            send   += dsi->virt_base;
+        }
+        if ( sstart < kernstart )
+            kernstart = sstart;
+        if ( send > kernend )
+            kernend = send;
     }
 
     if ( (kernstart > kernend) || 
@@ -189,7 +209,11 @@
         
         for ( done = 0; done < phdr->p_filesz; done += chunksz )
         {
-            pa = (phdr->p_paddr + done) - dsi->v_start;
+            /* bug compatibility alert, see above */
+            pa = phdr->p_paddr + done;
+            if (pa > dsi->virt_base)
+                pa -= dsi->virt_base;
+
             va = xc_map_foreign_range(
                 xch, dom, PAGE_SIZE, PROT_WRITE, parray[pa>>PAGE_SHIFT]);
             chunksz = phdr->p_filesz - done;
@@ -202,7 +226,11 @@
 
         for ( ; done < phdr->p_memsz; done += chunksz )
         {
-            pa = (phdr->p_paddr + done) - dsi->v_start;
+            /* bug compatibility alert, see above */
+            pa = phdr->p_paddr + done;
+            if (pa > dsi->virt_base)
+                pa -= dsi->virt_base;
+
             va = xc_map_foreign_range(
                 xch, dom, PAGE_SIZE, PROT_WRITE, parray[pa>>PAGE_SHIFT]);
             chunksz = phdr->p_memsz - done;
diff -r 175ad739d8bc tools/libxc/xg_private.h
--- a/tools/libxc/xg_private.h	Wed Feb 22 20:52:30 2006
+++ b/tools/libxc/xg_private.h	Thu Feb 23 10:31:01 2006
@@ -131,6 +131,7 @@
     unsigned long v_kernstart;
     unsigned long v_kernend;
     unsigned long v_kernentry;
+    unsigned long virt_base;
 
     unsigned int  load_symtab;
     unsigned int  pae_kernel;
diff -r 175ad739d8bc xen/common/elf.c
--- a/xen/common/elf.c	Wed Feb 22 20:52:30 2006
+++ b/xen/common/elf.c	Thu Feb 23 10:31:01 2006
@@ -24,6 +24,7 @@
     Elf_Phdr *phdr;
     Elf_Shdr *shdr;
     unsigned long kernstart = ~0UL, kernend=0UL;
+    unsigned long sstart, send;
     char *shstrtab, *guestinfo=NULL, *p;
     char *elfbase = (char *)dsi->image_addr;
     int h;
@@ -77,6 +78,8 @@
             return -EINVAL;
         }
 
+        if ( (p = strstr(guestinfo, "VIRT_BASE=")) != NULL )
+            dsi->virt_base = simple_strtoul(p+10, &p, 0);
         break;
     }
 
@@ -87,11 +90,38 @@
         phdr = (Elf_Phdr *)(elfbase + ehdr->e_phoff + (h*ehdr->e_phentsize));
         if ( !is_loadable_phdr(phdr) )
             continue;
-        if ( phdr->p_paddr < kernstart )
-            kernstart = phdr->p_paddr;
-        if ( (phdr->p_paddr + phdr->p_memsz) > kernend )
-            kernend = phdr->p_paddr + phdr->p_memsz;
-    }
+        sstart = phdr->p_paddr;
+        send   = phdr->p_paddr + phdr->p_memsz;
+        /*
+         * bug comparibility alert: old linux kernels used to have
+         * virtual addresses in the paddr headers, whereas newer ones
+         * (since kexec merge, around 2.6.14) correctly use physical
+         * addresses.
+         *
+         * As we want to be able to boot both kinds of kernels we'll
+         * do some guesswork here: If paddr is greater than virt_base
+         * we assume it is a old kernel and use it as-is.  Otherwise
+         * we'll add virt_base to get the correct address.
+         */
+        if (sstart < dsi->virt_base) {
+            sstart += dsi->virt_base;
+            send   += dsi->virt_base;
+        }
+        printk("%s: program hdr: %08lx (=vaddr)  "
+               "paddr: %08lx  filesz: %08lx  memsz: %08lx  =>  %08lx-%08lx\n",
+               __FUNCTION__,
+               (unsigned long)phdr->p_vaddr,
+               (unsigned long)phdr->p_paddr,
+               (unsigned long)phdr->p_filesz,
+               (unsigned long)phdr->p_memsz,
+               sstart, send);
+        if ( sstart < kernstart )
+            kernstart = sstart;
+        if ( send > kernend )
+            kernend = send;
+    }
+    printk("%s: entry point: %08lx\n", __FUNCTION__,
+           (unsigned long)ehdr->e_entry);
 
     if ( (kernstart > kernend) || 
          (ehdr->e_entry < kernstart) || 
@@ -127,6 +157,7 @@
     char *elfbase = (char *)dsi->image_addr;
     Elf_Ehdr *ehdr = (Elf_Ehdr *)dsi->image_addr;
     Elf_Phdr *phdr;
+    unsigned long vaddr;
     int h;
   
     for ( h = 0; h < ehdr->e_phnum; h++ ) 
@@ -134,11 +165,15 @@
         phdr = (Elf_Phdr *)(elfbase + ehdr->e_phoff + (h*ehdr->e_phentsize));
         if ( !is_loadable_phdr(phdr) )
             continue;
+        vaddr = phdr->p_paddr;
+        if (vaddr < dsi->virt_base)
+            vaddr += dsi->virt_base;
         if ( phdr->p_filesz != 0 )
-            memcpy((char *)phdr->p_paddr, elfbase + phdr->p_offset, 
+            memcpy((char *)vaddr,
+                   elfbase + phdr->p_offset, 
                    phdr->p_filesz);
         if ( phdr->p_memsz > phdr->p_filesz )
-            memset((char *)phdr->p_paddr + phdr->p_filesz, 0, 
+            memset((char *)phdr->p_vaddr + phdr->p_filesz, 0, 
                    phdr->p_memsz - phdr->p_filesz);
     }
 
diff -r 175ad739d8bc xen/include/xen/sched.h
--- a/xen/include/xen/sched.h	Wed Feb 22 20:52:30 2006
+++ b/xen/include/xen/sched.h	Thu Feb 23 10:31:01 2006
@@ -166,6 +166,7 @@
     unsigned long v_kernstart;
     unsigned long v_kernend;
     unsigned long v_kernentry;
+    unsigned long virt_base;
     /* Initialised by loader: Private. */
     unsigned int  load_symtab;
     unsigned long symtab_addr;

[-- Attachment #3: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] Elf loader fixes
  2006-02-23 11:15   ` Gerd Hoffmann
@ 2006-03-01  9:48     ` Christian Limpach
  2006-03-01 15:00       ` Gerd Hoffmann
  2006-03-06 13:40       ` Gerd Hoffmann
  0 siblings, 2 replies; 13+ messages in thread
From: Christian Limpach @ 2006-03-01  9:48 UTC (permalink / raw)
  To: Gerd Hoffmann; +Cc: xen-devel, Joe Bonasera

On 2/23/06, Gerd Hoffmann <kraxel@suse.de> wrote:
> > some with p_vaddr == p_paddr on purpose and some which don't. We rely on a
> > "boot loader" (ie grub or domain builder) that only cares about p_paddr.
>
> Same goes for the x86_64 linux kernel with the vsyscall page ...
>
> I've settled for a slightly different approach now.  To keep behaviour
> as close as possible to classic i386 boot loaders I'll check paddr only
> and use the virt_base value from the __xen_guest section to shift the
> addresses.  For bug compatibility with old linux kernels I compare paddr
> + virt_base.  New patch below, this time tested both 32 and 64 bit
> (linux only though), I think it should be ok for OpenSolaris too ;)

I have two issues with your latest patch:
- the heuristics you use to distinguish between "old" and "new" kernel
images, especially the case where a valid "new" kernel image could be
mistaken for an "old" image.
- the change to the entry point, now entry point and the elf header
paddr fields are in different address modes.

We have the current combination of VIRT_BASE and elf header paddr
fields containing virtual addresses because:
- we want to be able to specify where the virtual address space mapped
by the initial pagetables starts, this is why we have VIRT_BASE
- since we always run in virtual address mode, we thought that the
loader should also do so, thus it will load the image to a virtual
address - this seems to be in line with how other loaders would read
the elf header paddr fields, i.e. use them as load addresses.

If we really need to get rid of how we change LOAD_OFFSET in Linux,
how about this:
we add another entry to the __xen_guest header, PHYS_OFFSET=0 and then
substract PHYS_OFFSET from all elf header paddr fiels to turn them
into physical addresses, respectively, resp. add VIRT_BASE-PHYS_OFFSET
to turn them into virtual addresses.  We default PHYS_OFFSET to
VIRT_BASE if it's not present in the __xen_guest header.

     christian

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] Elf loader fixes
  2006-03-01  9:48     ` Christian Limpach
@ 2006-03-01 15:00       ` Gerd Hoffmann
  2006-03-06 13:40       ` Gerd Hoffmann
  1 sibling, 0 replies; 13+ messages in thread
From: Gerd Hoffmann @ 2006-03-01 15:00 UTC (permalink / raw)
  To: Christian.Limpach; +Cc: xen-devel, Joe Bonasera

  Hi,

> I have two issues with your latest patch:
> - the heuristics you use to distinguish between "old" and "new" kernel
> images, especially the case where a valid "new" kernel image could be
> mistaken for an "old" image.

I don't think it can ever happen, at least not with linux.  The
(correct) paddr addresses must be relatively small, otherwise loading
the kernel on machines with a small amount of memory will not work.
Especially they must be much smaller than the usual linux kernel VIRT_BASE.

Not sure about other OS'es, maybe there are some which use a small
VIRT_BASE, then it could be problematic, yes.

> - the change to the entry point, now entry point and the elf header
> paddr fields are in different address modes.

xen/include/xen/elf.h says entry point is virtual.  IMHO the elf headers
should be correctly filled.

> - we want to be able to specify where the virtual address space mapped
> by the initial pagetables starts, this is why we have VIRT_BASE

Sure.

> - since we always run in virtual address mode, we thought that the
> loader should also do so, thus it will load the image to a virtual
> address - this seems to be in line with how other loaders would read
> the elf header paddr fields, i.e. use them as load addresses.

Well, loadelfimage() in tools/libxc/xc_load_elf.c copyes the image
page-by-page to the _physical_ addresses.  And this is how it should be
IMHO.  virt_base is only used to create the initial virtual mappings as
expected by the guest kernel (and for bug-compatibility with old kernels).

The dom0 builder can take a shortcut and simply copy the big blobs to
virtual addresses (paddr + virt_base).  That works because the initial
virtual mapping is a simple offset to the physical address and the
mappings are already created and active at that point.

> If we really need to get rid of how we change LOAD_OFFSET in Linux,
> how about this:
> we add another entry to the __xen_guest header, PHYS_OFFSET=0 and then
> substract PHYS_OFFSET from all elf header paddr fiels to turn them
> into physical addresses, respectively, resp. add VIRT_BASE-PHYS_OFFSET
> to turn them into virtual addresses.  We default PHYS_OFFSET to
> VIRT_BASE if it's not present in the __xen_guest header.

I don't think the new PHYS_OFFSET is needed.

cheers,

  Gerd

-- 
Gerd 'just married' Hoffmann <kraxel@suse.de>
I'm the hacker formerly known as Gerd Knorr.
http://www.suse.de/~kraxel/just-married.jpeg

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] Elf loader fixes
  2006-03-01  9:48     ` Christian Limpach
  2006-03-01 15:00       ` Gerd Hoffmann
@ 2006-03-06 13:40       ` Gerd Hoffmann
  1 sibling, 0 replies; 13+ messages in thread
From: Gerd Hoffmann @ 2006-03-06 13:40 UTC (permalink / raw)
  To: Christian.Limpach; +Cc: xen-devel, Joe Bonasera

> - the change to the entry point, now entry point and the elf header
> paddr fields are in different address modes.

FYI: that one was rejected on lkml so we probably should go with
(physical) entry_point + virt_base ...

cheers,

  Gerd

^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2006-03-06 13:40 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2006-02-22 11:37 [PATCH] Elf loader fixes Gerd Hoffmann
2006-02-22 12:51 ` Jan Beulich
2006-02-22 13:33   ` Gerd Hoffmann
2006-02-22 15:12   ` Gerd Hoffmann
2006-02-22 15:30     ` Jan Beulich
2006-02-22 16:10       ` Gerd Hoffmann
2006-02-22 16:11         ` Ronald G Minnich
2006-02-22 16:25         ` Jacob Gorm Hansen
     [not found] <E1F6x4v-0001lS-2n@host-192-168-0-1-bcn-london>
2006-02-22 18:17 ` Joe Bonasera
2006-02-23 11:15   ` Gerd Hoffmann
2006-03-01  9:48     ` Christian Limpach
2006-03-01 15:00       ` Gerd Hoffmann
2006-03-06 13:40       ` Gerd Hoffmann

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.