* [Qemu-devel] [PATCH 1/2] linux-user: Build vdso for x64.
2013-07-23 20:27 [Qemu-devel] [PATCH 0/2] vdso for x86_64-linux-user Richard Henderson
@ 2013-07-23 20:27 ` Richard Henderson
2013-07-23 20:27 ` [Qemu-devel] [PATCH 2/2] linux-user: Load a VDSO for x86-64 Richard Henderson
2013-07-23 21:15 ` [Qemu-devel] [PATCH 0/2] vdso for x86_64-linux-user Peter Maydell
2 siblings, 0 replies; 7+ messages in thread
From: Richard Henderson @ 2013-07-23 20:27 UTC (permalink / raw)
To: qemu-devel; +Cc: riku.voipio
... Well, sort of. The Makefile bits are broken.
Patch to load the vdso into the running program to follow.
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
Makefile | 3 +-
pc-bios/Makefile | 5 +++
pc-bios/vdso-linux-x64.S | 100 ++++++++++++++++++++++++++++++++++++++++++++++
pc-bios/vdso-linux-x64.ld | 79 ++++++++++++++++++++++++++++++++++++
pc-bios/vdso-linux-x64.so | Bin 0 -> 7419 bytes
5 files changed, 186 insertions(+), 1 deletion(-)
create mode 100644 pc-bios/vdso-linux-x64.S
create mode 100644 pc-bios/vdso-linux-x64.ld
create mode 100755 pc-bios/vdso-linux-x64.so
diff --git a/Makefile b/Makefile
index c06bfab..0148da1 100644
--- a/Makefile
+++ b/Makefile
@@ -296,7 +296,8 @@ multiboot.bin linuxboot.bin kvmvapic.bin \
s390-zipl.rom \
s390-ccw.img \
spapr-rtas.bin slof.bin \
-palcode-clipper
+palcode-clipper \
+vdso-linux-x64.so
else
BLOBS=
endif
diff --git a/pc-bios/Makefile b/pc-bios/Makefile
index 315288d..70e2485 100644
--- a/pc-bios/Makefile
+++ b/pc-bios/Makefile
@@ -15,5 +15,10 @@ all: $(TARGETS)
%.dtb: %.dts
dtc -I dts -O dtb -o $@ $<
+vdso-linux-x64.so: vdso-linux-x64.o vdso-linux-x64.ld
+ $(CC) -nostdlib -shared -Wl,-T,vdso-linux-x64.ld \
+ -Wl,-h,linux-vdso.so.1 -Wl,--hash-style=both \
+ vdso-linux-x64.o -o $@
+
clean:
rm -f $(TARGETS) *.o *~
diff --git a/pc-bios/vdso-linux-x64.S b/pc-bios/vdso-linux-x64.S
new file mode 100644
index 0000000..92efcd1
--- /dev/null
+++ b/pc-bios/vdso-linux-x64.S
@@ -0,0 +1,100 @@
+/*
+ * x86-64 linux replacement vdso.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <asm/unistd.h>
+
+ .globl __vdso_clock_gettime
+ .type __vdso_clock_gettime, @function
+ .balign 16
+ .cfi_startproc
+__vdso_clock_gettime:
+ mov $__NR_clock_gettime, %eax
+ syscall
+ ret
+ .cfi_endproc
+ .size __vdso_clock_gettime, . - __vdso_clock_gettime
+
+clock_gettime = __vdso_clock_gettime
+ .weak clock_gettime
+
+
+ .globl __vdso_gettimeofday
+ .type __vdso_gettimeofday, @function
+ .balign 16
+ .cfi_startproc
+__vdso_gettimeofday:
+ mov $__NR_gettimeofday, %eax
+ syscall
+ ret
+ .cfi_endproc
+ .size __vdso_gettimeofday, . - __vdso_gettimeofday
+
+gettimeofday = __vdso_gettimeofday
+ .weak gettimeofday
+
+
+ .globl __vdso_getcpu
+ .type __vdso_getcpu, @function
+ .balign 16
+ .cfi_startproc
+__vdso_getcpu:
+ /* ??? There is no syscall number for this allocated on x64.
+ We can handle this several ways:
+
+ (1) Invent a syscall number for use within qemu.
+ It should be easy enough to pick a number that
+ is well out of the way of the kernel numbers.
+
+ (2) Force the emulated cpu to support the rdtscp insn,
+ and initialize the TSC_AUX value to the appropriate value.
+
+ (3) Pretend that we're always running on cpu 0.
+
+ This last is the one that's implemented here, with the
+ tiny bit of extra code to support rdtscp in place. */
+
+ xor %ecx, %ecx /* rdtscp w/ tsc_aux = 0 */
+
+ /* if (cpu != NULL) *cpu = (ecx & 0xfff); */
+ test %rdi, %rdi
+ jz 1f
+ mov %ecx, %eax
+ and $0xfff, %eax
+ mov %eax, (%rdi)
+
+ /* if (node != NULL) *node = (ecx >> 12); */
+1: test %rsi, %rsi
+ jz 2f
+ shr $12, %ecx
+ mov %ecx, (%rsi)
+
+2: xor %eax, %eax
+ ret
+ .cfi_endproc
+ .size __vdso_getcpu, . - __vdso_getcpu
+
+getcpu = __vdso_getcpu
+ .weak getcpu
+
+/* ??? Perhaps add elf notes. E.g.
+
+ #include <linux/elfnote.h>
+ ELFNOTE_START(Linux, 0, "a")
+ .long LINUX_VERSION_CODE
+ ELFNOTE_END
+
+ but what version number would we set for QEMU? */
diff --git a/pc-bios/vdso-linux-x64.ld b/pc-bios/vdso-linux-x64.ld
new file mode 100644
index 0000000..9e7e2d1
--- /dev/null
+++ b/pc-bios/vdso-linux-x64.ld
@@ -0,0 +1,79 @@
+/*
+ * Linker script for linux x64 replacement vdso.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+VERSION {
+ LINUX_2.6 {
+ global:
+ clock_gettime;
+ __vdso_clock_gettime;
+ gettimeofday;
+ __vdso_gettimeofday;
+ getcpu;
+ __vdso_getcpu;
+ local: *;
+ };
+}
+
+PHDRS {
+ phdr PT_PHDR FLAGS(4) PHDRS;
+ data PT_LOAD FLAGS(6) FILEHDR PHDRS;
+ text PT_LOAD FLAGS(5);
+ dynamic PT_DYNAMIC FLAGS(4);
+ note PT_NOTE FLAGS(4);
+ /* ??? Various versions of ld don't know PT_GNU_EH_FRAME. */
+ eh_frame_hdr 0x6474e550;
+}
+
+SECTIONS {
+ /* ??? We can't really prelink to any address without knowing
+ something about the virtual memory space of the host, since
+ that leaks over into the available memory space of the guest. */
+ . = SIZEOF_HEADERS;
+
+ /* The following, including the FILEHDRS and PHDRS, are modified
+ when we relocate the binary. We want them to be initially
+ writable for the relocation; we'll force them read-only after. */
+ .dynamic : { *(.dynamic) } :data :dynamic
+ .dynsym : { *(.dynsym) } :data
+ .data : {
+ /* There ought not be any real read-write data.
+ But since we manipulated the segment layout,
+ we have to put these sections somewhere. */
+ *(.data*)
+ *(.sdata*)
+ *(.got.plt) *(.got)
+ *(.gnu.linkonce.d.*)
+ *(.bss*)
+ *(.dynbss*)
+ *(.gnu.linkonce.b.*)
+ }
+
+ . += 4096;
+ .hash : { *(.hash) } :text
+ .gnu.hash : { *(.gnu.hash) }
+ .dynstr : { *(.dynstr) }
+ .gnu.version : { *(.gnu.version) }
+ .gnu.version_d : { *(.gnu.version_d) }
+ .gnu.version_r : { *(.gnu.version_r) }
+ .note : { *(.note*) } :text :note
+ .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr
+ .eh_frame : { *(.eh_frame) } :text
+ .rodata : { *(.rodata) }
+
+ . = ALIGN(0x100);
+ .text : { *(.text*) } :text =0x90909090
+}
diff --git a/pc-bios/vdso-linux-x64.so b/pc-bios/vdso-linux-x64.so
new file mode 100755
index 0000000000000000000000000000000000000000..5fd8221527f0fc54ce3f93e98e6b9e14dadc7489
GIT binary patch
literal 7419
zcmeHMQEMDk6h512nxw`isYR+$TqNy7se>t^g0>{7NmII}rA?rUFxh6O*<f~8lS$c#
z)QE->eXzek6b0X;;Ddd$B~aUfj|E@EKB(Y}=!2nxx}I<5eA}Jr#1|igGB>&N-SeGu
z?%aFk-r3>Yw?+$NZMmGVIC4-z>A{E@GR0++Ar8s2vQ@7=(yb`6CFG!iE-J+1)-|!y
z^q}FpSt&b_hA5H`3HY^ZEk6*7cj3*ph$qqyG{$+s#*@To(rRFOjuSqpD?9a)f&&pP
zMA)Jn|1l_`{i*pSk@iITZ4bRgBODd-obT^{H)_y)Wb;QXP;Qybm|n0rY>U)#!tg%B
zU50-)qW;EhoPU{Y?((b2`%Q0d{|h+M)r=<xfamnM=&Wpg^0`6Wp?q(Gf7I-gJRUpj
zll_b<-zUE{6k=tsO0dsc|L7NGP%Flh<U?h4$s&+NAd5g2fh+=91hNQZ5y&EtMIehn
z7J)1RSp@Fs2q2x+r4{)J<aC&~(Pmhfw_)A|_8nS*;E<ny&D%Tkxe;Ghz@ZOt=cc}U
zU2gPWJb!fcr-wc{RDEyn)jfMIp7txJ&NsyK&XyKy-pqV$=2dUj4}!UhPtwHNnNo4N
zg`0V8N#^IOOZ6w<>gxZX6pl<xo$~g(PwV|KqSik!Is5zgt6%O%s^}4|%wb{dZK~f`
z`rKg}33(M1!~z`Vd3a(<)<5{;#)kvfzbJfttMC0UzrXU?H_zP6^*hhy&E^&D?0Id^
zD^4oDsW`95esgb}kX*et*WcaIi5wqzjAfU$nHy3u<eyUPP2mg5cPh`E4(#vMa_I-1
zSx@_V&tva~Q@k`Cmj|zpuQr0NwQIW@JtAuz<EtA%`<0)&*R~H{UKe+9xe^plD+UV@
zmf0#TSBsUo8F96tR^rZ9m)vr3u^chMLd2Z)7Z&Gg)tKOw#I4o>UtGWJomnVWe6L(m
zdrIO4emw|>`=2lU+5`3w)*s4dd13BXuA$z@Jk0wdr^<cHyesirvx7fu=+FG?lgd#C
zQ|5Vz+(Yc2{@6Dt`?PLoj+T1ng^5Ej(n3A`BUS|aneP~I%p((-H>SOAi�RQlsVm
z%o7uLm^JU8#G|T5&Y3*(&_vFkF-ngAW%b8<lKw09A|MXf{Rxa4<!NnTPW0a_Pfz=#
ze>LHM&F)W}rlVnU{B`r^`=9w|;(>%e*B$9TZE)?910Jxv^to12+sWtGI`>x7m4_qg
zwmINj0+r+E`o(Wr+WqTz>1uf=g61`O|E40P;pk|&A6wOqJ}vIAIPWy~Pn_S<JTHm!
z?alQb=eG*iZ=CNCuFE*z**qVL^LL+DK<~&l;rfW%-#LFEk9gs?H5mt->*SiNmewM9
zZKj=vxXP!UV_<)!os-}<FztK;`wjgk&rfEJPo8^Vf25tOi1)%v6T>eaIV9fU!m*Lz
of_H3e?BwXAH#s~~81*Fny>jR4!#-#J<;x|eQUPBxsUJcA0EOZRod5s;
literal 0
HcmV?d00001
--
1.8.3.1
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [Qemu-devel] [PATCH 2/2] linux-user: Load a VDSO for x86-64.
2013-07-23 20:27 [Qemu-devel] [PATCH 0/2] vdso for x86_64-linux-user Richard Henderson
2013-07-23 20:27 ` [Qemu-devel] [PATCH 1/2] linux-user: Build vdso for x64 Richard Henderson
@ 2013-07-23 20:27 ` Richard Henderson
2013-07-23 21:15 ` [Qemu-devel] [PATCH 0/2] vdso for x86_64-linux-user Peter Maydell
2 siblings, 0 replies; 7+ messages in thread
From: Richard Henderson @ 2013-07-23 20:27 UTC (permalink / raw)
To: qemu-devel; +Cc: riku.voipio
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
linux-user/elfload.c | 197 +++++++++++++++++++++++++++++++++++++++++++++++++--
1 file changed, 190 insertions(+), 7 deletions(-)
diff --git a/linux-user/elfload.c b/linux-user/elfload.c
index 7ce2eab..0ceae28 100644
--- a/linux-user/elfload.c
+++ b/linux-user/elfload.c
@@ -197,6 +197,8 @@ static void elf_core_copy_regs(target_elf_gregset_t *regs, const CPUX86State *en
(*regs)[26] = env->segs[R_GS].selector & 0xffff;
}
+#define VDSO_BASENAME "vdso-linux-x64.so"
+
#else
#define ELF_START_MMAP 0x80000000
@@ -1029,6 +1031,10 @@ static inline void init_thread(struct target_pt_regs *regs, struct image_info *i
#define ELF_HWCAP 0
#endif
+#ifndef VDSO_BASENAME
+#define VDSO_BASENAME NULL
+#endif
+
#ifdef TARGET_ABI32
#undef ELF_CLASS
#define ELF_CLASS ELFCLASS32
@@ -1332,7 +1338,8 @@ static abi_ulong loader_build_fdpic_loadmap(struct image_info *info, abi_ulong s
static abi_ulong create_elf_tables(abi_ulong p, int argc, int envc,
struct elfhdr *exec,
struct image_info *info,
- struct image_info *interp_info)
+ struct image_info *interp_info,
+ struct image_info *vdso_info)
{
abi_ulong sp;
abi_ulong sp_auxv;
@@ -1388,16 +1395,21 @@ static abi_ulong create_elf_tables(abi_ulong p, int argc, int envc,
*/
sp = sp &~ (abi_ulong)15;
size = (DLINFO_ITEMS + 1) * 2;
- if (k_platform)
+ if (k_platform) {
size += 2;
+ }
+ if (vdso_info) {
+ size += 4;
+ }
#ifdef DLINFO_ARCH_ITEMS
size += DLINFO_ARCH_ITEMS * 2;
#endif
size += envc + argc + 2;
size += 1; /* argc itself */
size *= n;
- if (size & 15)
+ if (size & 15) {
sp -= 16 - (size & 15);
+ }
/* This is correct because Linux defines
* elf_addr_t as Elf32_Off / Elf64_Off
@@ -1426,8 +1438,13 @@ static abi_ulong create_elf_tables(abi_ulong p, int argc, int envc,
NEW_AUX_ENT(AT_CLKTCK, (abi_ulong) sysconf(_SC_CLK_TCK));
NEW_AUX_ENT(AT_RANDOM, (abi_ulong) u_rand_bytes);
- if (k_platform)
+ if (k_platform) {
NEW_AUX_ENT(AT_PLATFORM, u_platform);
+ }
+ if (vdso_info) {
+ NEW_AUX_ENT(AT_SYSINFO, vdso_info->entry);
+ NEW_AUX_ENT(AT_SYSINFO_EHDR, vdso_info->load_addr);
+ }
#ifdef ARCH_DLINFO
/*
* ARCH_DLINFO must come last so platform specific code can enforce
@@ -1843,6 +1860,165 @@ static void load_elf_interp(const char *filename, struct image_info *info,
exit(-1);
}
+static void load_elf_vdso(const char *basename, struct image_info *info,
+ char bprm_buf[BPRM_BUF_SIZE])
+{
+ const char *errmsg;
+ char *filename;
+ int fd, retval, i;
+ abi_ulong load_bias;
+
+ /* ??? What we really need access to is qemu_find_file, but that is
+ only built for system targets at the moment. */
+ filename = alloca(strlen(CONFIG_QEMU_DATADIR "/") + strlen(basename) + 1);
+ stpcpy(stpcpy(filename, CONFIG_QEMU_DATADIR "/"), basename);
+
+ fd = open(filename, O_RDONLY);
+ if (fd < 0) {
+ goto exit_perror;
+ }
+
+ retval = read(fd, bprm_buf, BPRM_BUF_SIZE);
+ if (retval < 0) {
+ goto exit_perror;
+ }
+ if (retval < BPRM_BUF_SIZE) {
+ memset(bprm_buf + retval, 0, BPRM_BUF_SIZE - retval);
+ }
+
+ load_elf_image(basename, fd, info, NULL, bprm_buf);
+ load_bias = info->load_bias;
+
+ /* We most likely need to relocate the VDSO image. The one built into
+ the kernel is built for a fixed address. The one built for QEMU is
+ not, since that requires close control of the guest address space. */
+ if (load_bias) {
+ struct elfhdr *ehdr = (struct elfhdr *)bprm_buf;
+ struct elf_phdr *phdr;
+ abi_ulong dynamic_addr = -1;
+ abi_ulong dynsym_addr = -1;
+
+ /* ??? Assume QEMU's VDSO is built "properly", which arranges
+ for the PHDRs, and all the sections manipulated below, to
+ be included with a writable load segment. */
+
+ /* ??? One might think that we'd need to relocate ehdr.e_entry,
+ but for some reason glibc does that one itself, though that
+ is also available via the AT_SYSINFO entry. */
+
+ /* Relocate the program headers. */
+ phdr = (struct elf_phdr *)g2h(info->load_addr + ehdr->e_phoff);
+ bswap_phdr(phdr, ehdr->e_phnum);
+ for (i = 0; i < ehdr->e_phnum; ++i) {
+ phdr[i].p_vaddr += load_bias;
+ phdr[i].p_paddr += load_bias;
+ if (phdr[i].p_type == PT_DYNAMIC) {
+ dynamic_addr = phdr[i].p_vaddr;
+ }
+ }
+ bswap_phdr(phdr, ehdr->e_phnum);
+
+ /* Relocate the DYNAMIC entries. */
+ if (dynamic_addr != -1) {
+ abi_ulong tag, val, *dyn = (abi_ulong *)g2h(dynamic_addr);
+ do {
+ tag = tswapl(dyn[0]);
+ val = tswapl(dyn[1]);
+ switch (tag) {
+ case DT_SYMTAB:
+ dynsym_addr = load_bias + val;
+ dyn[1] = tswapl(dynsym_addr);
+ break;
+ case DT_SYMENT:
+ if (val != sizeof(struct elf_sym)) {
+ errmsg = "VDSO has an unexpected dynamic symbol size";
+ goto exit_errmsg;
+ }
+ break;
+
+ case DT_HASH:
+ case DT_STRTAB:
+ case DT_VERDEF:
+ case DT_VERSYM:
+ case DT_ADDRRNGLO ... DT_ADDRRNGHI:
+ /* These entries store an address in the entry. */
+ dyn[1] = tswapl(load_bias + val);
+ break;
+
+ case DT_NULL:
+ case DT_STRSZ:
+ case DT_SONAME:
+ case DT_DEBUG:
+ case DT_FLAGS:
+ case DT_FLAGS_1:
+ case DT_VERDEFNUM:
+ case DT_VALRNGLO ... DT_VALRNGHI:
+ /* These entries store an integer in the entry. */
+ break;
+
+ case DT_REL:
+ case DT_RELA:
+ /* These entries indicate that the VDSO was built
+ incorrectly. It should not have real relocations. */
+ errmsg = "VDSO has relocations";
+ goto exit_errmsg;
+ case DT_NEEDED:
+ case DT_VERNEED:
+ errmsg = "VDSO has external dependancies";
+ goto exit_errmsg;
+
+ default:
+ /* This is probably something target specific. */
+ errmsg = "VDSO has unknown DYNAMIC entry";
+ goto exit_errmsg;
+ }
+ dyn += 2;
+ } while (tag != DT_NULL);
+ }
+
+ /* Relocate the dynamic symbol table. */
+ if (dynsym_addr != -1) {
+ struct elf_shdr *shdr;
+ struct elf_sym *sym;
+ int dynsym_size = 0;
+
+ /* Read the section headers to find out the size of the
+ dynamic symbol table. */
+ shdr = (struct elf_shdr *)g2h(info->load_addr + ehdr->e_shoff);
+ for (i = 0; i < ehdr->e_shnum; ++i) {
+ abi_ulong addr = tswapl(shdr[i].sh_addr) + load_bias;
+ if (addr == dynsym_addr) {
+ dynsym_size = tswapl(shdr[i].sh_size);
+ break;
+ }
+ }
+
+ sym = (struct elf_sym *)g2h(dynsym_addr);
+ for (i = 0; i < dynsym_size / sizeof(*sym); ++i) {
+ sym[i].st_value = tswapl(tswapl(sym[i].st_value) + load_bias);
+ }
+ }
+ }
+
+ /* Mark the VDSO writable segment read-only. */
+ /* ??? This assumes that the VDSO implementation doesn't actually
+ have any truly writable data. Perhaps we should instead use
+ the PT_GNU_RELRO header to indicate that we really want this. */
+ retval = target_mprotect(info->start_data, info->brk - info->start_data,
+ PROT_READ);
+ if (retval < 0) {
+ goto exit_perror;
+ }
+ return;
+
+ exit_perror:
+ errmsg = strerror(errno);
+ exit_errmsg:
+ fprintf(stderr, "%s: %s\n", filename, errmsg);
+ exit(-1);
+}
+
+
static int symfind(const void *s0, const void *s1)
{
target_ulong addr = *(target_ulong *)s0;
@@ -1990,7 +2166,7 @@ give_up:
int load_elf_binary(struct linux_binprm * bprm, struct target_pt_regs * regs,
struct image_info * info)
{
- struct image_info interp_info;
+ struct image_info interp_info, vdso_info;
struct elfhdr elf_ex;
char *elf_interpreter = NULL;
@@ -2037,8 +2213,15 @@ int load_elf_binary(struct linux_binprm * bprm, struct target_pt_regs * regs,
}
}
- bprm->p = create_elf_tables(bprm->p, bprm->argc, bprm->envc, &elf_ex,
- info, (elf_interpreter ? &interp_info : NULL));
+ /* If we've been given a VDSO to load, do so. */
+ if (VDSO_BASENAME) {
+ load_elf_vdso(VDSO_BASENAME, &vdso_info, bprm->buf);
+ }
+
+ bprm->p = create_elf_tables(bprm->p, bprm->argc, bprm->envc,
+ &elf_ex, info,
+ (elf_interpreter ? &interp_info : NULL),
+ (VDSO_BASENAME ? &vdso_info : NULL));
info->start_stack = bprm->p;
/* If we have an interpreter, set that as the program's entry point.
--
1.8.3.1
^ permalink raw reply related [flat|nested] 7+ messages in thread