* Re: [Qemu-devel] What happened with NPTL/TLS support?
2007-11-02 13:28 ` Felipe Contreras
@ 2007-11-02 17:06 ` Thayne Harbaugh
0 siblings, 0 replies; 7+ messages in thread
From: Thayne Harbaugh @ 2007-11-02 17:06 UTC (permalink / raw)
To: Felipe Contreras; +Cc: qemu-devel
[-- Attachment #1: Type: text/plain, Size: 1172 bytes --]
On Fri, 2007-11-02 at 15:28 +0200, Felipe Contreras wrote:
> On 10/22/07, Thayne Harbaugh <thayne@c2.net> wrote:
> >
> > On Sat, 2007-10-20 at 21:34 +0100, Thiemo Seufer wrote:
<SNIP>
> > > Please submit this patch (and resend what you think was missed).
> >
> > I'll get them reworked and re-sent soon.
> >
> > Thanks for the interest.
>
> Do you have anything that can be tried out already?
Try these:
09_arm_eabitls.patch: This is for TLS on arm. We have used it
extensively and it appears quite solid. It should apply to a stock CVS
tree - although we use it on top of about 15 other patches so YMMV.
40_tls.patch (attached below as 40_tls_i386.patch): This is mainly for i386.
It mostly works, but I don't trust it. It doesn't apply to a stock CVS tree.
There are a few things that should be reworked on both of these patches.
I haven't sent them up-stream because I'm working to get foundational
patches accepted first. The sooner I can get the EFAULT patches
accepted then the sooner I can rework these patches and make them more
acceptable to be committed. Right now I don't think either of them
should be committed (although 09_arm_eabitls.patch is better than
40_tls.patch).
Good luck!
[-- Attachment #2: 09_arm_eabitls.patch --]
[-- Type: text/x-patch, Size: 26498 bytes --]
Index: qemu/configure
===================================================================
--- qemu.orig/configure 2007-10-15 13:52:07.000000000 -0600
+++ qemu/configure 2007-10-15 13:52:38.000000000 -0600
@@ -102,6 +102,7 @@
darwin_user="no"
build_docs="no"
uname_release=""
+nptl="yes"
# OS specific
targetos=`uname -s`
@@ -303,6 +304,8 @@
*) echo "undefined SPARC architecture. Exiting";exit 1;;
esac
;;
+ --disable-nptl) nptl="no"
+ ;;
esac
done
@@ -388,6 +391,7 @@
echo " --disable-linux-user disable all linux usermode emulation targets"
echo " --enable-darwin-user enable all darwin usermode emulation targets"
echo " --disable-darwin-user disable all darwin usermode emulation targets"
+echo " --disable-nptl disable usermode NPTL guest support"
echo " --fmod-lib path to FMOD library"
echo " --fmod-inc path to FMOD includes"
echo " --enable-uname-release=R Return R for uname -r in usermode emulation"
@@ -554,6 +558,23 @@
}
EOF
+# check NPTL support
+cat > $TMPC <<EOF
+#include <sched.h>
+void foo()
+{
+#ifndef CLONE_SETTLS
+#error bork
+#endif
+}
+EOF
+
+if $cc -c -o $TMPO $TMPC 2> /dev/null ; then
+ :
+else
+ nptl="no"
+fi
+
##########################################
# SDL probe
@@ -717,6 +738,7 @@
echo "Documentation $build_docs"
[ ! -z "$uname_release" ] && \
echo "uname -r $uname_release"
+echo "NPTL support $nptl"
if test $sdl_too_old = "yes"; then
echo "-> Your SDL version is too old - please upgrade to have SDL support"
@@ -1122,6 +1144,14 @@
echo "SDL_CFLAGS=`$sdl_config --cflags`" >> $config_mak
fi
fi
+else
+ if test "$nptl" = "yes" ; then
+ case "$target_cpu" in
+ arm | armeb | ppc | ppc64)
+ echo "#define USE_NPTL 1" >> $config_h
+ ;;
+ esac
+ fi
fi
if test "$cocoa" = "yes" ; then
Index: qemu/exec-all.h
===================================================================
--- qemu.orig/exec-all.h 2007-10-15 13:52:07.000000000 -0600
+++ qemu/exec-all.h 2007-10-15 13:52:32.000000000 -0600
@@ -391,170 +391,7 @@
extern CPUReadMemoryFunc *io_mem_read[IO_MEM_NB_ENTRIES][4];
extern void *io_mem_opaque[IO_MEM_NB_ENTRIES];
-#if defined(__powerpc__)
-static inline int testandset (int *p)
-{
- int ret;
- __asm__ __volatile__ (
- "0: lwarx %0,0,%1\n"
- " xor. %0,%3,%0\n"
- " bne 1f\n"
- " stwcx. %2,0,%1\n"
- " bne- 0b\n"
- "1: "
- : "=&r" (ret)
- : "r" (p), "r" (1), "r" (0)
- : "cr0", "memory");
- return ret;
-}
-#elif defined(__i386__)
-static inline int testandset (int *p)
-{
- long int readval = 0;
-
- __asm__ __volatile__ ("lock; cmpxchgl %2, %0"
- : "+m" (*p), "+a" (readval)
- : "r" (1)
- : "cc");
- return readval;
-}
-#elif defined(__x86_64__)
-static inline int testandset (int *p)
-{
- long int readval = 0;
-
- __asm__ __volatile__ ("lock; cmpxchgl %2, %0"
- : "+m" (*p), "+a" (readval)
- : "r" (1)
- : "cc");
- return readval;
-}
-#elif defined(__s390__)
-static inline int testandset (int *p)
-{
- int ret;
-
- __asm__ __volatile__ ("0: cs %0,%1,0(%2)\n"
- " jl 0b"
- : "=&d" (ret)
- : "r" (1), "a" (p), "0" (*p)
- : "cc", "memory" );
- return ret;
-}
-#elif defined(__alpha__)
-static inline int testandset (int *p)
-{
- int ret;
- unsigned long one;
-
- __asm__ __volatile__ ("0: mov 1,%2\n"
- " ldl_l %0,%1\n"
- " stl_c %2,%1\n"
- " beq %2,1f\n"
- ".subsection 2\n"
- "1: br 0b\n"
- ".previous"
- : "=r" (ret), "=m" (*p), "=r" (one)
- : "m" (*p));
- return ret;
-}
-#elif defined(__sparc__)
-static inline int testandset (int *p)
-{
- int ret;
-
- __asm__ __volatile__("ldstub [%1], %0"
- : "=r" (ret)
- : "r" (p)
- : "memory");
-
- return (ret ? 1 : 0);
-}
-#elif defined(__arm__)
-static inline int testandset (int *spinlock)
-{
- register unsigned int ret;
- __asm__ __volatile__("swp %0, %1, [%2]"
- : "=r"(ret)
- : "0"(1), "r"(spinlock));
-
- return ret;
-}
-#elif defined(__mc68000)
-static inline int testandset (int *p)
-{
- char ret;
- __asm__ __volatile__("tas %1; sne %0"
- : "=r" (ret)
- : "m" (p)
- : "cc","memory");
- return ret;
-}
-#elif defined(__ia64)
-
-#include <ia64intrin.h>
-
-static inline int testandset (int *p)
-{
- return __sync_lock_test_and_set (p, 1);
-}
-#elif defined(__mips__)
-static inline int testandset (int *p)
-{
- int ret;
-
- __asm__ __volatile__ (
- " .set push \n"
- " .set noat \n"
- " .set mips2 \n"
- "1: li $1, 1 \n"
- " ll %0, %1 \n"
- " sc $1, %1 \n"
- " beqz $1, 1b \n"
- " .set pop "
- : "=r" (ret), "+R" (*p)
- :
- : "memory");
-
- return ret;
-}
-#else
-#error unimplemented CPU support
-#endif
-
-typedef int spinlock_t;
-
-#define SPIN_LOCK_UNLOCKED 0
-
-#if defined(CONFIG_USER_ONLY)
-static inline void spin_lock(spinlock_t *lock)
-{
- while (testandset(lock));
-}
-
-static inline void spin_unlock(spinlock_t *lock)
-{
- *lock = 0;
-}
-
-static inline int spin_trylock(spinlock_t *lock)
-{
- return !testandset(lock);
-}
-#else
-static inline void spin_lock(spinlock_t *lock)
-{
-}
-
-static inline void spin_unlock(spinlock_t *lock)
-{
-}
-
-static inline int spin_trylock(spinlock_t *lock)
-{
- return 1;
-}
-#endif
+#include "spinlock.h"
extern spinlock_t tb_lock;
Index: qemu/linux-user/arm/syscall.h
===================================================================
--- qemu.orig/linux-user/arm/syscall.h 2007-10-15 13:52:07.000000000 -0600
+++ qemu/linux-user/arm/syscall.h 2007-10-15 13:52:13.000000000 -0600
@@ -28,7 +28,9 @@
#define ARM_SYSCALL_BASE 0x900000
#define ARM_THUMB_SYSCALL 0
-#define ARM_NR_cacheflush (ARM_SYSCALL_BASE + 0xf0000 + 2)
+#define ARM_NR_BASE 0xf0000
+#define ARM_NR_cacheflush (ARM_NR_BASE + 2)
+#define ARM_NR_set_tls (ARM_NR_BASE + 5)
#define ARM_NR_semihosting 0x123456
#define ARM_NR_thumb_semihosting 0xAB
Index: qemu/linux-user/main.c
===================================================================
--- qemu.orig/linux-user/main.c 2007-10-15 13:52:13.000000000 -0600
+++ qemu/linux-user/main.c 2007-10-15 13:52:41.000000000 -0600
@@ -325,6 +325,50 @@
}
}
+/* Emulate the kernel helper at the trapping PC (regs[15]), then resume at the
+   return address in regs[14].  Returns 0 if handled, 1 for an unknown PC. */
+static int do_kernel_trap(CPUARMState *env)
+{
+    uint32_t addr;
+    uint32_t *ptr;
+    uint32_t cpsr;
+
+    switch (env->regs[15]) {
+    case 0xffff0fc0: /* __kernel_cmpxchg */
+        /* XXX: This only works between threads, not between processes.
+           Use native atomic operations. */
+        /* ??? This probably breaks horribly if the access segfaults. */
+        cpu_lock();
+        /* r2 is a guest address; translate it before dereferencing. */
+        ptr = g2h(env->regs[2]);
+        cpsr = cpsr_read(env);
+        if (*ptr == env->regs[0]) {
+            *ptr = env->regs[1];
+            env->regs[0] = 0;
+            cpsr |= CPSR_C;
+        } else {
+            env->regs[0] = -1;
+            cpsr &= ~CPSR_C;
+        }
+        cpsr_write(env, cpsr, CPSR_C);
+        cpu_unlock();
+        break;
+    case 0xffff0fe0: /* __kernel_get_tls */
+        env->regs[0] = env->cp15.c13_tls;
+        break;
+    default:
+        return 1;
+    }
+    /* Jump back to the caller; a set bit 0 selects Thumb state. */
+    addr = env->regs[14];
+    if (addr & 1) {
+        env->thumb = 1;
+        addr &= ~1;
+    }
+    env->regs[15] = addr;
+    return 0;
+}
+
void cpu_loop(CPUARMState *env)
{
int trapnr;
@@ -381,10 +425,8 @@
}
}
- if (n == ARM_NR_cacheflush) {
- arm_cache_flush(env->regs[0], env->regs[1]);
- } else if (n == ARM_NR_semihosting
- || n == ARM_NR_thumb_semihosting) {
+ if (n == ARM_NR_semihosting
+ || n == ARM_NR_thumb_semihosting) {
env->regs[0] = do_arm_semihosting (env);
} else if (n == 0 || n >= ARM_SYSCALL_BASE
|| (env->thumb && n == ARM_THUMB_SYSCALL)) {
@@ -395,6 +437,26 @@
n -= ARM_SYSCALL_BASE;
env->eabi = 0;
}
+ if (n > ARM_NR_BASE) {
+ switch (n)
+ {
+ case ARM_NR_cacheflush:
+ arm_cache_flush(env->regs[0], env->regs[1]);
+ break;
+#ifdef USE_NPTL
+ case ARM_NR_set_tls:
+ cpu_set_tls(env, env->regs[0]);
+ env->regs[0] = 0;
+ break;
+#endif
+ default:
+ printf ("Error: Bad syscall: %x\n", n);
+ env->regs[0] = -TARGET_ENOSYS;
+ goto error;
+ }
+ }
+ else
+ {
env->regs[0] = do_syscall(env,
n,
env->regs[0],
@@ -403,7 +465,9 @@
env->regs[3],
env->regs[4],
env->regs[5]);
+ }
} else {
+ printf ("Error: Bad syscall: %x\n", n);
goto error;
}
}
@@ -441,6 +505,10 @@
}
}
break;
+ case EXCP_KERNEL_TRAP:
+ if (do_kernel_trap(env))
+ goto error;
+ break;
default:
error:
fprintf(stderr, "qemu: unhandled CPU exception 0x%x - aborting\n",
@@ -2069,6 +2137,10 @@
for(i = 0; i < 16; i++) {
env->regs[i] = regs->uregs[i];
}
+ /* Register the magic kernel code page. The cpu will generate a
+ special exception when it tries to execute code here. We can't
+ put real code here because it may be in use by the host kernel. */
+ page_set_flags(0xffff0000, 0xffff0fff, 0);
}
#elif defined(TARGET_SPARC)
{
Index: qemu/linux-user/qemu.h
===================================================================
--- qemu.orig/linux-user/qemu.h 2007-10-15 13:52:13.000000000 -0600
+++ qemu/linux-user/qemu.h 2007-10-15 13:52:32.000000000 -0600
@@ -84,6 +84,9 @@
#endif
int used; /* non zero if used */
struct image_info *info;
+#ifdef USE_NPTL
+ uint32_t *child_tidptr;
+#endif
uint8_t stack[0];
} __attribute__((aligned(16))) TaskState;
Index: qemu/linux-user/syscall.c
===================================================================
--- qemu.orig/linux-user/syscall.c 2007-10-15 13:52:13.000000000 -0600
+++ qemu/linux-user/syscall.c 2007-10-15 13:52:39.000000000 -0600
@@ -70,9 +70,18 @@
#include <linux/kd.h>
#include "qemu.h"
+#include "spinlock.h"
//#define DEBUG
+#ifdef USE_NPTL
+#define CLONE_NPTL_FLAGS2 (CLONE_SETTLS | \
+ CLONE_PARENT_SETTID | CLONE_CHILD_SETTID | CLONE_CHILD_CLEARTID)
+#else
+/* XXX: Hardcode the above values. */
+#define CLONE_NPTL_FLAGS2 0
+#endif
+
#if defined(TARGET_I386) || defined(TARGET_ARM) || defined(TARGET_SPARC) \
|| defined(TARGET_M68K) || defined(TARGET_SH4)
/* 16 bit uid wrappers emulation */
@@ -2313,9 +2322,19 @@
thread/process */
#define NEW_STACK_SIZE 8192
+#ifdef USE_NPTL
+static spinlock_t nptl_lock = SPIN_LOCK_UNLOCKED;
+#endif
+
static int clone_func(void *arg)
{
CPUState *env = arg;
+#ifdef USE_NPTL
+ /* Wait until the parent has finshed initializing the tls state. */
+ while (!spin_trylock(&nptl_lock))
+ usleep(1);
+ spin_unlock(&nptl_lock);
+#endif
cpu_loop(env);
/* never exits */
return 0;
@@ -2324,11 +2343,19 @@
/* do_fork() Must return host values and target errnos (unlike most
do_*() functions). */
-int do_fork(CPUState *env, unsigned int flags, abi_ulong newsp)
+int do_fork(CPUState *env, unsigned int flags, abi_ulong newsp,
+ uint32_t *parent_tidptr, void *newtls,
+ uint32_t *child_tidptr)
{
int ret;
TaskState *ts;
uint8_t *new_stack;
CPUState *new_env;
+#ifdef USE_NPTL
+ unsigned int nptl_flags;
+
+ if (flags & CLONE_PARENT_SETTID)
+ *parent_tidptr = gettid();
+#endif
if (flags & CLONE_VM) {
ts = malloc(sizeof(TaskState) + NEW_STACK_SIZE);
@@ -2396,16 +2423,64 @@
#error unsupported target CPU
#endif
new_env->opaque = ts;
+#ifdef USE_NPTL
+ nptl_flags = flags;
+ flags &= ~CLONE_NPTL_FLAGS2;
+
+ if (nptl_flags & CLONE_CHILD_CLEARTID) {
+ ts->child_tidptr = child_tidptr;
+ }
+
+ if (nptl_flags & CLONE_SETTLS)
+ cpu_set_tls (new_env, newtls);
+
+ /* Grab the global cpu lock so that the thread setup appears
+ atomic. */
+ if (nptl_flags & CLONE_CHILD_SETTID)
+ spin_lock(&nptl_lock);
+
+#else
+ if (flags & CLONE_NPTL_FLAGS2)
+ return -EINVAL;
+#endif
#ifdef __ia64__
ret = __clone2(clone_func, new_stack + NEW_STACK_SIZE, flags, new_env);
#else
ret = clone(clone_func, new_stack + NEW_STACK_SIZE, flags, new_env);
#endif
+#ifdef USE_NPTL
+ if (ret != -1) {
+ if (nptl_flags & CLONE_CHILD_SETTID)
+ *child_tidptr = ret;
+ }
+
+ /* Allow the child to continue. */
+ if (nptl_flags & CLONE_CHILD_SETTID)
+ spin_unlock(&nptl_lock);
+#endif
} else {
/* if no CLONE_VM, we consider it is a fork */
- if ((flags & ~CSIGNAL) != 0)
+ if ((flags & ~(CSIGNAL | CLONE_NPTL_FLAGS2)) != 0)
return -EINVAL;
ret = fork();
+#ifdef USE_NPTL
+ /* There is a race condition here. The parent process could
+ theoretically read the TID in the child process before the child
+ tid is set. This would require using either ptrace
+ (not implemented) or having *_tidptr to point at a shared memory
+ mapping. We can't repeat the spinlock hack used above because
+ the child process gets its own copy of the lock. */
+ if (ret == 0) {
+ /* Child Process. */
+ if (flags & CLONE_CHILD_SETTID)
+ *child_tidptr = gettid();
+ ts = (TaskState *)env->opaque;
+ if (flags & CLONE_CHILD_CLEARTID)
+ ts->child_tidptr = child_tidptr;
+ if (flags & CLONE_SETTLS)
+ cpu_set_tls (env, newtls);
+ }
+#endif
}
return ret;
}
@@ -2727,7 +2802,7 @@
ret = do_brk(arg1);
break;
case TARGET_NR_fork:
- ret = get_errno(do_fork(cpu_env, SIGCHLD, 0));
+ ret = get_errno(do_fork(cpu_env, SIGCHLD, 0, NULL, NULL, NULL));
break;
#ifdef TARGET_NR_waitpid
case TARGET_NR_waitpid:
@@ -4170,7 +4245,8 @@
ret = get_errno(fsync(arg1));
break;
case TARGET_NR_clone:
- ret = get_errno(do_fork(cpu_env, arg1, arg2));
+ ret = get_errno(do_fork(cpu_env, arg1, arg2, (uint32_t *)arg3,
+ (void *)arg4, (uint32_t *)arg5));
break;
#ifdef __NR_exit_group
/* new thread calls */
@@ -4590,7 +4666,8 @@
#endif
#ifdef TARGET_NR_vfork
case TARGET_NR_vfork:
- ret = get_errno(do_fork(cpu_env, CLONE_VFORK | CLONE_VM | SIGCHLD, 0));
+ ret = get_errno(do_fork(cpu_env, CLONE_VFORK | CLONE_VM | SIGCHLD, 0,
+ NULL, NULL, NULL));
break;
#endif
#ifdef TARGET_NR_ugetrlimit
Index: qemu/spinlock.h
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ qemu/spinlock.h 2007-10-15 13:52:13.000000000 -0600
@@ -0,0 +1,188 @@
+/*
+ * Atomic operation helper include
+ *
+ * Copyright (c) 2005 Fabrice Bellard
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+#ifndef SPINLOCK_H
+#define SPINLOCK_H
+
+#if defined(__powerpc__)
+static inline int testandset (int *p)
+{
+ int ret;
+ __asm__ __volatile__ (
+ "0: lwarx %0,0,%1\n"
+ " xor. %0,%3,%0\n"
+ " bne 1f\n"
+ " stwcx. %2,0,%1\n"
+ " bne- 0b\n"
+ "1: "
+ : "=&r" (ret)
+ : "r" (p), "r" (1), "r" (0)
+ : "cr0", "memory");
+ return ret;
+}
+#elif defined(__i386__)
+static inline int testandset (int *p)
+{
+ long int readval = 0;
+
+ __asm__ __volatile__ ("lock; cmpxchgl %2, %0"
+ : "+m" (*p), "+a" (readval)
+ : "r" (1)
+ : "cc");
+ return readval;
+}
+#elif defined(__x86_64__)
+static inline int testandset (int *p)
+{
+ long int readval = 0;
+
+ __asm__ __volatile__ ("lock; cmpxchgl %2, %0"
+ : "+m" (*p), "+a" (readval)
+ : "r" (1)
+ : "cc");
+ return readval;
+}
+#elif defined(__s390__)
+static inline int testandset (int *p)
+{
+ int ret;
+
+ __asm__ __volatile__ ("0: cs %0,%1,0(%2)\n"
+ " jl 0b"
+ : "=&d" (ret)
+ : "r" (1), "a" (p), "0" (*p)
+ : "cc", "memory" );
+ return ret;
+}
+#elif defined(__alpha__)
+static inline int testandset (int *p)
+{
+ int ret;
+ unsigned long one;
+
+ __asm__ __volatile__ ("0: mov 1,%2\n"
+ " ldl_l %0,%1\n"
+ " stl_c %2,%1\n"
+ " beq %2,1f\n"
+ ".subsection 2\n"
+ "1: br 0b\n"
+ ".previous"
+ : "=r" (ret), "=m" (*p), "=r" (one)
+ : "m" (*p));
+ return ret;
+}
+#elif defined(__sparc__)
+static inline int testandset (int *p)
+{
+ int ret;
+
+ __asm__ __volatile__("ldstub [%1], %0"
+ : "=r" (ret)
+ : "r" (p)
+ : "memory");
+
+ return (ret ? 1 : 0);
+}
+#elif defined(__arm__)
+static inline int testandset (int *spinlock)
+{
+ register unsigned int ret;
+ __asm__ __volatile__("swp %0, %1, [%2]"
+ : "=r"(ret)
+ : "0"(1), "r"(spinlock));
+
+ return ret;
+}
+#elif defined(__mc68000)
+static inline int testandset (int *p)
+{
+ char ret;
+ __asm__ __volatile__("tas %1; sne %0"
+ : "=r" (ret)
+ : "m" (p)
+ : "cc","memory");
+ return ret;
+}
+#elif defined(__ia64)
+
+#include <ia64intrin.h>
+
+static inline int testandset (int *p)
+{
+ return __sync_lock_test_and_set (p, 1);
+}
+#elif defined(__mips__)
+static inline int testandset (int *p)
+{
+ int ret;
+
+ __asm__ __volatile__ (
+ " .set push \n"
+ " .set noat \n"
+ " .set mips2 \n"
+ "1: li $1, 1 \n"
+ " ll %0, %1 \n"
+ " sc $1, %1 \n"
+ " beqz $1, 1b \n"
+ " .set pop "
+ : "=r" (ret), "+R" (*p)
+ :
+ : "memory");
+
+ return ret;
+}
+#else
+#error unimplemented CPU support
+#endif
+
+typedef int spinlock_t; /* lock word operated on by testandset() */
+
+#define SPIN_LOCK_UNLOCKED 0 /* value of a free lock (what spin_unlock stores) */
+
+#if defined(CONFIG_USER_ONLY) /* real locks, built on testandset() */
+static inline void spin_lock(spinlock_t *lock)
+{
+ while (testandset(lock)); /* spin until testandset() returns 0 */
+}
+
+static inline void spin_unlock(spinlock_t *lock)
+{
+ *lock = 0; /* NOTE(review): plain store, no explicit barrier -- confirm */
+}
+
+static inline int spin_trylock(spinlock_t *lock)
+{
+ return !testandset(lock); /* 1 if testandset() returned 0, else 0 */
+}
+#else /* !CONFIG_USER_ONLY: the lock operations are no-ops */
+static inline void spin_lock(spinlock_t *lock)
+{
+}
+
+static inline void spin_unlock(spinlock_t *lock)
+{
+}
+
+static inline int spin_trylock(spinlock_t *lock)
+{
+ return 1; /* always reports success */
+}
+#endif /* CONFIG_USER_ONLY */
+
+#endif
Index: qemu/target-arm/cpu.h
===================================================================
--- qemu.orig/target-arm/cpu.h 2007-10-15 13:52:07.000000000 -0600
+++ qemu/target-arm/cpu.h 2007-10-15 13:52:13.000000000 -0600
@@ -37,6 +37,7 @@
#define EXCP_IRQ 5
#define EXCP_FIQ 6
#define EXCP_BKPT 7
+#define EXCP_KERNEL_TRAP 8 /* Jumped to kernel code page. */
typedef void ARMWriteCPFunc(void *opaque, int cp_info,
int srcreg, int operand, uint32_t value);
@@ -98,6 +99,7 @@
uint32_t c9_data;
uint32_t c13_fcse; /* FCSE PID. */
uint32_t c13_context; /* Context ID. */
+ uint32_t c13_tls; /* Thread ID */
uint32_t c15_cpar; /* XScale Coprocessor Access Register */
uint32_t c15_ticonfig; /* TI925T configuration byte. */
uint32_t c15_i_max; /* Maximum D-cache dirty line index. */
@@ -174,6 +176,15 @@
int cpu_arm_signal_handler(int host_signum, void *pinfo,
void *puc);
+void cpu_lock(void);
+void cpu_unlock(void);
+#if defined(USE_NPTL)
+static inline void cpu_set_tls(CPUARMState *env, void *newtls)
+{
+ env->cp15.c13_tls = (uint32_t)(long)newtls;
+}
+#endif
+
#define CPSR_M (0x1f)
#define CPSR_T (1 << 5)
#define CPSR_F (1 << 6)
@@ -185,7 +196,11 @@
#define CPSR_J (1 << 24)
#define CPSR_IT_0_1 (3 << 25)
#define CPSR_Q (1 << 27)
-#define CPSR_NZCV (0xf << 28)
+#define CPSR_V (1 << 28)
+#define CPSR_C (1 << 29)
+#define CPSR_Z (1 << 30)
+#define CPSR_N (1 << 31)
+#define CPSR_NZCV (CPSR_N | CPSR_Z | CPSR_C | CPSR_V)
#define CACHED_CPSR_BITS (CPSR_T | CPSR_Q | CPSR_NZCV)
/* Return the current CPSR value. */
Index: qemu/target-arm/exec.h
===================================================================
--- qemu.orig/target-arm/exec.h 2007-10-15 13:52:07.000000000 -0600
+++ qemu/target-arm/exec.h 2007-10-15 13:52:13.000000000 -0600
@@ -68,8 +68,6 @@
/* In op_helper.c */
-void cpu_lock(void);
-void cpu_unlock(void);
void helper_set_cp(CPUState *, uint32_t, uint32_t);
uint32_t helper_get_cp(CPUState *, uint32_t);
void helper_set_cp15(CPUState *, uint32_t, uint32_t);
Index: qemu/target-arm/op.c
===================================================================
--- qemu.orig/target-arm/op.c 2007-10-15 13:52:07.000000000 -0600
+++ qemu/target-arm/op.c 2007-10-15 13:52:13.000000000 -0600
@@ -891,6 +891,12 @@
cpu_loop_exit();
}
+void OPPROTO op_kernel_trap(void)
+{
+ env->exception_index = EXCP_KERNEL_TRAP;
+ cpu_loop_exit();
+}
+
/* VFP support. We follow the convention used for VFP instrunctions:
Single precition routines have a "s" suffix, double precision a
"d" suffix. */
Index: qemu/target-arm/op_mem.h
===================================================================
--- qemu.orig/target-arm/op_mem.h 2007-10-15 13:52:07.000000000 -0600
+++ qemu/target-arm/op_mem.h 2007-10-15 13:52:13.000000000 -0600
@@ -1,5 +1,6 @@
/* ARM memory operations. */
+void helper_ld(uint32_t);
/* Load from address T1 into T0. */
#define MEM_LD_OP(name) \
void OPPROTO glue(op_ld##name,MEMSUFFIX)(void) \
Index: qemu/target-arm/translate.c
===================================================================
--- qemu.orig/target-arm/translate.c 2007-10-15 13:52:07.000000000 -0600
+++ qemu/target-arm/translate.c 2007-10-15 13:52:13.000000000 -0600
@@ -3555,6 +3555,15 @@
nb_gen_labels = 0;
lj = -1;
do {
+#ifdef CONFIG_USER_ONLY
+ /* Intercept jump to the magic kernel page. */
+ if (dc->pc > 0xffff0000) {
+ gen_op_kernel_trap();
+ dc->is_jmp = DISAS_UPDATE;
+ break;
+ }
+#endif
+
if (env->nb_breakpoints > 0) {
for(j = 0; j < env->nb_breakpoints; j++) {
if (env->breakpoints[j] == dc->pc) {
Index: qemu/arm.ld
===================================================================
--- qemu.orig/arm.ld 2007-10-15 13:52:07.000000000 -0600
+++ qemu/arm.ld 2007-10-15 13:52:13.000000000 -0600
@@ -26,6 +26,10 @@
{ *(.rel.rodata) *(.rel.gnu.linkonce.r*) }
.rela.rodata :
{ *(.rela.rodata) *(.rela.gnu.linkonce.r*) }
+ .rel.tdata : { *(.rel.tdata .rel.tdata.* .rel.gnu.linkonce.td.*) }
+ .rela.tdata : { *(.rela.tdata .rela.tdata.* .rela.gnu.linkonce.td.*) }
+ .rel.tbss : { *(.rel.tbss .rel.tbss.* .rel.gnu.linkonce.tb.*) }
+ .rela.tbss : { *(.rela.tbss .rela.tbss.* .rela.gnu.linkonce.tb.*) }
.rel.got : { *(.rel.got) }
.rela.got : { *(.rela.got) }
.rel.ctors : { *(.rel.ctors) }
Index: qemu/target-ppc/cpu.h
===================================================================
--- qemu.orig/target-ppc/cpu.h 2007-10-15 13:52:07.000000000 -0600
+++ qemu/target-ppc/cpu.h 2007-10-15 13:52:13.000000000 -0600
@@ -589,6 +589,12 @@
void do_interrupt (CPUPPCState *env);
void ppc_hw_interrupt (CPUPPCState *env);
void cpu_loop_exit (void);
+#if defined(USE_NPTL) /* store the guest TLS pointer passed to clone */
+static inline void cpu_set_tls(CPUPPCState *env, void *newtls)
+{
+ env->gpr[2] = (uint32_t)(long)newtls; /* NOTE(review): truncates to 32 bits; configure also enables ppc64 (r13?) -- confirm */
+}
+#endif /* USE_NPTL */
void dump_stack (CPUPPCState *env);
[-- Attachment #3: 40_tls_i386.patch --]
[-- Type: text/x-patch, Size: 11371 bytes --]
Index: qemu/linux-user/main.c
===================================================================
--- qemu.orig/linux-user/main.c 2007-10-15 13:52:13.000000000 -0600
+++ qemu/linux-user/main.c 2007-10-15 13:52:29.000000000 -0600
@@ -156,7 +156,7 @@
p[1] = tswapl(e2);
}
-uint64_t gdt_table[6];
+uint64_t gdt_table[9];
uint64_t idt_table[256];
/* only dpl matters as we do only user space emulation */
Index: qemu/linux-user/syscall.c
===================================================================
--- qemu.orig/linux-user/syscall.c 2007-10-15 13:52:13.000000000 -0600
+++ qemu/linux-user/syscall.c 2007-10-15 13:52:30.000000000 -0600
@@ -183,6 +183,7 @@
#define __NR_sys_symlinkat __NR_symlinkat
#define __NR_sys_syslog __NR_syslog
#define __NR_sys_tgkill __NR_tgkill
+#define __NR_sys_clone __NR_clone
#define __NR_sys_tkill __NR_tkill
#define __NR_sys_unlinkat __NR_unlinkat
#define __NR_sys_utimensat __NR_utimensat
@@ -258,6 +259,7 @@
#if defined(TARGET_NR_tgkill) && defined(__NR_tgkill)
_syscall3(int,sys_tgkill,int,tgid,int,pid,int,sig)
#endif
+_syscall5(int,sys_clone, int, flags, void *, child_stack, int *, parent_tidptr, struct user_desc *, newtls, int *, child_tidptr)
#if defined(TARGET_NR_tkill) && defined(__NR_tkill)
_syscall2(int,sys_tkill,int,tid,int,sig)
#endif
@@ -2416,6 +2418,81 @@
return ret;
}
+/* Install a TLS descriptor in the GDT of ENV, described by the
+   struct target_modify_ldt_ldt_s at guest address PTR.  An entry_number of
+   -1 requests the first free TLS slot (GDT entries 6..8).
+   Returns 0 on success, -EINVAL for a bad slot or flag combination. */
+int do_set_thread_area(CPUX86State *env, target_ulong ptr)
+{
+    uint64_t *gdt_table = g2h(env->gdt.base);
+    struct target_modify_ldt_ldt_s ldt_info;
+    struct target_modify_ldt_ldt_s *target_ldt_info;
+    int seg_32bit, contents, read_exec_only, limit_in_pages;
+    int seg_not_present, useable, i;
+    uint32_t *lp, entry_1, entry_2;
+
+    lock_user_struct(target_ldt_info, ptr, 1);
+    ldt_info.entry_number = tswap32(target_ldt_info->entry_number);
+    ldt_info.base_addr = tswapl(target_ldt_info->base_addr);
+    ldt_info.limit = tswap32(target_ldt_info->limit);
+    ldt_info.flags = tswap32(target_ldt_info->flags);
+    if (ldt_info.entry_number == -1) {
+        for (i = 6; i <= 8; i++)
+            if (gdt_table[i] == 0) {
+                ldt_info.entry_number = i;
+                target_ldt_info->entry_number = tswap32(i);
+                break;
+            }
+    }
+    /* Copy back so the guest sees the entry_number allocated above. */
+    unlock_user_struct(target_ldt_info, ptr, 1);
+
+    if (ldt_info.entry_number < 6 || ldt_info.entry_number > 8)
+        return -EINVAL;
+    seg_32bit = ldt_info.flags & 1;
+    contents = (ldt_info.flags >> 1) & 3;
+    read_exec_only = (ldt_info.flags >> 3) & 1;
+    limit_in_pages = (ldt_info.flags >> 4) & 1;
+    seg_not_present = (ldt_info.flags >> 5) & 1;
+    useable = (ldt_info.flags >> 6) & 1;
+
+    if (contents == 3 && seg_not_present == 0)
+        return -EINVAL;
+
+    /* NOTE: same code as Linux kernel: allow LDTs to be cleared by the user. */
+    if (ldt_info.base_addr == 0 && ldt_info.limit == 0) {
+        if ((contents == 0 &&
+             read_exec_only == 1 &&
+             seg_32bit == 0 &&
+             limit_in_pages == 0 &&
+             seg_not_present == 1 &&
+             useable == 0 )) {
+            entry_1 = 0;
+            entry_2 = 0;
+            goto install;
+        }
+    }
+
+    entry_1 = ((ldt_info.base_addr & 0x0000ffff) << 16) |
+        (ldt_info.limit & 0x0ffff);
+    entry_2 = (ldt_info.base_addr & 0xff000000) |
+        ((ldt_info.base_addr & 0x00ff0000) >> 16) |
+        (ldt_info.limit & 0xf0000) |
+        ((read_exec_only ^ 1) << 9) |
+        (contents << 10) |
+        ((seg_not_present ^ 1) << 15) |
+        (seg_32bit << 22) |
+        (limit_in_pages << 23) |
+        (useable << 20) |
+        0x7000;
+
+    /* Install the new entry ... */
+install:
+    lp = (uint32_t *)(gdt_table + ldt_info.entry_number);
+    lp[0] = tswap32(entry_1);
+    lp[1] = tswap32(entry_2);
+    return 0;
+}
#endif /* defined(TARGET_I386) */
/* this stack is the equivalent of the kernel stack associated with a
@@ -2426,40 +2503,62 @@
static spinlock_t nptl_lock = SPIN_LOCK_UNLOCKED;
#endif
-static int clone_func(void *arg)
+static int clone_func(CPUState *cloneenv)
{
- CPUState *env = arg;
#ifdef USE_NPTL
/* Wait until the parent has finshed initializing the tls state. */
while (!spin_trylock(&nptl_lock))
usleep(1);
spin_unlock(&nptl_lock);
#endif
- cpu_loop(env);
+ cpu_loop(cloneenv);
/* never exits */
return 0;
}
+#ifdef __ia64__
+#define clone(...) __clone2(__VA_ARGS__)
+#endif
+
/* do_fork() Must return host values and target errnos (unlike most
* do_*() functions).
*/
int do_fork(CPUState *env, unsigned int flags, target_ulong newsp,
- uint32_t *parent_tidptr, void *newtls,
- uint32_t *child_tidptr)
+ target_ulong parent_tidptr, target_ulong newtls,
+ target_ulong child_tidptr)
{
int ret;
TaskState *ts;
uint8_t *new_stack;
CPUState *new_env;
#ifdef USE_NPTL
+ unsigned long parent_tid=gettid();
+#if defined(TARGET_I386)
+ uint64_t *new_gdt_table;
+#endif
unsigned int nptl_flags;
- if (flags & CLONE_PARENT_SETTID)
- *parent_tidptr = gettid();
+ /* check for invalid combinations */
+ if (((flags & CLONE_PARENT_SETTID) && !parent_tidptr)
+ || ((flags & CLONE_CHILD_SETTID) && !child_tidptr))
+ return -EINVAL;
+
+ if (flags & CLONE_CHILD_SETTID
+ && !access_ok(VERIFY_WRITE, child_tidptr, sizeof(target_ulong)))
+ return -EFAULT;
+
+ if (flags & CLONE_PARENT_SETTID) {
+     if (!access_ok(VERIFY_WRITE, parent_tidptr, sizeof(target_ulong)))
+         return -EFAULT;
+     /* NOTE(review): stores the caller's tid; clone(2) wants the child's. */
+     tput32(parent_tidptr, parent_tid);
+ }
#endif
if (flags & CLONE_VM) {
ts = malloc(sizeof(TaskState) + NEW_STACK_SIZE);
+ if (!ts)
+ return -ENOMEM;
memset(ts, 0, sizeof(TaskState));
new_stack = ts->stack;
ts->used = 1;
@@ -2471,6 +2570,31 @@
#if defined(TARGET_I386)
if (!newsp)
newsp = env->regs[R_ESP];
+#ifdef USE_NPTL
+ new_gdt_table = malloc(9 * 8);
+ if (!new_gdt_table) {
+ free(ts);
+ free(new_env);
+ return -ENOMEM;
+ }
+ /* Copy main GDT table from parent, but clear TLS entries */
+ memcpy(new_gdt_table, g2h(env->gdt.base), 6 * 8);
+ memset(&new_gdt_table[6], 0, 3 * 8);
+ new_env->gdt.base = h2g(new_gdt_table);
+ if (flags & CLONE_SETTLS) {
+ ret = do_set_thread_area(new_env, newtls);
+ if (ret) {
+ free(ts);
+ free(new_env);
+ free(new_gdt_table);
+ return ret;
+ }
+ }
+#endif /* USE_NPTL */
+
+ cpu_x86_load_seg(new_env, R_FS, new_env->segs[R_FS].selector);
+ cpu_x86_load_seg(new_env, R_GS, new_env->segs[R_GS].selector);
+
new_env->regs[R_ESP] = newsp;
new_env->regs[R_EAX] = 0;
#elif defined(TARGET_ARM)
@@ -2540,18 +2664,22 @@
spin_lock(&nptl_lock);
#else
- if (flags & CLONE_NPTL_FLAGS2)
+ if (flags & CLONE_NPTL_FLAGS2) {
+ free(ts);
+ free(new_env);
+#ifdef USE_NPTL
+ free(new_gdt_table);
+#endif
return -EINVAL;
+ }
#endif
-#ifdef __ia64__
- ret = __clone2(clone_func, new_stack + NEW_STACK_SIZE, flags, new_env);
-#else
ret = clone(clone_func, new_stack + NEW_STACK_SIZE, flags, new_env);
-#endif
#ifdef USE_NPTL
- if (ret != -1) {
- if (nptl_flags & CLONE_CHILD_SETTID)
- *child_tidptr = ret;
+ if (ret != -1 && nptl_flags & CLONE_CHILD_SETTID) {
+ if (ret==0) // only in client memory for fork()
+ tput32(child_tidptr, gettid());
+ else if (flags & CLONE_VM) // real threads need it too
+ tput32(child_tidptr, ret);
}
/* Allow the child to continue. */
@@ -2562,7 +2690,7 @@
/* if no CLONE_VM, we consider it is a fork */
if ((flags & ~(CSIGNAL | CLONE_NPTL_FLAGS2)) != 0)
return -EINVAL;
- ret = fork();
+ ret = sys_clone(flags & ~CLONE_NPTL_FLAGS2, 0, g2h(parent_tidptr), NULL, g2h(child_tidptr));
#ifdef USE_NPTL
/* There is a race condition here. The parent process could
theoretically read the TID in the child process before the child
@@ -2570,10 +2698,13 @@
(not implemented) or having *_tidptr to point at a shared memory
mapping. We can't repeat the spinlock hack used above because
the child process gets its own copy of the lock. */
+ if (flags & CLONE_CHILD_SETTID) {
+ if (ret == 0)
+ tput32(child_tidptr, gettid());
+ else if (flags & CLONE_VM)
+ tput32(child_tidptr, ret);
+ }
if (ret == 0) {
- /* Child Process. */
- if (flags & CLONE_CHILD_SETTID)
- *child_tidptr = gettid();
ts = (TaskState *)env->opaque;
if (flags & CLONE_CHILD_CLEARTID)
ts->child_tidptr = child_tidptr;
@@ -2585,6 +2716,10 @@
return ret;
}
+#ifdef __ia64__
+#undef clone
+#endif
+
static target_long do_fcntl(int fd, int cmd, target_ulong arg)
{
struct flock fl;
@@ -2940,7 +3075,7 @@
_mcleanup();
#endif
gdb_exit(cpu_env, arg1);
- /* XXX: should free thread stack and CPU env */
+ /* XXX: should free thread stack, GDT and CPU env */
_exit(arg1);
ret = 0; /* avoid warning */
break;
@@ -5420,12 +5555,12 @@
#ifdef TARGET_NR_set_thread_area
case TARGET_NR_set_thread_area:
#ifdef TARGET_MIPS
- ((CPUMIPSState *) cpu_env)->tls_value = arg1;
- ret = 0;
- break;
+ ((CPUMIPSState *) cpu_env)->tls_value = arg1;
+ ret = 0;
#else
- goto unimplemented_nowarn;
+ ret = get_errno(do_set_thread_area(cpu_env, arg1));
#endif
+ break;
#endif
#ifdef TARGET_NR_get_thread_area
case TARGET_NR_get_thread_area:
Index: qemu/configure
===================================================================
--- qemu.orig/configure 2007-10-15 13:52:13.000000000 -0600
+++ qemu/configure 2007-10-15 13:52:13.000000000 -0600
@@ -1153,7 +1153,7 @@
else
if test "$nptl" = "yes" ; then
case "$target_cpu" in
- arm | armeb | ppc | ppc64)
+ arm | armeb | i386 | ppc | ppc64)
echo "#define USE_NPTL 1" >> $config_h
;;
esac
Index: qemu/target-i386/cpu.h
===================================================================
--- qemu.orig/target-i386/cpu.h 2007-10-15 13:52:06.000000000 -0600
+++ qemu/target-i386/cpu.h 2007-10-15 13:52:13.000000000 -0600
@@ -567,6 +567,9 @@
int cpu_get_pic_interrupt(CPUX86State *s);
/* MSDOS compatibility mode FPU exception support */
void cpu_set_ferr(CPUX86State *s);
+#if defined(USE_NPTL)
+#define cpu_set_tls(...) do {} while(0)
+#endif
/* this function must always be used to load data in the segment
cache: it synchronizes the hflags with the segment cache values */
^ permalink raw reply [flat|nested] 7+ messages in thread