From mboxrd@z Thu Jan 1 00:00:00 1970 From: Mukesh Rathor Subject: Linux spin lock enhancement on xen Date: Mon, 16 Aug 2010 18:33:57 -0700 Message-ID: <20100816183357.08623c4c@mantra.us.oracle.com> Mime-Version: 1.0 Content-Type: multipart/mixed; boundary="MP_/.g89yXnNivP59eIJxZh=W9N" Return-path: List-Unsubscribe: , List-Post: List-Help: List-Subscribe: , Sender: xen-devel-bounces@lists.xensource.com Errors-To: xen-devel-bounces@lists.xensource.com To: "Xen-devel@lists.xensource.com" List-Id: xen-devel@lists.xenproject.org --MP_/.g89yXnNivP59eIJxZh=W9N Content-Type: text/plain; charset=US-ASCII Content-Transfer-Encoding: 7bit Content-Disposition: inline Hi guys, Check out the attached patches. I changed the spin lock semantics so the lock contains the vcpu id of the vcpu holding it. This then tells xen to make that vcpu runnable if not already running: Linux: spin_lock() if (try_lock() == failed) loop X times if (try_lock() == failed) sched_op_yield_to(vcpu_num of holder) start again; endif endif Xen: sched_op_yield_to: if (vcpu_running(vcpu_num arg)) do nothing else vcpu_kick(vcpu_num arg) do_yield() endif In my worst case test scenario, I get about 20-36% improvement when the system is two to three times over provisioned. Please provide any feedback. I would like to submit official patch for SCHEDOP_yield_to in xen. 
thanks, Mukesh --MP_/.g89yXnNivP59eIJxZh=W9N Content-Type: text/x-patch Content-Transfer-Encoding: 7bit Content-Disposition: attachment; filename=spin-lin.diff diff --git a/Makefile b/Makefile index 8b0b42f..d8d1dbb 100644 --- a/Makefile +++ b/Makefile @@ -303,7 +303,8 @@ LINUXINCLUDE := -Iinclude \ $(if $(KBUILD_SRC),-Iinclude2 -I$(srctree)/include) \ -include include/linux/autoconf.h -CPPFLAGS := -D__KERNEL__ $(LINUXINCLUDE) +# CPPFLAGS := -D__KERNEL__ $(LINUXINCLUDE) +CPPFLAGS := -D__KERNEL__ $(LINUXINCLUDE) -D _XEN_SPIN_LOCK CFLAGS := -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \ -fno-strict-aliasing -fno-common -Wstrict-prototypes -Wundef -Werror-implicit-function-declaration -fno-delete-null-pointer-checks diff --git a/include/asm-i386/mach-xen/asm/hypervisor.h b/include/asm-i386/mach-xen/asm/hypervisor.h index 89cde62..a3806f1 100644 --- a/include/asm-i386/mach-xen/asm/hypervisor.h +++ b/include/asm-i386/mach-xen/asm/hypervisor.h @@ -143,6 +143,15 @@ HYPERVISOR_yield( } static inline int +HYPERVISOR_yield_to(uint vcpu) +{ + struct sched_yield_to yield_to = { .version = 1, .vcpu_id = vcpu }; + int rc = HYPERVISOR_sched_op(SCHEDOP_yield_to, &yield_to); + /* TBD: compat */ + return rc; +} + +static inline int HYPERVISOR_block( void) { diff --git a/include/asm-x86_64/spinlock.h b/include/asm-x86_64/spinlock.h index a8e3d89..c76e20f 100644 --- a/include/asm-x86_64/spinlock.h +++ b/include/asm-x86_64/spinlock.h @@ -16,6 +16,81 @@ * (the type definitions are in asm/spinlock_types.h) */ +#ifdef _XEN_SPIN_LOCK +#include + +#define __raw_spin_is_locked(x) \ + (*(volatile signed int *)(&(x)->slock) >= 0) + +static inline int _attempt_raw_spin_lock(raw_spinlock_t *lock) +{ + const int COUNTMAX = 10000, myid=read_pda(cpunumber); + int oldval; + + asm volatile + ("1: movsxl %1, %%rax \n" + " cmpq $0, %%rax \n" + " jge 4f \n" + "2: \n" + LOCK_PREFIX " cmpxchgl %k2, %1 \n" + " jnz 4f \n" + "3: /* exit */ \n" + LOCK_SECTION_START("") + "4: xor %%rdx, %%rdx \n" + "6: inc 
%%rdx \n" + " cmpl %k3, %%edx \n" + " jge 3b \n" + " pause \n" + " movsxl %1, %%rax \n" + " cmpq $0, %%rax \n" + " jge 6b \n" + " jmp 2b \n" + LOCK_SECTION_END + + : "=&a" (oldval) + : "m" (lock->slock), "c" (myid), "g" (COUNTMAX) + : "rdx", "memory", "cc" + ); + return oldval; +} + +static inline void __raw_spin_lock(raw_spinlock_t *lock) +{ + int rc, old_lock_holder; + + do { + old_lock_holder = _attempt_raw_spin_lock(lock); + + if (old_lock_holder >= 0) + if ((rc=HYPERVISOR_yield_to(old_lock_holder)) != 0) + printk("XEN: Yield failed. rc:%d\n", rc); + } while (old_lock_holder != -1); +} + +#define __raw_spin_lock_flags(lock, flags) __raw_spin_lock(lock) + +static inline int __raw_spin_trylock(raw_spinlock_t *lock) +{ + int oldval, myid = read_pda(cpunumber); + + __asm__ __volatile__ ( + "movl $-1, %%eax \n" + LOCK_PREFIX " cmpxchgl %k2, %1 \n" + : "=&a" (oldval) + : "m" (lock->slock), "c" (myid) + : "memory", "cc" + ); + + return (oldval == -1); +} + +static inline void __raw_spin_unlock(raw_spinlock_t *lock) +{ + __asm__ __volatile__ ("movl $-1, %0" : "=m"(lock->slock) : : "memory"); +} + +#else + #define __raw_spin_is_locked(x) \ (*(volatile signed int *)(&(x)->slock) <= 0) @@ -64,6 +139,8 @@ static inline void __raw_spin_unlock(raw_spinlock_t *lock) ); } +#endif + #define __raw_spin_unlock_wait(lock) \ do { while (__raw_spin_is_locked(lock)) cpu_relax(); } while (0) @@ -124,4 +201,5 @@ static inline void __raw_write_unlock(raw_rwlock_t *rw) : "=m" (rw->lock) : : "memory"); } + #endif /* __ASM_SPINLOCK_H */ diff --git a/include/asm-x86_64/spinlock_types.h b/include/asm-x86_64/spinlock_types.h index 59efe84..6fb8da0 100644 --- a/include/asm-x86_64/spinlock_types.h +++ b/include/asm-x86_64/spinlock_types.h @@ -9,7 +9,11 @@ typedef struct { volatile unsigned int slock; } raw_spinlock_t; +#ifdef _XEN_SPIN_LOCK +#define __RAW_SPIN_LOCK_UNLOCKED { -1 } +#else #define __RAW_SPIN_LOCK_UNLOCKED { 1 } +#endif typedef struct { volatile unsigned int lock; diff --git 
a/include/xen/interface/sched.h b/include/xen/interface/sched.h index abf11cc..dc60001 100644 --- a/include/xen/interface/sched.h +++ b/include/xen/interface/sched.h @@ -90,6 +90,17 @@ DEFINE_XEN_GUEST_HANDLE(sched_remote_shutdown_t); #define SHUTDOWN_suspend 2 /* Clean up, save suspend info, kill. */ #define SHUTDOWN_crash 3 /* Tell controller we've crashed. */ + +/* + * Voluntarily yield the CPU to another given vcpu + * @arg == vcpu info. + */ +#define SCHEDOP_yield_to 5 +struct sched_yield_to { + unsigned int version; + unsigned int vcpu_id; +}; + #endif /* __XEN_PUBLIC_SCHED_H__ */ /* --MP_/.g89yXnNivP59eIJxZh=W9N Content-Type: text/x-patch Content-Transfer-Encoding: 7bit Content-Disposition: attachment; filename=spin-xen.diff diff -r c840095b9359 xen/common/schedule.c --- a/xen/common/schedule.c Mon Jul 26 03:55:45 2010 -0700 +++ b/xen/common/schedule.c Mon Aug 16 18:33:07 2010 -0700 @@ -627,6 +627,30 @@ ret_t do_sched_op(int cmd, XEN_GUEST_HAN break; } + case SCHEDOP_yield_to: + { + struct sched_yield_to yld_s; + struct vcpu *vp; + struct domain *dp = current->domain; + + ret = -EFAULT; + if ( copy_from_guest(&yld_s, arg, 1) ) + break; + + ret = -EINVAL; + if (is_idle_vcpu(current) || yld_s.vcpu_id >= dp->max_vcpus) + break; + + vp = dp->vcpu[yld_s.vcpu_id]; + if (!vp->is_running) { + vcpu_kick(dp->vcpu[yld_s.vcpu_id]); + ret = do_yield(); + } else + ret = 0; + + break; + } + case SCHEDOP_block: { ret = do_block(); diff -r c840095b9359 xen/include/public/sched.h --- a/xen/include/public/sched.h Mon Jul 26 03:55:45 2010 -0700 +++ b/xen/include/public/sched.h Mon Aug 16 18:33:07 2010 -0700 @@ -108,6 +108,17 @@ DEFINE_XEN_GUEST_HANDLE(sched_remote_shu #define SHUTDOWN_suspend 2 /* Clean up, save suspend info, kill. */ #define SHUTDOWN_crash 3 /* Tell controller we've crashed. */ + +/* + * Voluntarily yield the CPU to another given vcpu + * @arg == vcpu info. 
+ */ +#define SCHEDOP_yield_to 5 +struct sched_yield_to { + unsigned int version; + unsigned int vcpu_id; +}; + #endif /* __XEN_PUBLIC_SCHED_H__ */ /* --MP_/.g89yXnNivP59eIJxZh=W9N Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Content-Disposition: inline _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel --MP_/.g89yXnNivP59eIJxZh=W9N--