From: Mukesh Rathor <mukesh.rathor@oracle.com>
To: "Xen-devel@lists.xensource.com" <Xen-devel@lists.xensource.com>
Subject: Linux spin lock enhancement on xen
Date: Mon, 16 Aug 2010 18:33:57 -0700 [thread overview]
Message-ID: <20100816183357.08623c4c@mantra.us.oracle.com> (raw)
[-- Attachment #1: Type: text/plain, Size: 859 bytes --]
Hi guys,
Check out the attached patches. I changed the spin lock semantics so that the
lock word contains the vcpu id of the vcpu holding it. This lets xen
make that vcpu runnable if it is not already running:
Linux:
spin_lock()
if (try_lock() == failed)
loop X times
if (try_lock() == failed)
sched_op_yield_to(vcpu_num of holder)
start again;
endif
endif
Xen:
sched_op_yield_to:
if (vcpu_running(vcpu_num arg))
do nothing
else
vcpu_kick(vcpu_num arg)
do_yield()
endif
In my worst case test scenario, I get about 20-36% improvement when the
system is two to three times over provisioned.
Please provide any feedback. I would like to submit an official patch for
SCHEDOP_yield_to in xen.
thanks,
Mukesh
[-- Attachment #2: spin-lin.diff --]
[-- Type: text/x-patch, Size: 5303 bytes --]
diff --git a/Makefile b/Makefile
index 8b0b42f..d8d1dbb 100644
--- a/Makefile
+++ b/Makefile
@@ -303,7 +303,8 @@ LINUXINCLUDE := -Iinclude \
$(if $(KBUILD_SRC),-Iinclude2 -I$(srctree)/include) \
-include include/linux/autoconf.h
-CPPFLAGS := -D__KERNEL__ $(LINUXINCLUDE)
+# CPPFLAGS := -D__KERNEL__ $(LINUXINCLUDE)
+CPPFLAGS := -D__KERNEL__ $(LINUXINCLUDE) -D _XEN_SPIN_LOCK
CFLAGS := -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \
-fno-strict-aliasing -fno-common -Wstrict-prototypes -Wundef -Werror-implicit-function-declaration -fno-delete-null-pointer-checks
diff --git a/include/asm-i386/mach-xen/asm/hypervisor.h b/include/asm-i386/mach-xen/asm/hypervisor.h
index 89cde62..a3806f1 100644
--- a/include/asm-i386/mach-xen/asm/hypervisor.h
+++ b/include/asm-i386/mach-xen/asm/hypervisor.h
@@ -143,6 +143,15 @@ HYPERVISOR_yield(
}
static inline int
+HYPERVISOR_yield_to(uint vcpu)
+{
+ struct sched_yield_to yield_to = { .version = 1, .vcpu_id = vcpu };
+ int rc = HYPERVISOR_sched_op(SCHEDOP_yield_to, &yield_to);
+ /* TBD: compat */
+ return rc;
+}
+
+static inline int
HYPERVISOR_block(
void)
{
diff --git a/include/asm-x86_64/spinlock.h b/include/asm-x86_64/spinlock.h
index a8e3d89..c76e20f 100644
--- a/include/asm-x86_64/spinlock.h
+++ b/include/asm-x86_64/spinlock.h
@@ -16,6 +16,81 @@
* (the type definitions are in asm/spinlock_types.h)
*/
+#ifdef _XEN_SPIN_LOCK
+#include <asm/hypervisor.h>
+
+#define __raw_spin_is_locked(x) \
+ (*(volatile signed int *)(&(x)->slock) >= 0)
+
+static inline int _attempt_raw_spin_lock(raw_spinlock_t *lock)
+{
+ const int COUNTMAX = 10000, myid=read_pda(cpunumber);
+ int oldval;
+
+ asm volatile
+ ("1: movsxl %1, %%rax \n"
+ " cmpq $0, %%rax \n"
+ " jge 4f \n"
+ "2: \n"
+ LOCK_PREFIX " cmpxchgl %k2, %1 \n"
+ " jnz 4f \n"
+ "3: /* exit */ \n"
+ LOCK_SECTION_START("")
+ "4: xor %%rdx, %%rdx \n"
+ "6: inc %%rdx \n"
+ " cmpl %k3, %%edx \n"
+ " jge 3b \n"
+ " pause \n"
+ " movsxl %1, %%rax \n"
+ " cmpq $0, %%rax \n"
+ " jge 6b \n"
+ " jmp 2b \n"
+ LOCK_SECTION_END
+
+ : "=&a" (oldval)
+ : "m" (lock->slock), "c" (myid), "g" (COUNTMAX)
+ : "rdx", "memory", "cc"
+ );
+ return oldval;
+}
+
+static inline void __raw_spin_lock(raw_spinlock_t *lock)
+{
+ int rc, old_lock_holder;
+
+ do {
+ old_lock_holder = _attempt_raw_spin_lock(lock);
+
+ if (old_lock_holder >= 0)
+ if ((rc=HYPERVISOR_yield_to(old_lock_holder)) != 0)
+ printk("XEN: Yield failed. rc:%d\n", rc);
+ } while (old_lock_holder != -1);
+}
+
+#define __raw_spin_lock_flags(lock, flags) __raw_spin_lock(lock)
+
+static inline int __raw_spin_trylock(raw_spinlock_t *lock)
+{
+ int oldval, myid = read_pda(cpunumber);
+
+ __asm__ __volatile__ (
+ "movl $-1, %%eax \n"
+ LOCK_PREFIX " cmpxchgl %k2, %1 \n"
+ : "=&a" (oldval)
+ : "m" (lock->slock), "c" (myid)
+ : "memory", "cc"
+ );
+
+ return (oldval == -1);
+}
+
+static inline void __raw_spin_unlock(raw_spinlock_t *lock)
+{
+ __asm__ __volatile__ ("movl $-1, %0" : "=m"(lock->slock) : : "memory");
+}
+
+#else
+
#define __raw_spin_is_locked(x) \
(*(volatile signed int *)(&(x)->slock) <= 0)
@@ -64,6 +139,8 @@ static inline void __raw_spin_unlock(raw_spinlock_t *lock)
);
}
+#endif
+
#define __raw_spin_unlock_wait(lock) \
do { while (__raw_spin_is_locked(lock)) cpu_relax(); } while (0)
@@ -124,4 +201,5 @@ static inline void __raw_write_unlock(raw_rwlock_t *rw)
: "=m" (rw->lock) : : "memory");
}
+
#endif /* __ASM_SPINLOCK_H */
diff --git a/include/asm-x86_64/spinlock_types.h b/include/asm-x86_64/spinlock_types.h
index 59efe84..6fb8da0 100644
--- a/include/asm-x86_64/spinlock_types.h
+++ b/include/asm-x86_64/spinlock_types.h
@@ -9,7 +9,11 @@ typedef struct {
volatile unsigned int slock;
} raw_spinlock_t;
+#ifdef _XEN_SPIN_LOCK
+#define __RAW_SPIN_LOCK_UNLOCKED { -1 }
+#else
#define __RAW_SPIN_LOCK_UNLOCKED { 1 }
+#endif
typedef struct {
volatile unsigned int lock;
diff --git a/include/xen/interface/sched.h b/include/xen/interface/sched.h
index abf11cc..dc60001 100644
--- a/include/xen/interface/sched.h
+++ b/include/xen/interface/sched.h
@@ -90,6 +90,17 @@ DEFINE_XEN_GUEST_HANDLE(sched_remote_shutdown_t);
#define SHUTDOWN_suspend 2 /* Clean up, save suspend info, kill. */
#define SHUTDOWN_crash 3 /* Tell controller we've crashed. */
+
+/*
+ * Voluntarily yield the CPU to another given vcpu
+ * @arg == vcpu info.
+ */
+#define SCHEDOP_yield_to 5
+struct sched_yield_to {
+ unsigned int version;
+ unsigned int vcpu_id;
+};
+
#endif /* __XEN_PUBLIC_SCHED_H__ */
/*
[-- Attachment #3: spin-xen.diff --]
[-- Type: text/x-patch, Size: 1540 bytes --]
diff -r c840095b9359 xen/common/schedule.c
--- a/xen/common/schedule.c Mon Jul 26 03:55:45 2010 -0700
+++ b/xen/common/schedule.c Mon Aug 16 18:33:07 2010 -0700
@@ -627,6 +627,30 @@ ret_t do_sched_op(int cmd, XEN_GUEST_HAN
break;
}
+ case SCHEDOP_yield_to:
+ {
+ struct sched_yield_to yld_s;
+ struct vcpu *vp;
+ struct domain *dp = current->domain;
+
+ ret = -EFAULT;
+ if ( copy_from_guest(&yld_s, arg, 1) )
+ break;
+
+ ret = -EINVAL;
+ if (is_idle_vcpu(current) || yld_s.vcpu_id > dp->max_vcpus)
+ break;
+
+ vp = dp->vcpu[yld_s.vcpu_id];
+ if (!vp->is_running) {
+ vcpu_kick(dp->vcpu[yld_s.vcpu_id]);
+ ret = do_yield();
+ } else
+ ret = 0;
+
+ break;
+ }
+
case SCHEDOP_block:
{
ret = do_block();
diff -r c840095b9359 xen/include/public/sched.h
--- a/xen/include/public/sched.h Mon Jul 26 03:55:45 2010 -0700
+++ b/xen/include/public/sched.h Mon Aug 16 18:33:07 2010 -0700
@@ -108,6 +108,17 @@ DEFINE_XEN_GUEST_HANDLE(sched_remote_shu
#define SHUTDOWN_suspend 2 /* Clean up, save suspend info, kill. */
#define SHUTDOWN_crash 3 /* Tell controller we've crashed. */
+
+/*
+ * * Voluntarily yield the CPU to another given vcpu
+ * * @arg == vcpu info.
+ * */
+#define SCHEDOP_yield_to 5
+struct sched_yield_to {
+ unsigned int version;
+ unsigned int vcpu_id;
+};
+
#endif /* __XEN_PUBLIC_SCHED_H__ */
/*
[-- Attachment #4: Type: text/plain, Size: 138 bytes --]
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
next reply other threads:[~2010-08-17 1:33 UTC|newest]
Thread overview: 23+ messages / expand[flat|nested] mbox.gz Atom feed top
2010-08-17 1:33 Mukesh Rathor [this message]
2010-08-17 7:33 ` Linux spin lock enhancement on xen Keir Fraser
2010-08-17 7:53 ` Jan Beulich
2010-08-18 1:58 ` Mukesh Rathor
2010-08-17 14:34 ` Ky Srinivasan
2010-08-18 1:58 ` Mukesh Rathor
2010-08-17 17:43 ` Jeremy Fitzhardinge
2010-08-18 1:58 ` Mukesh Rathor
2010-08-18 16:37 ` Jeremy Fitzhardinge
2010-08-18 17:09 ` Keir Fraser
2010-08-19 2:52 ` Mukesh Rathor
2010-08-24 8:08 ` George Dunlap
2010-08-24 8:20 ` Keir Fraser
2010-08-24 8:43 ` George Dunlap
2010-08-24 8:48 ` Jan Beulich
2010-08-24 9:09 ` George Dunlap
2010-08-24 13:25 ` Jan Beulich
2010-08-24 16:11 ` George Dunlap
2010-08-26 14:08 ` Tim Deegan
2010-08-25 1:03 ` Dong, Eddie
2010-08-26 2:13 ` Mukesh Rathor
2010-08-19 2:52 ` Mukesh Rathor
2010-08-23 21:33 ` Jeremy Fitzhardinge
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20100816183357.08623c4c@mantra.us.oracle.com \
--to=mukesh.rathor@oracle.com \
--cc=Xen-devel@lists.xensource.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).