* Re: [PATCH] xen, xen-sparse: modify spinlocks to use directed yield
2005-05-20 16:54 [PATCH] xen, xen-sparse: modify spinlocks to use directed yield Ryan Harper
@ 2005-05-20 18:16 ` Ryan Harper
2005-05-20 18:17 ` Ryan Harper
2005-06-03 19:40 ` [PATCH] Yield to VCPU hcall, spinlock yielding Ryan Harper
2 siblings, 0 replies; 4+ messages in thread
From: Ryan Harper @ 2005-05-20 18:16 UTC (permalink / raw)
To: Ryan Harper; +Cc: xen-devel
* Ryan Harper <ryanh@us.ibm.com> [2005-05-20 11:55]:
> The following patch creates a new hypercall, do_confer() which allows a
Oops. I left in fixes to my domU config which doesn't exist in the main
tree. I've got that part removed in this version.
--
Ryan Harper
Software Engineer; Linux Technology Center
IBM Corp., Austin, Tx
(512) 838-9253 T/L: 678-9253
ryanh@us.ibm.com
diffstat output:
linux-2.6.11-xen-sparse/arch/i386/lib/Makefile | 11
linux-2.6.11-xen-sparse/arch/i386/lib/locks.c | 76 +++++
linux-2.6.11-xen-sparse/arch/xen/i386/kernel/entry.S | 2
linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/hypercall.h | 16 +
linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/spinlock.h | 140 ++++++++---
xen/arch/x86/domain.c | 2
xen/arch/x86/x86_32/entry.S | 1
xen/common/domain.c | 1
xen/common/schedule.c | 69 +++++
xen/include/public/xen.h | 11
xen/include/xen/sched.h | 9
11 files changed, 300 insertions(+), 38 deletions(-)
Signed-off-by: Ryan Harper <ryanh@us.ibm.com>
---
diff -urN b/linux-2.6.11-xen-sparse/arch/i386/lib/locks.c confer/linux-2.6.11-xen-sparse/arch/i386/lib/locks.c
--- b/linux-2.6.11-xen-sparse/arch/i386/lib/locks.c 1969-12-31 18:00:00.000000000 -0600
+++ confer/linux-2.6.11-xen-sparse/arch/i386/lib/locks.c 2005-05-20 10:37:58.300767080 -0500
@@ -0,0 +1,76 @@
+/*
+ * Spin and read/write lock operations.
+ *
+ * Copyright (C) 2001-2004 Paul Mackerras <paulus@au.ibm.com>, IBM
+ * Copyright (C) 2001 Anton Blanchard <anton@au.ibm.com>, IBM
+ * Copyright (C) 2002 Dave Engebretsen <engebret@us.ibm.com>, IBM
+ * Rework to support virtual processors
+ * Copyright (C) 2005 Ryan Harper <ryanh@us.ibm.com>, IBM
+ * Rework for Xen on x86
+ *
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/spinlock.h>
+#include <linux/module.h>
+#include <linux/stringify.h>
+#include <asm/hypercall.h>
+#include <asm/processor.h>
+
+/* waiting for a spinlock... */
+#if defined(CONFIG_XEN) && defined(CONFIG_SMP)
+void __spin_yield(spinlock_t *lock)
+{
+ unsigned int lock_value, holder_cpu, yield_count;
+ shared_info_t *s = HYPERVISOR_shared_info;
+
+ lock_value = lock->slock;
+ if (lock_value == 1)
+ return;
+ holder_cpu = lock->cpu;
+ BUG_ON(holder_cpu >= NR_CPUS);
+ yield_count = s->vcpu_data[holder_cpu].yield_count;
+ if ((yield_count & 1) == 0)
+ return; /* virtual cpu is currently running */
+ rmb();
+ if (lock->slock != lock_value)
+ return; /* something has changed */
+ HYPERVISOR_confer(holder_cpu, yield_count);
+}
+
+void __rw_yield(rwlock_t *rw)
+{
+ unsigned int lock_value, holder_cpu, yield_count;
+ shared_info_t *s = HYPERVISOR_shared_info;
+
+ lock_value = rw->lock;
+ if (lock_value == RW_LOCK_BIAS)
+ return;
+ holder_cpu = rw->cpu;
+ BUG_ON(holder_cpu >= NR_CPUS);
+ yield_count = s->vcpu_data[holder_cpu].yield_count;
+ if ((yield_count & 1) == 0)
+ return; /* virtual cpu is currently running */
+ rmb();
+ if (rw->lock != lock_value)
+ return; /* something has changed */
+ HYPERVISOR_confer(holder_cpu, yield_count);
+}
+
+void spin_unlock_wait(spinlock_t *lock)
+{
+ while (spin_is_locked(lock)) {
+ cpu_relax();
+ if (SHARED_PROCESSOR)
+ __spin_yield(lock);
+ }
+ cpu_relax();
+}
+EXPORT_SYMBOL(spin_unlock_wait);
+#endif
diff -urN b/linux-2.6.11-xen-sparse/arch/i386/lib/Makefile confer/linux-2.6.11-xen-sparse/arch/i386/lib/Makefile
--- b/linux-2.6.11-xen-sparse/arch/i386/lib/Makefile 1969-12-31 18:00:00.000000000 -0600
+++ confer/linux-2.6.11-xen-sparse/arch/i386/lib/Makefile 2005-05-20 10:37:58.301766928 -0500
@@ -0,0 +1,11 @@
+#
+# Makefile for i386-specific library files..
+#
+
+
+lib-y = checksum.o delay.o usercopy.o getuser.o memcpy.o strstr.o \
+ bitops.o
+
+lib-$(CONFIG_X86_USE_3DNOW) += mmx.o
+lib-$(CONFIG_HAVE_DEC_LOCK) += dec_and_lock.o
+lib-$(CONFIG_XEN) += locks.o
diff -urN b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/entry.S confer/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/entry.S
--- b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/entry.S 2005-05-19 22:20:32.000000000 -0500
+++ confer/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/entry.S 2005-05-20 10:37:58.304766472 -0500
@@ -80,7 +80,7 @@
#define evtchn_upcall_pending /* 0 */
#define evtchn_upcall_mask 1
-#define sizeof_vcpu_shift 3
+#define sizeof_vcpu_shift 4
#ifdef CONFIG_SMP
#define preempt_disable(reg) incl TI_preempt_count(reg)
diff -urN b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/hypercall.h confer/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/hypercall.h
--- b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/hypercall.h 2005-05-19 22:20:32.000000000 -0500
+++ confer/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/hypercall.h 2005-05-20 10:37:58.306766168 -0500
@@ -517,4 +517,20 @@
return ret;
}
+static inline int
+HYPERVISOR_confer(
+ unsigned int vcpu, unsigned int yield_count)
+{
+ int ret;
+ unsigned long ign1, ign2;
+
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret), "=b" (ign1), "=c" (ign2)
+ : "0" (__HYPERVISOR_confer), "1" (vcpu), "2" (yield_count)
+ : "memory");
+
+ return ret;
+}
+
#endif /* __HYPERCALL_H__ */
diff -urN b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/spinlock.h confer/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/spinlock.h
--- b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/spinlock.h 2005-05-19 22:20:14.000000000 -0500
+++ confer/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/spinlock.h 2005-05-20 10:37:58.307766016 -0500
@@ -22,10 +22,36 @@
#ifdef CONFIG_PREEMPT
unsigned int break_lock;
#endif
+#if defined(CONFIG_XEN) && defined(CONFIG_SMP)
+ unsigned int cpu;
+#endif
} spinlock_t;
#define SPINLOCK_MAGIC 0xdead4ead
+/*
+ * Read-write spinlocks, allowing multiple readers
+ * but only one writer.
+ *
+ * NOTE! it is quite common to have readers in interrupts
+ * but no interrupt writers. For those circumstances we
+ * can "mix" irq-safe locks - any writer needs to get a
+ * irq-safe write-lock, but readers can get non-irqsafe
+ * read-locks.
+ */
+typedef struct {
+ volatile unsigned int lock;
+#ifdef CONFIG_DEBUG_SPINLOCK
+ unsigned magic;
+#endif
+#ifdef CONFIG_PREEMPT
+ unsigned int break_lock;
+#endif
+#if defined(CONFIG_XEN) && defined(CONFIG_SMP)
+ unsigned int cpu;
+#endif
+} rwlock_t;
+
#ifdef CONFIG_DEBUG_SPINLOCK
#define SPINLOCK_MAGIC_INIT , SPINLOCK_MAGIC
#else
@@ -44,7 +70,20 @@
*/
#define spin_is_locked(x) (*(volatile signed char *)(&(x)->slock) <= 0)
+#if defined(CONFIG_XEN) && defined(CONFIG_SMP)
+#include <linux/smp.h>
+#define SPINLOCK_CPU (smp_processor_id())
+/* We only yield to the hypervisor if we are in shared processor mode */
+#define SHARED_PROCESSOR (HYPERVISOR_shared_info->shproc == 0)
+extern void __spin_yield(spinlock_t *lock);
+extern void __rw_yield(rwlock_t *rw);
+extern void spin_unlock_wait(spinlock_t *lock);
+#else
+#define __spin_yield(x) barrier()
+#define __rw_yield(x) barrier()
+#define SHARED_PROCESSOR 0
#define spin_unlock_wait(x) do { barrier(); } while(spin_is_locked(x))
+#endif
#define spin_lock_string \
"\n1:\t" \
@@ -125,6 +164,9 @@
"xchgb %b0,%1"
:"=q" (oldval), "=m" (lock->slock)
:"0" (0) : "memory");
+#if defined(CONFIG_XEN) && defined(CONFIG_SMP)
+ lock->cpu = SPINLOCK_CPU;
+#endif
return oldval > 0;
}
@@ -136,43 +178,55 @@
BUG();
}
#endif
- __asm__ __volatile__(
- spin_lock_string
- :"=m" (lock->slock) : : "memory");
+#if defined(CONFIG_XEN) && defined(CONFIG_SMP)
+ while (1) {
+ if ( likely(_raw_spin_trylock(lock)) )
+ break;
+ do {
+ cpu_relax();
+ if (SHARED_PROCESSOR)
+ __spin_yield(lock);
+ } while (likely(spin_is_locked(lock)));
+ cpu_relax();
+ }
+#else
+ __asm__ __volatile__(
+ spin_lock_string
+ :"=m" (lock->slock) : : "memory");
+#endif
}
static inline void _raw_spin_lock_flags (spinlock_t *lock, unsigned long flags)
{
+#if defined(CONFIG_XEN) && defined(CONFIG_SMP)
+ unsigned long flags_dis;
+#endif
#ifdef CONFIG_DEBUG_SPINLOCK
if (unlikely(lock->magic != SPINLOCK_MAGIC)) {
printk("eip: %p\n", __builtin_return_address(0));
BUG();
}
#endif
- __asm__ __volatile__(
- spin_lock_string_flags
- :"=m" (lock->slock) : "r" (flags) : "memory");
-}
-
-/*
- * Read-write spinlocks, allowing multiple readers
- * but only one writer.
- *
- * NOTE! it is quite common to have readers in interrupts
- * but no interrupt writers. For those circumstances we
- * can "mix" irq-safe locks - any writer needs to get a
- * irq-safe write-lock, but readers can get non-irqsafe
- * read-locks.
- */
-typedef struct {
- volatile unsigned int lock;
-#ifdef CONFIG_DEBUG_SPINLOCK
- unsigned magic;
-#endif
-#ifdef CONFIG_PREEMPT
- unsigned int break_lock;
+#if defined(CONFIG_XEN) && defined(CONFIG_SMP)
+ while (1) {
+ if ( likely(_raw_spin_trylock(lock)) )
+ break;
+ local_save_flags(flags_dis);
+ local_irq_restore(flags);
+ do {
+ cpu_relax();
+ if (SHARED_PROCESSOR)
+ __spin_yield(lock);
+ } while (likely(spin_is_locked(lock)));
+ cpu_relax();
+ local_irq_restore(flags_dis);
+ }
+#else
+ __asm__ __volatile__(
+ spin_lock_string_flags
+ :"=m" (lock->slock) : "r" (flags) : "memory");
#endif
-} rwlock_t;
+}
#define RWLOCK_MAGIC 0xdeaf1eed
@@ -198,6 +252,18 @@
*/
#define write_can_lock(x) ((x)->lock == RW_LOCK_BIAS)
+static inline int _raw_write_trylock(rwlock_t *lock)
+{
+ atomic_t *count = (atomic_t *)lock;
+ if (atomic_sub_and_test(RW_LOCK_BIAS, count)) {
+#if defined(CONFIG_XEN) && defined(CONFIG_SMP)
+ lock->cpu = SPINLOCK_CPU;
+#endif
+ return 1;
+ }
+ atomic_add(RW_LOCK_BIAS, count);
+ return 0;
+}
/*
* On x86, we implement read-write locks as a 32-bit counter
* with the high bit (sign) being the "contended" bit.
@@ -222,7 +288,20 @@
#ifdef CONFIG_DEBUG_SPINLOCK
BUG_ON(rw->magic != RWLOCK_MAGIC);
#endif
+#if defined(CONFIG_XEN) && defined(CONFIG_SMP)
+ while (1) {
+ if ( likely(_raw_write_trylock(rw)) )
+ break;
+ do {
+ cpu_relax();
+ if (SHARED_PROCESSOR)
+ __rw_yield(rw);
+ } while ( likely(!write_can_lock(rw)));
+ cpu_relax();
+ }
+#else
__build_write_lock(rw, "__write_lock_failed");
+#endif
}
#define _raw_read_unlock(rw) asm volatile("lock ; incl %0" :"=m" ((rw)->lock) : : "memory")
@@ -238,13 +317,6 @@
return 0;
}
-static inline int _raw_write_trylock(rwlock_t *lock)
-{
- atomic_t *count = (atomic_t *)lock;
- if (atomic_sub_and_test(RW_LOCK_BIAS, count))
- return 1;
- atomic_add(RW_LOCK_BIAS, count);
- return 0;
-}
+
#endif /* __ASM_SPINLOCK_H */
diff -urN b/xen/arch/x86/domain.c confer/xen/arch/x86/domain.c
--- b/xen/arch/x86/domain.c 2005-05-19 22:20:28.000000000 -0500
+++ confer/xen/arch/x86/domain.c 2005-05-20 10:38:29.187071648 -0500
@@ -253,6 +253,8 @@
memset(d->shared_info, 0, PAGE_SIZE);
ed->vcpu_info = &d->shared_info->vcpu_data[ed->vcpu_id];
ed->cpumap = CPUMAP_RUNANYWHERE;
+ /* default vcpus to sharing physical cpus */
+ d->shared_info->shproc = 1;
SHARE_PFN_WITH_DOMAIN(virt_to_page(d->shared_info), d);
machine_to_phys_mapping[virt_to_phys(d->shared_info) >>
PAGE_SHIFT] = INVALID_M2P_ENTRY;
diff -urN b/xen/arch/x86/x86_32/entry.S confer/xen/arch/x86/x86_32/entry.S
--- b/xen/arch/x86/x86_32/entry.S 2005-05-19 22:20:33.000000000 -0500
+++ confer/xen/arch/x86/x86_32/entry.S 2005-05-20 10:37:58.353759024 -0500
@@ -749,6 +749,7 @@
.long do_boot_vcpu
.long do_ni_hypercall /* 25 */
.long do_mmuext_op
+ .long do_confer
.rept NR_hypercalls-((.-hypercall_table)/4)
.long do_ni_hypercall
.endr
diff -urN b/xen/common/domain.c confer/xen/common/domain.c
--- b/xen/common/domain.c 2005-05-19 22:20:15.000000000 -0500
+++ confer/xen/common/domain.c 2005-05-20 10:37:58.354758872 -0500
@@ -289,6 +289,7 @@
atomic_set(&ed->pausecnt, 0);
ed->cpumap = CPUMAP_RUNANYWHERE;
+ set_bit(_VCPUF_canconfer, &ed->vcpu_flags);
memcpy(&ed->arch, &idle0_exec_domain.arch, sizeof(ed->arch));
diff -urN b/xen/common/schedule.c confer/xen/common/schedule.c
--- b/xen/common/schedule.c 2005-05-19 22:20:30.000000000 -0500
+++ confer/xen/common/schedule.c 2005-05-20 10:45:41.493351104 -0500
@@ -224,6 +224,11 @@
spin_lock_irqsave(&schedule_data[ed->processor].schedule_lock, flags);
if ( likely(domain_runnable(ed)) )
{
+ /* mark current's confer state */
+ if ( test_bit(_VCPUF_conferring, ¤t->vcpu_flags) ) {
+ clear_bit(_VCPUF_conferring, ¤t->vcpu_flags);
+ set_bit(_VCPUF_conferred, ¤t->vcpu_flags);
+ }
SCHED_OP(wake, ed);
#ifdef WAKE_HISTO
ed->wokenup = NOW();
@@ -273,6 +278,54 @@
return 0;
}
+/* Confer control to another vcpu */
+long do_confer(unsigned int vcpu, unsigned int yield_count)
+{
+ struct domain *d = current->domain;
+
+ /* Validate CONFER prereqs:
+ * - vcpu is within bounds
+ * - vcpu is a valid in this domain
+ * - current has not already conferred its slice to vcpu
+ * - vcpu is not already running
+ * - designated vcpu's yield_count matches value from call
+ *
+ * of all are ok, then set conferred value and enter scheduler
+ */
+
+ if (vcpu > MAX_VIRT_CPUS)
+ return 0;
+
+ if (d->exec_domain[vcpu] == NULL)
+ return 0;
+
+ if (!test_bit(_VCPUF_canconfer, ¤t->vcpu_flags))
+ return 0;
+
+ /* even counts indicate a running vcpu, odd is preempted/conferred */
+ /* don't confer if holder is currently running */
+ if ((d->exec_domain[vcpu]->vcpu_info->yield_count & 1) == 0)
+ return 0;
+
+ if (d->exec_domain[vcpu]->vcpu_info->yield_count != yield_count)
+ return 0;
+
+ /*
+ * set current's state to conferring, wake target
+ */
+ clear_bit(_VCPUF_canconfer, ¤t->vcpu_flags);
+ set_bit(_VCPUF_conferring, ¤t->vcpu_flags);
+ domain_wake(d->exec_domain[vcpu]);
+
+ /* request scheduling for woken domain */
+ raise_softirq(SCHEDULE_SOFTIRQ);
+
+ /* give up my timeslice */
+ do_yield();
+
+ return 0;
+}
+
/*
* Demultiplex scheduler-related hypercalls.
*/
@@ -441,7 +494,15 @@
r_time = next_slice.time;
next = next_slice.task;
-
+
+ /*
+ * always clear conferred state so this vcpu can confer during its slice
+ * since it can confer, clear all other confer state
+ */
+ set_bit(_VCPUF_canconfer, &next->vcpu_flags);
+ clear_bit(_VCPUF_conferring, &next->vcpu_flags);
+ clear_bit(_VCPUF_conferred, &next->vcpu_flags);
+
schedule_data[cpu].curr = next;
next->lastschd = now;
@@ -455,6 +516,12 @@
spin_unlock_irq(&schedule_data[cpu].schedule_lock);
+ /* bump vcpu yield_count when controlling domain is not-idle */
+ if ( !is_idle_task(prev->domain) )
+ prev->vcpu_info->yield_count++;
+ if ( !is_idle_task(next->domain) )
+ next->vcpu_info->yield_count++;
+
if ( unlikely(prev == next) ) {
#ifdef ADV_SCHED_HISTO
adv_sched_hist_to_stop(cpu);
diff -urN b/xen/include/public/xen.h confer/xen/include/public/xen.h
--- b/xen/include/public/xen.h 2005-05-19 22:20:11.000000000 -0500
+++ confer/xen/include/public/xen.h 2005-05-20 10:37:58.368756744 -0500
@@ -58,6 +58,7 @@
#define __HYPERVISOR_boot_vcpu 24
#define __HYPERVISOR_set_segment_base 25 /* x86/64 only */
#define __HYPERVISOR_mmuext_op 26
+#define __HYPERVISOR_confer 27
/*
* MULTICALLS
@@ -334,8 +335,11 @@
u8 evtchn_upcall_mask; /* 1 */
u8 pad0, pad1;
u32 evtchn_pending_sel; /* 4 */
- arch_vcpu_info_t arch; /* 8 */
-} PACKED vcpu_info_t; /* 8 + arch */
+ /* Even when vcpu is running, Odd when it is preempted/conferred */
+ u32 yield_count; /* 8 */
+ u32 pad2; /* 12 */
+ arch_vcpu_info_t arch; /* 16 */
+} PACKED vcpu_info_t; /* 16 + arch */
/*
* Xen/kernel shared data -- pointer provided in start_info.
@@ -347,6 +351,9 @@
u32 n_vcpu;
+ /* set if domains' vcpus share physical cpus */
+ int shproc;
+
/*
* A domain can have up to 1024 "event channels" on which it can send
* and receive asynchronous event notifications. There are three classes
diff -urN b/xen/include/xen/sched.h confer/xen/include/xen/sched.h
--- b/xen/include/xen/sched.h 2005-05-19 22:20:07.000000000 -0500
+++ confer/xen/include/xen/sched.h 2005-05-20 10:37:58.378755224 -0500
@@ -358,6 +358,15 @@
/* Initialization completed. */
#define _VCPUF_initialised 8
#define VCPUF_initialised (1UL<<_VCPUF_initialised)
+ /* Able to give time slice to another vcpu */
+#define _VCPUF_canconfer 9
+#define VCPUF_canconfer (1UL<<_VCPUF_canconfer)
+ /* Currently giving time slice to another vcpu */
+#define _VCPUF_conferring 10
+#define VCPUF_conferring (1UL<<_VCPUF_conferring)
+ /* Already given time slice to another vcpu */
+#define _VCPUF_conferred 11
+#define VCPUF_conferred (1UL<<_VCPUF_conferred)
/*
* Per-domain flags (domain_flags).
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH] xen, xen-sparse: modify spinlocks to use directed yield
2005-05-20 16:54 [PATCH] xen, xen-sparse: modify spinlocks to use directed yield Ryan Harper
2005-05-20 18:16 ` Ryan Harper
@ 2005-05-20 18:17 ` Ryan Harper
2005-06-03 19:40 ` [PATCH] Yield to VCPU hcall, spinlock yielding Ryan Harper
2 siblings, 0 replies; 4+ messages in thread
From: Ryan Harper @ 2005-05-20 18:17 UTC (permalink / raw)
To: Ryan Harper; +Cc: xen-devel
* Ryan Harper <ryanh@us.ibm.com> [2005-05-20 11:55]:
> The following patch creates a new hypercall, do_confer() which allows a
Here is a debug patch I've been using to go along with it. Comment out
the printks in the do_confer() routine if you are measuring performance.
--
Ryan Harper
Software Engineer; Linux Technology Center
IBM Corp., Austin, Tx
(512) 838-9253 T/L: 678-9253
ryanh@us.ibm.com
diffstat output:
common/keyhandler.c | 30 ++++++++++++++++++++++++++++++
common/schedule.c | 19 ++++++++++++++++---
include/xen/sched.h | 3 +++
3 files changed, 49 insertions(+), 3 deletions(-)
Signed-off-by: Ryan Harper <ryanh@us.ibm.com>
---
diff -urN confer/xen/common/keyhandler.c debug/xen/common/keyhandler.c
--- confer/xen/common/keyhandler.c 2005-05-19 22:20:27.000000000 -0500
+++ debug/xen/common/keyhandler.c 2005-05-20 10:39:03.565845280 -0500
@@ -11,6 +11,7 @@
#include <xen/sched.h>
#include <xen/softirq.h>
#include <asm/debugger.h>
+#include <public/xen.h>
#define KEY_MAX 256
#define STR_MAX 64
@@ -138,6 +139,33 @@
read_unlock(&domlist_lock);
}
+static void do_dump_confer(unsigned char key)
+{
+ struct domain *d;
+ struct exec_domain *ed;
+ s_time_t now = NOW();
+
+ printk("'%c' pressed -> dumping confer stats (now=0x%X:%08X)\n", key,
+ (u32)(now>>32), (u32)now);
+
+ read_lock(&domlist_lock);
+ for_each_domain ( d )
+ {
+ for_each_exec_domain ( d, ed )
+ {
+ printk("Xen: DOM %d, VCPU %d, CPU %d,"
+ " confers %d, confer_out %d,"
+ " confer_in %d, yield_count %d\n",
+ d->domain_id, ed->vcpu_id, ed->processor,
+ ed->confercnt, ed->confer_out,
+ ed->confer_in, ed->vcpu_info->yield_count
+ );
+ }
+
+ }
+ read_unlock(&domlist_lock);
+}
+
extern void dump_runq(unsigned char key);
extern void print_sched_histo(unsigned char key);
extern void reset_sched_histo(unsigned char key);
@@ -183,6 +211,8 @@
register_keyhandler(
'q', do_task_queues, "dump task queues + guest state");
register_keyhandler(
+ 'c', do_dump_confer, "dump confer stats");
+ register_keyhandler(
'r', dump_runq, "dump run queues");
register_irq_keyhandler(
'R', halt_machine, "reboot machine");
diff -urN confer/xen/common/schedule.c debug/xen/common/schedule.c
--- confer/xen/common/schedule.c 2005-05-20 10:37:58.367756896 -0500
+++ debug/xen/common/schedule.c 2005-05-20 10:39:03.575843760 -0500
@@ -228,7 +228,11 @@
if ( test_bit(_VCPUF_conferring, ¤t->vcpu_flags) ) {
clear_bit(_VCPUF_conferring, ¤t->vcpu_flags);
set_bit(_VCPUF_conferred, ¤t->vcpu_flags);
+ /* increment confer counters */
+ current->confer_out++;
+ ed->confer_in++;
}
+
SCHED_OP(wake, ed);
#ifdef WAKE_HISTO
ed->wokenup = NOW();
@@ -283,6 +287,9 @@
{
struct domain *d = current->domain;
+ /* count hcalls */
+ current->confercnt++;
+
/* Validate CONFER prereqs:
* - vcpu is within bounds
* - vcpu is a valid in this domain
@@ -299,16 +306,22 @@
if (d->exec_domain[vcpu] == NULL)
return 0;
- if (!test_bit(_VCPUF_canconfer, ¤t->vcpu_flags))
+ if (!test_bit(_VCPUF_canconfer, ¤t->vcpu_flags)) {
+ printk("confer: canconfer not set, %d->vcpu-flags = 0x%08lx\n", current->vcpu_id, current->vcpu_flags);
return 0;
+ }
/* even counts indicate a running vcpu, odd is preempted/conferred */
/* don't confer if holder is currently running */
- if ((d->exec_domain[vcpu]->vcpu_info->yield_count & 1) == 0)
+ if ((d->exec_domain[vcpu]->vcpu_info->yield_count & 1) == 0) {
+ printk("confer: vcpu %d already running\n", vcpu);
return 0;
+ }
- if (d->exec_domain[vcpu]->vcpu_info->yield_count != yield_count)
+ if (d->exec_domain[vcpu]->vcpu_info->yield_count != yield_count) {
+ printk("confer: yield_count mismatch\n");
return 0;
+ }
/*
* set current's state to conferring, wake target
diff -urN confer/xen/include/xen/sched.h debug/xen/include/xen/sched.h
--- confer/xen/include/xen/sched.h 2005-05-20 10:37:58.378755224 -0500
+++ debug/xen/include/xen/sched.h 2005-05-20 10:39:03.577843456 -0500
@@ -87,6 +87,9 @@
atomic_t pausecnt;
cpumap_t cpumap; /* which cpus this domain can run on */
+ u32 confer_out; /* inc when conferring to another vcpu */
+ u32 confer_in; /* inc when conferred from another vcpu */
+ u32 confercnt; /* # of do_confer hcalls */
struct arch_exec_domain arch;
};
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH] Yield to VCPU hcall, spinlock yielding
2005-05-20 16:54 [PATCH] xen, xen-sparse: modify spinlocks to use directed yield Ryan Harper
2005-05-20 18:16 ` Ryan Harper
2005-05-20 18:17 ` Ryan Harper
@ 2005-06-03 19:40 ` Ryan Harper
2 siblings, 0 replies; 4+ messages in thread
From: Ryan Harper @ 2005-06-03 19:40 UTC (permalink / raw)
To: xen-devel
[-- Attachment #1: Type: text/plain, Size: 478 bytes --]
* Ryan Harper <ryanh@us.ibm.com> [2005-05-20 11:55]:
> The following patch creates a new hypercall, do_confer() which allows a
I've not received any feedback on this. Following this patch up with
one that applies against current. Builds, but haven't tested it since
current SMP domains don't run.
--
Ryan Harper
Software Engineer; Linux Technology Center
IBM Corp., Austin, Tx
(512) 838-9253 T/L: 678-9253
ryanh@us.ibm.com
Signed-off-by: Ryan Harper <ryanh@us.ibm.com>
[-- Attachment #2: confer.patch --]
[-- Type: text/plain, Size: 16792 bytes --]
diff -urN b/linux-2.6.11-xen-sparse/arch/i386/lib/locks.c c/linux-2.6.11-xen-sparse/arch/i386/lib/locks.c
--- b/linux-2.6.11-xen-sparse/arch/i386/lib/locks.c 1969-12-31 18:00:00.000000000 -0600
+++ c/linux-2.6.11-xen-sparse/arch/i386/lib/locks.c 2005-06-03 09:41:10.933009544 -0500
@@ -0,0 +1,76 @@
+/*
+ * Spin and read/write lock operations.
+ *
+ * Copyright (C) 2001-2004 Paul Mackerras <paulus@au.ibm.com>, IBM
+ * Copyright (C) 2001 Anton Blanchard <anton@au.ibm.com>, IBM
+ * Copyright (C) 2002 Dave Engebretsen <engebret@us.ibm.com>, IBM
+ * Rework to support virtual processors
+ * Copyright (C) 2005 Ryan Harper <ryanh@us.ibm.com>, IBM
+ * Rework for Xen on x86
+ *
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/spinlock.h>
+#include <linux/module.h>
+#include <linux/stringify.h>
+#include <asm/hypercall.h>
+#include <asm/processor.h>
+
+/* waiting for a spinlock... */
+#if defined(CONFIG_XEN) && defined(CONFIG_SMP)
+void __spin_yield(spinlock_t *lock)
+{
+ unsigned int lock_value, holder_cpu, yield_count;
+ shared_info_t *s = HYPERVISOR_shared_info;
+
+ lock_value = lock->slock;
+ if (lock_value == 1)
+ return;
+ holder_cpu = lock->cpu;
+ BUG_ON(holder_cpu >= NR_CPUS);
+ yield_count = s->vcpu_data[holder_cpu].yield_count;
+ if ((yield_count & 1) == 0)
+ return; /* virtual cpu is currently running */
+ rmb();
+ if (lock->slock != lock_value)
+ return; /* something has changed */
+ HYPERVISOR_confer(holder_cpu, yield_count);
+}
+
+void __rw_yield(rwlock_t *rw)
+{
+ unsigned int lock_value, holder_cpu, yield_count;
+ shared_info_t *s = HYPERVISOR_shared_info;
+
+ lock_value = rw->lock;
+ if (lock_value == RW_LOCK_BIAS)
+ return;
+ holder_cpu = rw->cpu;
+ BUG_ON(holder_cpu >= NR_CPUS);
+ yield_count = s->vcpu_data[holder_cpu].yield_count;
+ if ((yield_count & 1) == 0)
+ return; /* virtual cpu is currently running */
+ rmb();
+ if (rw->lock != lock_value)
+ return; /* something has changed */
+ HYPERVISOR_confer(holder_cpu, yield_count);
+}
+
+void spin_unlock_wait(spinlock_t *lock)
+{
+ while (spin_is_locked(lock)) {
+ cpu_relax();
+ if (SHARED_PROCESSOR)
+ __spin_yield(lock);
+ }
+ cpu_relax();
+}
+EXPORT_SYMBOL(spin_unlock_wait);
+#endif
diff -urN b/linux-2.6.11-xen-sparse/arch/i386/lib/Makefile c/linux-2.6.11-xen-sparse/arch/i386/lib/Makefile
--- b/linux-2.6.11-xen-sparse/arch/i386/lib/Makefile 1969-12-31 18:00:00.000000000 -0600
+++ c/linux-2.6.11-xen-sparse/arch/i386/lib/Makefile 2005-06-03 09:41:10.948007446 -0500
@@ -0,0 +1,11 @@
+#
+# Makefile for i386-specific library files..
+#
+
+
+lib-y = checksum.o delay.o usercopy.o getuser.o memcpy.o strstr.o \
+ bitops.o
+
+lib-$(CONFIG_X86_USE_3DNOW) += mmx.o
+lib-$(CONFIG_HAVE_DEC_LOCK) += dec_and_lock.o
+lib-$(CONFIG_XEN) += locks.o
diff -urN b/linux-2.6.11-xen-sparse/arch/xen/configs/xenU-smp_defconfig_x86_32 c/linux-2.6.11-xen-sparse/arch/xen/configs/xenU-smp_defconfig_x86_32
--- b/linux-2.6.11-xen-sparse/arch/xen/configs/xenU-smp_defconfig_x86_32 2005-06-03 09:02:36.837705157 -0500
+++ c/linux-2.6.11-xen-sparse/arch/xen/configs/xenU-smp_defconfig_x86_32 2005-06-03 09:41:10.949007306 -0500
@@ -117,8 +117,8 @@
CONFIG_SMP=y
CONFIG_NR_CPUS=8
# CONFIG_SCHED_SMT is not set
-CONFIG_PREEMPT=y
-CONFIG_PREEMPT_BKL=y
+# CONFIG_PREEMPT is not set
+# CONFIG_PREEMPT_BKL is not set
CONFIG_X86_CPUID=y
#
diff -urN b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/entry.S c/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/entry.S
--- b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/entry.S 2005-06-02 22:21:42.000000000 -0500
+++ c/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/entry.S 2005-06-03 09:41:10.953006747 -0500
@@ -80,7 +80,7 @@
#define evtchn_upcall_pending /* 0 */
#define evtchn_upcall_mask 1
-#define sizeof_vcpu_shift 3
+#define sizeof_vcpu_shift 4
#ifdef CONFIG_SMP
#define preempt_disable(reg) incl TI_preempt_count(reg)
diff -urN b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/hypercall.h c/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/hypercall.h
--- b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/hypercall.h 2005-06-02 22:21:42.000000000 -0500
+++ c/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/hypercall.h 2005-06-03 09:41:10.954006607 -0500
@@ -517,4 +517,20 @@
return ret;
}
+static inline int
+HYPERVISOR_confer(
+ unsigned int vcpu, unsigned int yield_count)
+{
+ int ret;
+ unsigned long ign1, ign2;
+
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret), "=b" (ign1), "=c" (ign2)
+ : "0" (__HYPERVISOR_confer), "1" (vcpu), "2" (yield_count)
+ : "memory");
+
+ return ret;
+}
+
#endif /* __HYPERCALL_H__ */
diff -urN b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/spinlock.h c/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/spinlock.h
--- b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/spinlock.h 2005-06-02 22:21:37.000000000 -0500
+++ c/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/spinlock.h 2005-06-03 09:41:10.975003670 -0500
@@ -22,10 +22,36 @@
#ifdef CONFIG_PREEMPT
unsigned int break_lock;
#endif
+#if defined(CONFIG_XEN) && defined(CONFIG_SMP)
+ unsigned int cpu;
+#endif
} spinlock_t;
#define SPINLOCK_MAGIC 0xdead4ead
+/*
+ * Read-write spinlocks, allowing multiple readers
+ * but only one writer.
+ *
+ * NOTE! it is quite common to have readers in interrupts
+ * but no interrupt writers. For those circumstances we
+ * can "mix" irq-safe locks - any writer needs to get a
+ * irq-safe write-lock, but readers can get non-irqsafe
+ * read-locks.
+ */
+typedef struct {
+ volatile unsigned int lock;
+#ifdef CONFIG_DEBUG_SPINLOCK
+ unsigned magic;
+#endif
+#ifdef CONFIG_PREEMPT
+ unsigned int break_lock;
+#endif
+#if defined(CONFIG_XEN) && defined(CONFIG_SMP)
+ unsigned int cpu;
+#endif
+} rwlock_t;
+
#ifdef CONFIG_DEBUG_SPINLOCK
#define SPINLOCK_MAGIC_INIT , SPINLOCK_MAGIC
#else
@@ -44,7 +70,20 @@
*/
#define spin_is_locked(x) (*(volatile signed char *)(&(x)->slock) <= 0)
+#if defined(CONFIG_XEN) && defined(CONFIG_SMP)
+#include <linux/smp.h>
+#define SPINLOCK_CPU (smp_processor_id())
+/* We only yield to the hypervisor if we are in shared processor mode */
+#define SHARED_PROCESSOR (HYPERVISOR_shared_info->shproc == 0)
+extern void __spin_yield(spinlock_t *lock);
+extern void __rw_yield(rwlock_t *rw);
+extern void spin_unlock_wait(spinlock_t *lock);
+#else
+#define __spin_yield(x) barrier()
+#define __rw_yield(x) barrier()
+#define SHARED_PROCESSOR 0
#define spin_unlock_wait(x) do { barrier(); } while(spin_is_locked(x))
+#endif
#define spin_lock_string \
"\n1:\t" \
@@ -125,6 +164,9 @@
"xchgb %b0,%1"
:"=q" (oldval), "=m" (lock->slock)
:"0" (0) : "memory");
+#if defined(CONFIG_XEN) && defined(CONFIG_SMP)
+ lock->cpu = SPINLOCK_CPU;
+#endif
return oldval > 0;
}
@@ -136,43 +178,55 @@
BUG();
}
#endif
- __asm__ __volatile__(
- spin_lock_string
- :"=m" (lock->slock) : : "memory");
+#if defined(CONFIG_XEN) && defined(CONFIG_SMP)
+ while (1) {
+ if ( likely(_raw_spin_trylock(lock)) )
+ break;
+ do {
+ cpu_relax();
+ if (SHARED_PROCESSOR)
+ __spin_yield(lock);
+ } while (likely(spin_is_locked(lock)));
+ cpu_relax();
+ }
+#else
+ __asm__ __volatile__(
+ spin_lock_string
+ :"=m" (lock->slock) : : "memory");
+#endif
}
static inline void _raw_spin_lock_flags (spinlock_t *lock, unsigned long flags)
{
+#if defined(CONFIG_XEN) && defined(CONFIG_SMP)
+ unsigned long flags_dis;
+#endif
#ifdef CONFIG_DEBUG_SPINLOCK
if (unlikely(lock->magic != SPINLOCK_MAGIC)) {
printk("eip: %p\n", __builtin_return_address(0));
BUG();
}
#endif
- __asm__ __volatile__(
- spin_lock_string_flags
- :"=m" (lock->slock) : "r" (flags) : "memory");
-}
-
-/*
- * Read-write spinlocks, allowing multiple readers
- * but only one writer.
- *
- * NOTE! it is quite common to have readers in interrupts
- * but no interrupt writers. For those circumstances we
- * can "mix" irq-safe locks - any writer needs to get a
- * irq-safe write-lock, but readers can get non-irqsafe
- * read-locks.
- */
-typedef struct {
- volatile unsigned int lock;
-#ifdef CONFIG_DEBUG_SPINLOCK
- unsigned magic;
-#endif
-#ifdef CONFIG_PREEMPT
- unsigned int break_lock;
+#if defined(CONFIG_XEN) && defined(CONFIG_SMP)
+ while (1) {
+ if ( likely(_raw_spin_trylock(lock)) )
+ break;
+ local_save_flags(flags_dis);
+ local_irq_restore(flags);
+ do {
+ cpu_relax();
+ if (SHARED_PROCESSOR)
+ __spin_yield(lock);
+ } while (likely(spin_is_locked(lock)));
+ cpu_relax();
+ local_irq_restore(flags_dis);
+ }
+#else
+ __asm__ __volatile__(
+ spin_lock_string_flags
+ :"=m" (lock->slock) : "r" (flags) : "memory");
#endif
-} rwlock_t;
+}
#define RWLOCK_MAGIC 0xdeaf1eed
@@ -198,6 +252,18 @@
*/
#define write_can_lock(x) ((x)->lock == RW_LOCK_BIAS)
+static inline int _raw_write_trylock(rwlock_t *lock)
+{
+ atomic_t *count = (atomic_t *)lock;
+ if (atomic_sub_and_test(RW_LOCK_BIAS, count)) {
+#if defined(CONFIG_XEN) && defined(CONFIG_SMP)
+ lock->cpu = SPINLOCK_CPU;
+#endif
+ return 1;
+ }
+ atomic_add(RW_LOCK_BIAS, count);
+ return 0;
+}
/*
* On x86, we implement read-write locks as a 32-bit counter
* with the high bit (sign) being the "contended" bit.
@@ -222,7 +288,20 @@
#ifdef CONFIG_DEBUG_SPINLOCK
BUG_ON(rw->magic != RWLOCK_MAGIC);
#endif
+#if defined(CONFIG_XEN) && defined(CONFIG_SMP)
+ while (1) {
+ if ( likely(_raw_write_trylock(rw)) )
+ break;
+ do {
+ cpu_relax();
+ if (SHARED_PROCESSOR)
+ __rw_yield(rw);
+ } while ( likely(!write_can_lock(rw)));
+ cpu_relax();
+ }
+#else
__build_write_lock(rw, "__write_lock_failed");
+#endif
}
#define _raw_read_unlock(rw) asm volatile("lock ; incl %0" :"=m" ((rw)->lock) : : "memory")
@@ -238,13 +317,6 @@
return 0;
}
-static inline int _raw_write_trylock(rwlock_t *lock)
-{
- atomic_t *count = (atomic_t *)lock;
- if (atomic_sub_and_test(RW_LOCK_BIAS, count))
- return 1;
- atomic_add(RW_LOCK_BIAS, count);
- return 0;
-}
+
#endif /* __ASM_SPINLOCK_H */
diff -urN b/xen/arch/x86/domain.c c/xen/arch/x86/domain.c
--- b/xen/arch/x86/domain.c 2005-06-02 22:21:41.000000000 -0500
+++ c/xen/arch/x86/domain.c 2005-06-03 09:42:37.487868084 -0500
@@ -240,6 +240,8 @@
memset(d->shared_info, 0, PAGE_SIZE);
v->vcpu_info = &d->shared_info->vcpu_data[v->vcpu_id];
v->cpumap = CPUMAP_RUNANYWHERE;
+ /* default vcpus to sharing physical cpus */
+ d->shared_info->shproc = 1;
SHARE_PFN_WITH_DOMAIN(virt_to_page(d->shared_info), d);
machine_to_phys_mapping[virt_to_phys(d->shared_info) >>
PAGE_SHIFT] = INVALID_M2P_ENTRY;
diff -urN b/xen/arch/x86/x86_32/entry.S c/xen/arch/x86/x86_32/entry.S
--- b/xen/arch/x86/x86_32/entry.S 2005-06-02 22:21:43.000000000 -0500
+++ c/xen/arch/x86/x86_32/entry.S 2005-06-03 09:41:11.000000173 -0500
@@ -751,6 +751,7 @@
.long do_boot_vcpu
.long do_ni_hypercall /* 25 */
.long do_mmuext_op
+ .long do_confer
.rept NR_hypercalls-((.-hypercall_table)/4)
.long do_ni_hypercall
.endr
diff -urN b/xen/common/domain.c c/xen/common/domain.c
--- b/xen/common/domain.c 2005-06-02 22:21:37.000000000 -0500
+++ c/xen/common/domain.c 2005-06-03 09:42:09.839752947 -0500
@@ -392,6 +392,7 @@
atomic_set(&v->pausecnt, 0);
v->cpumap = CPUMAP_RUNANYWHERE;
+ set_bit(_VCPUF_canconfer, &v->vcpu_flags);
memcpy(&v->arch, &idle0_vcpu.arch, sizeof(v->arch));
diff -urN b/xen/common/schedule.c c/xen/common/schedule.c
--- b/xen/common/schedule.c 2005-06-02 22:21:42.000000000 -0500
+++ c/xen/common/schedule.c 2005-06-03 09:41:49.540601494 -0500
@@ -219,6 +219,11 @@
spin_lock_irqsave(&schedule_data[v->processor].schedule_lock, flags);
if ( likely(domain_runnable(v)) )
{
+ /* mark current's confer state */
+ if ( test_bit(_VCPUF_conferring, &current->vcpu_flags) ) {
+ clear_bit(_VCPUF_conferring, &current->vcpu_flags);
+ set_bit(_VCPUF_conferred, &current->vcpu_flags);
+ }
SCHED_OP(wake, v);
#ifdef WAKE_HISTO
v->wokenup = NOW();
@@ -260,6 +265,51 @@
return 0;
}
+/* Confer the caller's time slice to another vcpu of its domain; every path returns 0. */
+long do_confer(unsigned int vcpu, unsigned int yield_count)
+{
+ struct domain *d = current->domain;
+
+ /* Validate CONFER prereqs:
+ * - vcpu is within bounds
+ * - vcpu is a valid in this domain
+ * - current is still allowed to confer (_VCPUF_canconfer is set)
+ * - vcpu is not already running
+ * - designated vcpu's yield_count matches value from call
+ *
+ * if all are ok, mark current as conferring, wake vcpu and yield
+ */
+
+ if (unlikely(vcpu >= MAX_VIRT_CPUS)) /* vcpu is used as an index into d->vcpu[] below */
+ return 0;
+
+ if (unlikely(d->vcpu[vcpu] == NULL))
+ return 0;
+
+ if (unlikely(!test_bit(_VCPUF_canconfer, &current->vcpu_flags)))
+ return 0;
+
+ /* even counts indicate a running vcpu, odd is preempted/conferred */
+ /* don't confer if holder is currently running */
+ if (unlikely((d->vcpu[vcpu]->vcpu_info->yield_count & 1) == 0))
+ return 0;
+
+ if (unlikely(d->vcpu[vcpu]->vcpu_info->yield_count != yield_count))
+ return 0;
+
+ /*
+ * set current's state to conferring, wake target
+ */
+ clear_bit(_VCPUF_canconfer, &current->vcpu_flags);
+ set_bit(_VCPUF_conferring, &current->vcpu_flags);
+ domain_wake(d->vcpu[vcpu]);
+
+ /* give up my timeslice */
+ do_yield();
+
+ return 0;
+}
+
/*
* Demultiplex scheduler-related hypercalls.
*/
@@ -422,7 +472,15 @@
r_time = next_slice.time;
next = next_slice.task;
-
+
+ /*
+ * always clear conferred state so this vcpu can confer during its slice
+ * since it can confer, clear all other confer state
+ */
+ set_bit(_VCPUF_canconfer, &next->vcpu_flags);
+ clear_bit(_VCPUF_conferring, &next->vcpu_flags);
+ clear_bit(_VCPUF_conferred, &next->vcpu_flags);
+
schedule_data[cpu].curr = next;
next->lastschd = now;
@@ -434,6 +492,12 @@
spin_unlock_irq(&schedule_data[cpu].schedule_lock);
+ /* bump vcpu yield_count when controlling domain is not-idle */
+ if ( !is_idle_task(prev->domain) )
+ prev->vcpu_info->yield_count++;
+ if ( !is_idle_task(next->domain) )
+ next->vcpu_info->yield_count++;
+
if ( unlikely(prev == next) )
return continue_running(prev);
diff -urN b/xen/include/public/xen.h c/xen/include/public/xen.h
--- b/xen/include/public/xen.h 2005-06-02 22:21:41.000000000 -0500
+++ c/xen/include/public/xen.h 2005-06-03 09:41:11.040994437 -0500
@@ -58,6 +58,7 @@
#define __HYPERVISOR_boot_vcpu 24
#define __HYPERVISOR_set_segment_base 25 /* x86/64 only */
#define __HYPERVISOR_mmuext_op 26
+#define __HYPERVISOR_confer 27
/*
* VIRTUAL INTERRUPTS
@@ -324,8 +325,11 @@
u8 evtchn_upcall_mask; /* 1 */
u8 pad0, pad1;
u32 evtchn_pending_sel; /* 4 */
- arch_vcpu_info_t arch; /* 8 */
-} PACKED vcpu_info_t; /* 8 + arch */
+ /* Even when vcpu is running, Odd when it is preempted/conferred */
+ u32 yield_count; /* 8 */
+ u32 pad2; /* 12 */
+ arch_vcpu_info_t arch; /* 16 */
+} PACKED vcpu_info_t; /* 16 + arch */
/*
* Xen/kernel shared data -- pointer provided in start_info.
@@ -337,6 +341,9 @@
u32 n_vcpu;
+ /* set if domains' vcpus share physical cpus */
+ int shproc;
+
/*
* A domain can have up to 1024 "event channels" on which it can send
* and receive asynchronous event notifications. There are three classes
diff -urN b/xen/include/xen/sched.h c/xen/include/xen/sched.h
--- b/xen/include/xen/sched.h 2005-06-02 22:21:36.000000000 -0500
+++ c/xen/include/xen/sched.h 2005-06-03 09:41:11.042994158 -0500
@@ -342,6 +342,15 @@
/* Initialization completed. */
#define _VCPUF_initialised 8
#define VCPUF_initialised (1UL<<_VCPUF_initialised)
+ /* Able to give time slice to another vcpu */
+#define _VCPUF_canconfer 9
+#define VCPUF_canconfer (1UL<<_VCPUF_canconfer)
+ /* Currently giving time slice to another vcpu */
+#define _VCPUF_conferring 10
+#define VCPUF_conferring (1UL<<_VCPUF_conferring)
+ /* Already given time slice to another vcpu */
+#define _VCPUF_conferred 11
+#define VCPUF_conferred (1UL<<_VCPUF_conferred)
/*
* Per-domain flags (domain_flags).
[-- Attachment #3: confer_debug.patch --]
[-- Type: text/plain, Size: 4031 bytes --]
diff -urN c/xen/common/keyhandler.c d/xen/common/keyhandler.c
--- c/xen/common/keyhandler.c 2005-06-02 22:21:41.000000000 -0500
+++ d/xen/common/keyhandler.c 2005-06-03 09:48:15.075135457 -0500
@@ -12,6 +12,7 @@
#include <xen/softirq.h>
#include <xen/domain.h>
#include <asm/debugger.h>
+#include <public/xen.h>
#define KEY_MAX 256
#define STR_MAX 64
@@ -139,6 +140,33 @@
read_unlock(&domlist_lock);
}
+static void do_dump_confer(unsigned char key)
+{
+ struct domain *d;
+ struct vcpu *v;
+ s_time_t now = NOW();
+ /* Keyhandler: print the confer statistics of every vcpu of every domain. */
+ printk("'%c' pressed -> dumping confer stats (now=0x%X:%08X)\n", key,
+ (u32)(now>>32), (u32)now);
+ /* Walk the domain list while holding domlist_lock for reading. */
+ read_lock(&domlist_lock);
+ for_each_domain ( d )
+ {
+ for_each_vcpu ( d, v )
+ {
+ printk("Xen: D%d, V%d, C%d,"
+ " cnfs %u, c_out %u," /* u32 counters: print unsigned */
+ " c_in %u, ycnt %u\n",
+ d->domain_id, v->vcpu_id, v->processor,
+ v->confercnt, v->confer_out,
+ v->confer_in, v->vcpu_info->yield_count
+ );
+ }
+
+ }
+ read_unlock(&domlist_lock);
+}
+
extern void dump_runq(unsigned char key);
extern void print_sched_histo(unsigned char key);
extern void reset_sched_histo(unsigned char key);
@@ -184,6 +212,8 @@
register_keyhandler(
'q', do_task_queues, "dump task queues + guest state");
register_keyhandler(
+ 'c', do_dump_confer, "dump confer stats");
+ register_keyhandler(
'r', dump_runq, "dump run queues");
register_irq_keyhandler(
'R', halt_machine, "reboot machine");
diff -urN c/xen/common/schedule.c d/xen/common/schedule.c
--- c/xen/common/schedule.c 2005-06-03 09:41:49.540601494 -0500
+++ d/xen/common/schedule.c 2005-06-03 09:46:01.768032249 -0500
@@ -223,6 +223,9 @@
if ( test_bit(_VCPUF_conferring, &current->vcpu_flags) ) {
clear_bit(_VCPUF_conferring, &current->vcpu_flags);
set_bit(_VCPUF_conferred, &current->vcpu_flags);
+ /* increment confer counters */
+ current->confer_out++;
+ v->confer_in++;
}
SCHED_OP(wake, v);
#ifdef WAKE_HISTO
@@ -270,6 +273,9 @@
{
struct domain *d = current->domain;
+ /* count hcalls */
+ current->confercnt++;
+
/* Validate CONFER prereqs:
* - vcpu is within bounds
* - vcpu is a valid in this domain
@@ -286,16 +292,22 @@
if (unlikely(d->vcpu[vcpu] == NULL))
return 0;
- if (unlikely(!test_bit(_VCPUF_canconfer, &current->vcpu_flags)))
+ if (unlikely(!test_bit(_VCPUF_canconfer, &current->vcpu_flags))) {
+ printk("confer: canconfer not set, %d->vcpu-flags = 0x%08lx\n", current->vcpu_id, current->vcpu_flags);
return 0;
+ }
/* even counts indicate a running vcpu, odd is preempted/conferred */
/* don't confer if holder is currently running */
- if (unlikely((d->vcpu[vcpu]->vcpu_info->yield_count & 1) == 0))
+ if (unlikely((d->vcpu[vcpu]->vcpu_info->yield_count & 1) == 0)) {
+ printk("confer: vcpu %d already running\n", vcpu);
return 0;
+ }
- if (unlikely(d->vcpu[vcpu]->vcpu_info->yield_count != yield_count))
+ if (unlikely(d->vcpu[vcpu]->vcpu_info->yield_count != yield_count)) {
+ printk("confer: yield_count mismatch\n");
return 0;
+ }
/*
* set current's state to conferring, wake target
diff -urN c/xen/include/xen/sched.h d/xen/include/xen/sched.h
--- c/xen/include/xen/sched.h 2005-06-03 09:41:11.042994158 -0500
+++ d/xen/include/xen/sched.h 2005-06-03 09:43:52.881249246 -0500
@@ -76,6 +76,9 @@
atomic_t pausecnt;
cpumap_t cpumap; /* which cpus this domain can run on */
+ u32 confer_out; /* inc when conferring to another vcpu */
+ u32 confer_in; /* inc when conferred from another vcpu */
+ u32 confercnt; /* # of do_confer hcalls */
struct arch_vcpu arch;
};
[-- Attachment #4: Type: text/plain, Size: 138 bytes --]
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
^ permalink raw reply [flat|nested] 4+ messages in thread