All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] xen, xen-sparse: modify spinlocks to use directed yield
@ 2005-05-20 16:54 Ryan Harper
  2005-05-20 18:16 ` Ryan Harper
                   ` (2 more replies)
  0 siblings, 3 replies; 4+ messages in thread
From: Ryan Harper @ 2005-05-20 16:54 UTC (permalink / raw)
  To: xen-devel

The following patch creates a new hypercall, do_confer() which allows a
vcpu to yield to another vcpu that is not currently running.  Also
included in the patch is modification to x86 spinlock code to use the
new hcall modeled on the ppc64 implementation which has an identical
hcall.  When a vcpu acquires a spin or write lock, the vcpu id of the
holder is recorded in the lock.  When a different vcpu attempts to
acquire the contested lock, the spinlock code will yield its timeslice
to the lock holder if they are not currently running rather than just
spinning until the lock holder's next timeslice.

There are a couple things not done with this patch.  First, I wasn't
able to determine if there was a guaranteed, scheduler independent way
of switching to another vcpu immediately.  Currently, the implementation
calls domain_wake() on the vcpu that is holding the lock, and then
yields.  This has two issues: 1) there is no guarantee that the
scheduler will pick the domain that has been woken next 2) it is wrong
IMHO to have the woken domain run for more than the remainder of the
caller's slice as this would be preferential to lock-holder vcpus.
Ideally I would like a way to donate the remainder of the current vcpu's
slice to a target vcpu and a SCHED_OP that allows the marking of a vcpu
as high priority that the various scheduler can implement in whatever
way makes sense.

Second, there is a conflict in Linux between CONFIG_PREEMPT and the
spinlock code that yields the vcpu to the lock holder.  That is, when
CONFIG_PREEMPT is enabled, the actually spinlock code that is used never
calls into the code that invokes the spin_yield() function which utilizes
the confer hcall.  I believe that the behavior is intentional as it
doesn't make sense to encourage the lock to be broken (what
CONFIG_PREEMPT spinlocks do) and then yield to a vcpu.  I am currently
investigating the performance difference between CONFIG_PREEMPT and
directed yielding in spinlocks and should have some numbers shortly to
see the perf trade off of these two options.

Trivial, but important, for some reason, the shared_info->shproc value
got reset to 0 at some point after it's initial allocation where I set
it to 1.  I've not tracked down why that is the case, but once that is
fixed, the the SHARED_PROCESSOR define in spinlocks.h will change to
check if shproc=1.  

Comments and questions requested.

-- 
Ryan Harper
Software Engineer; Linux Technology Center
IBM Corp., Austin, Tx
(512) 838-9253   T/L: 678-9253
ryanh@us.ibm.com


diffstat output:
 linux-2.6.11-xen-sparse/arch/i386/lib/Makefile                     |   11 
 linux-2.6.11-xen-sparse/arch/i386/lib/locks.c                      |   76 +++++
 linux-2.6.11-xen-sparse/arch/xen/configs/xenU-smp_defconfig_x86_32 |    4 
 linux-2.6.11-xen-sparse/arch/xen/i386/kernel/entry.S               |    2 
 linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/hypercall.h       |   16 +
 linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/spinlock.h        |  140 +++++++---
 xen/arch/x86/domain.c                                              |    2 
 xen/arch/x86/x86_32/entry.S                                        |    1 
 xen/common/domain.c                                                |    1 
 xen/common/schedule.c                                              |   69 ++++
 xen/include/public/xen.h                                           |   11 
 xen/include/xen/sched.h                                            |    9 
 12 files changed, 302 insertions(+), 40 deletions(-)

Signed-off-by: Ryan Harper <ryanh@us.ibm.com>
---
diff -urN b/linux-2.6.11-xen-sparse/arch/i386/lib/locks.c confer/linux-2.6.11-xen-sparse/arch/i386/lib/locks.c
--- b/linux-2.6.11-xen-sparse/arch/i386/lib/locks.c	1969-12-31 18:00:00.000000000 -0600
+++ confer/linux-2.6.11-xen-sparse/arch/i386/lib/locks.c	2005-05-20 10:37:58.300767080 -0500
@@ -0,0 +1,76 @@
+/*
+ * Spin and read/write lock operations.
+ *
+ * Copyright (C) 2001-2004 Paul Mackerras <paulus@au.ibm.com>, IBM
+ * Copyright (C) 2001 Anton Blanchard <anton@au.ibm.com>, IBM
+ * Copyright (C) 2002 Dave Engebretsen <engebret@us.ibm.com>, IBM
+ *   Rework to support virtual processors
+ * Copyright (C) 2005 Ryan Harper <ryanh@us.ibm.com>, IBM
+ *   Rework for Xen on x86
+ *
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/spinlock.h>
+#include <linux/module.h>
+#include <linux/stringify.h>
+#include <asm/hypercall.h>
+#include <asm/processor.h>
+
+/* waiting for a spinlock... */
+#if defined(CONFIG_XEN) && defined(CONFIG_SMP)
+void __spin_yield(spinlock_t *lock)
+{
+	unsigned int lock_value, holder_cpu, yield_count;
+   shared_info_t *s = HYPERVISOR_shared_info;
+
+	lock_value = lock->slock;
+	if (lock_value == 1)
+		return;
+	holder_cpu = lock->cpu;
+	BUG_ON(holder_cpu >= NR_CPUS);
+	yield_count = s->vcpu_data[holder_cpu].yield_count;
+	if ((yield_count & 1) == 0)
+		return;		/* virtual cpu is currently running */
+	rmb();
+	if (lock->slock != lock_value)
+		return;		/* something has changed */
+   HYPERVISOR_confer(holder_cpu, yield_count);
+}
+
+void __rw_yield(rwlock_t *rw)
+{
+	unsigned int lock_value, holder_cpu, yield_count;
+   shared_info_t *s = HYPERVISOR_shared_info;
+
+	lock_value = rw->lock;
+	if (lock_value == RW_LOCK_BIAS)
+		return;
+	holder_cpu = rw->cpu;
+	BUG_ON(holder_cpu >= NR_CPUS);
+	yield_count = s->vcpu_data[holder_cpu].yield_count;
+	if ((yield_count & 1) == 0)
+		return;		/* virtual cpu is currently running */
+	rmb();
+	if (rw->lock != lock_value)
+		return;		/* something has changed */
+   HYPERVISOR_confer(holder_cpu, yield_count);
+}
+
+void spin_unlock_wait(spinlock_t *lock)
+{
+	while (spin_is_locked(lock)) {
+		cpu_relax();
+		if (SHARED_PROCESSOR)
+			__spin_yield(lock);
+	}
+	cpu_relax();
+}
+EXPORT_SYMBOL(spin_unlock_wait);
+#endif
diff -urN b/linux-2.6.11-xen-sparse/arch/i386/lib/Makefile confer/linux-2.6.11-xen-sparse/arch/i386/lib/Makefile
--- b/linux-2.6.11-xen-sparse/arch/i386/lib/Makefile	1969-12-31 18:00:00.000000000 -0600
+++ confer/linux-2.6.11-xen-sparse/arch/i386/lib/Makefile	2005-05-20 10:37:58.301766928 -0500
@@ -0,0 +1,11 @@
+#
+# Makefile for i386-specific library files..
+#
+
+
+lib-y = checksum.o delay.o usercopy.o getuser.o memcpy.o strstr.o \
+	bitops.o 
+
+lib-$(CONFIG_X86_USE_3DNOW) += mmx.o
+lib-$(CONFIG_HAVE_DEC_LOCK) += dec_and_lock.o
+lib-$(CONFIG_XEN) += locks.o
diff -urN b/linux-2.6.11-xen-sparse/arch/xen/configs/xenU-smp_defconfig_x86_32 confer/linux-2.6.11-xen-sparse/arch/xen/configs/xenU-smp_defconfig_x86_32
--- b/linux-2.6.11-xen-sparse/arch/xen/configs/xenU-smp_defconfig_x86_32	2005-05-20 10:39:36.826788848 -0500
+++ confer/linux-2.6.11-xen-sparse/arch/xen/configs/xenU-smp_defconfig_x86_32	2005-05-20 10:37:58.303766624 -0500
@@ -117,8 +117,8 @@
 CONFIG_SMP=y
 CONFIG_NR_CPUS=8
 # CONFIG_SCHED_SMT is not set
-CONFIG_PREEMPT=y
-CONFIG_PREEMPT_BKL=y
+# CONFIG_PREEMPT is not set
+# CONFIG_PREEMPT_BKL is not set
 CONFIG_X86_CPUID=y
 
 #
diff -urN b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/entry.S confer/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/entry.S
--- b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/entry.S	2005-05-19 22:20:32.000000000 -0500
+++ confer/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/entry.S	2005-05-20 10:37:58.304766472 -0500
@@ -80,7 +80,7 @@
 #define evtchn_upcall_pending		/* 0 */
 #define evtchn_upcall_mask		1
 
-#define sizeof_vcpu_shift		3
+#define sizeof_vcpu_shift		4
 
 #ifdef CONFIG_SMP
 #define preempt_disable(reg)	incl TI_preempt_count(reg)
diff -urN b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/hypercall.h confer/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/hypercall.h
--- b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/hypercall.h	2005-05-19 22:20:32.000000000 -0500
+++ confer/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/hypercall.h	2005-05-20 10:37:58.306766168 -0500
@@ -517,4 +517,20 @@
     return ret;
 }
 
+static inline int
+HYPERVISOR_confer(
+	unsigned int vcpu, unsigned int yield_count)
+{
+    int ret;
+    unsigned long ign1, ign2;
+
+    __asm__ __volatile__ (
+        TRAP_INSTR
+        : "=a" (ret), "=b" (ign1), "=c" (ign2)
+	: "0" (__HYPERVISOR_confer), "1" (vcpu), "2" (yield_count)
+	: "memory");
+
+    return ret;
+}
+
 #endif /* __HYPERCALL_H__ */
diff -urN b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/spinlock.h confer/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/spinlock.h
--- b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/spinlock.h	2005-05-19 22:20:14.000000000 -0500
+++ confer/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/spinlock.h	2005-05-20 10:37:58.307766016 -0500
@@ -22,10 +22,36 @@
 #ifdef CONFIG_PREEMPT
 	unsigned int break_lock;
 #endif
+#if defined(CONFIG_XEN) && defined(CONFIG_SMP)
+	unsigned int cpu;
+#endif
 } spinlock_t;
 
 #define SPINLOCK_MAGIC	0xdead4ead
 
+/*
+ * Read-write spinlocks, allowing multiple readers
+ * but only one writer.
+ *
+ * NOTE! it is quite common to have readers in interrupts
+ * but no interrupt writers. For those circumstances we
+ * can "mix" irq-safe locks - any writer needs to get a
+ * irq-safe write-lock, but readers can get non-irqsafe
+ * read-locks.
+ */
+typedef struct {
+	volatile unsigned int lock;
+#ifdef CONFIG_DEBUG_SPINLOCK
+	unsigned magic;
+#endif
+#ifdef CONFIG_PREEMPT
+	unsigned int break_lock;
+#endif
+#if defined(CONFIG_XEN) && defined(CONFIG_SMP)
+	unsigned int cpu;
+#endif
+} rwlock_t;
+
 #ifdef CONFIG_DEBUG_SPINLOCK
 #define SPINLOCK_MAGIC_INIT	, SPINLOCK_MAGIC
 #else
@@ -44,7 +70,20 @@
  */
 
 #define spin_is_locked(x)	(*(volatile signed char *)(&(x)->slock) <= 0)
+#if defined(CONFIG_XEN) && defined(CONFIG_SMP)
+#include <linux/smp.h>
+#define SPINLOCK_CPU (smp_processor_id())
+/* We only yield to the hypervisor if we are in shared processor mode */
+#define SHARED_PROCESSOR (HYPERVISOR_shared_info->shproc == 0)
+extern void __spin_yield(spinlock_t *lock);
+extern void __rw_yield(rwlock_t *rw);
+extern void spin_unlock_wait(spinlock_t *lock);
+#else
+#define __spin_yield(x) barrier()
+#define __rw_yield(x) barrier()
+#define SHARED_PROCESSOR 0
 #define spin_unlock_wait(x)	do { barrier(); } while(spin_is_locked(x))
+#endif
 
 #define spin_lock_string \
 	"\n1:\t" \
@@ -125,6 +164,9 @@
 		"xchgb %b0,%1"
 		:"=q" (oldval), "=m" (lock->slock)
 		:"0" (0) : "memory");
+#if defined(CONFIG_XEN) && defined(CONFIG_SMP)
+	lock->cpu = SPINLOCK_CPU;
+#endif
 	return oldval > 0;
 }
 
@@ -136,43 +178,55 @@
 		BUG();
 	}
 #endif
-	__asm__ __volatile__(
-		spin_lock_string
-		:"=m" (lock->slock) : : "memory");
+#if defined(CONFIG_XEN) && defined(CONFIG_SMP)
+	while (1) {
+		if ( likely(_raw_spin_trylock(lock)) )
+			break;
+		do {
+			cpu_relax();
+			if (SHARED_PROCESSOR)
+				__spin_yield(lock);
+		} while (likely(spin_is_locked(lock)));
+		cpu_relax();
+	}
+#else
+   __asm__ __volatile__(
+      spin_lock_string
+      :"=m" (lock->slock) : : "memory");
+#endif
 }
 
 static inline void _raw_spin_lock_flags (spinlock_t *lock, unsigned long flags)
 {
+#if defined(CONFIG_XEN) && defined(CONFIG_SMP)
+	unsigned long flags_dis;
+#endif
 #ifdef CONFIG_DEBUG_SPINLOCK
 	if (unlikely(lock->magic != SPINLOCK_MAGIC)) {
 		printk("eip: %p\n", __builtin_return_address(0));
 		BUG();
 	}
 #endif
-	__asm__ __volatile__(
-		spin_lock_string_flags
-		:"=m" (lock->slock) : "r" (flags) : "memory");
-}
-
-/*
- * Read-write spinlocks, allowing multiple readers
- * but only one writer.
- *
- * NOTE! it is quite common to have readers in interrupts
- * but no interrupt writers. For those circumstances we
- * can "mix" irq-safe locks - any writer needs to get a
- * irq-safe write-lock, but readers can get non-irqsafe
- * read-locks.
- */
-typedef struct {
-	volatile unsigned int lock;
-#ifdef CONFIG_DEBUG_SPINLOCK
-	unsigned magic;
-#endif
-#ifdef CONFIG_PREEMPT
-	unsigned int break_lock;
+#if defined(CONFIG_XEN) && defined(CONFIG_SMP)
+	while (1) {
+		if ( likely(_raw_spin_trylock(lock)) )
+			break;
+		local_save_flags(flags_dis);
+		local_irq_restore(flags);
+		do {
+			cpu_relax();
+			if (SHARED_PROCESSOR)
+				__spin_yield(lock);
+		} while (likely(spin_is_locked(lock)));
+		cpu_relax();
+		local_irq_restore(flags_dis);
+	}
+#else
+   __asm__ __volatile__(
+      spin_lock_string_flags
+      :"=m" (lock->slock) : "r" (flags) : "memory");
 #endif
-} rwlock_t;
+}
 
 #define RWLOCK_MAGIC	0xdeaf1eed
 
@@ -198,6 +252,18 @@
  */
 #define write_can_lock(x) ((x)->lock == RW_LOCK_BIAS)
 
+static inline int _raw_write_trylock(rwlock_t *lock)
+{
+	atomic_t *count = (atomic_t *)lock;
+	if (atomic_sub_and_test(RW_LOCK_BIAS, count)) {
+#if defined(CONFIG_XEN) && defined(CONFIG_SMP)
+      lock->cpu = SPINLOCK_CPU;
+#endif
+		return 1;
+   }
+	atomic_add(RW_LOCK_BIAS, count);
+	return 0;
+}
 /*
  * On x86, we implement read-write locks as a 32-bit counter
  * with the high bit (sign) being the "contended" bit.
@@ -222,7 +288,20 @@
 #ifdef CONFIG_DEBUG_SPINLOCK
 	BUG_ON(rw->magic != RWLOCK_MAGIC);
 #endif
+#if defined(CONFIG_XEN) && defined(CONFIG_SMP)
+	while (1) {
+		if ( likely(_raw_write_trylock(rw)) )
+			break;
+		do {
+			cpu_relax();
+			if (SHARED_PROCESSOR)
+				__rw_yield(rw);
+		} while ( likely(!write_can_lock(rw)));
+		cpu_relax();
+	}
+#else
 	__build_write_lock(rw, "__write_lock_failed");
+#endif
 }
 
 #define _raw_read_unlock(rw)		asm volatile("lock ; incl %0" :"=m" ((rw)->lock) : : "memory")
@@ -238,13 +317,6 @@
 	return 0;
 }
 
-static inline int _raw_write_trylock(rwlock_t *lock)
-{
-	atomic_t *count = (atomic_t *)lock;
-	if (atomic_sub_and_test(RW_LOCK_BIAS, count))
-		return 1;
-	atomic_add(RW_LOCK_BIAS, count);
-	return 0;
-}
+
 
 #endif /* __ASM_SPINLOCK_H */
diff -urN b/xen/arch/x86/domain.c confer/xen/arch/x86/domain.c
--- b/xen/arch/x86/domain.c	2005-05-19 22:20:28.000000000 -0500
+++ confer/xen/arch/x86/domain.c	2005-05-20 10:38:29.187071648 -0500
@@ -253,6 +253,8 @@
     memset(d->shared_info, 0, PAGE_SIZE);
     ed->vcpu_info = &d->shared_info->vcpu_data[ed->vcpu_id];
     ed->cpumap = CPUMAP_RUNANYWHERE;
+    /* default vcpus to sharing physical cpus */
+    d->shared_info->shproc = 1;
     SHARE_PFN_WITH_DOMAIN(virt_to_page(d->shared_info), d);
     machine_to_phys_mapping[virt_to_phys(d->shared_info) >> 
                            PAGE_SHIFT] = INVALID_M2P_ENTRY;
diff -urN b/xen/arch/x86/x86_32/entry.S confer/xen/arch/x86/x86_32/entry.S
--- b/xen/arch/x86/x86_32/entry.S	2005-05-19 22:20:33.000000000 -0500
+++ confer/xen/arch/x86/x86_32/entry.S	2005-05-20 10:37:58.353759024 -0500
@@ -749,6 +749,7 @@
         .long do_boot_vcpu
         .long do_ni_hypercall       /* 25 */
         .long do_mmuext_op
+        .long do_confer
         .rept NR_hypercalls-((.-hypercall_table)/4)
         .long do_ni_hypercall
         .endr
diff -urN b/xen/common/domain.c confer/xen/common/domain.c
--- b/xen/common/domain.c	2005-05-19 22:20:15.000000000 -0500
+++ confer/xen/common/domain.c	2005-05-20 10:37:58.354758872 -0500
@@ -289,6 +289,7 @@
 
     atomic_set(&ed->pausecnt, 0);
     ed->cpumap = CPUMAP_RUNANYWHERE;
+    set_bit(_VCPUF_canconfer, &ed->vcpu_flags);
 
     memcpy(&ed->arch, &idle0_exec_domain.arch, sizeof(ed->arch));
 
diff -urN b/xen/common/schedule.c confer/xen/common/schedule.c
--- b/xen/common/schedule.c	2005-05-19 22:20:30.000000000 -0500
+++ confer/xen/common/schedule.c	2005-05-20 10:45:41.493351104 -0500
@@ -224,6 +224,11 @@
     spin_lock_irqsave(&schedule_data[ed->processor].schedule_lock, flags);
     if ( likely(domain_runnable(ed)) )
     {
+        /* mark current's confer state */
+        if ( test_bit(_VCPUF_conferring, &current->vcpu_flags) ) {
+            clear_bit(_VCPUF_conferring, &current->vcpu_flags);
+            set_bit(_VCPUF_conferred, &current->vcpu_flags);
+        }
         SCHED_OP(wake, ed);
 #ifdef WAKE_HISTO
         ed->wokenup = NOW();
@@ -273,6 +278,54 @@
     return 0;
 }
 
+/* Confer control to another vcpu */
+long do_confer(unsigned int vcpu, unsigned int yield_count)
+{
+    struct domain *d = current->domain;
+   
+    /* Validate CONFER prereqs:
+    * - vcpu is within bounds
+    * - vcpu is a valid in this domain
+    * - current has not already conferred its slice to vcpu
+    * - vcpu is not already running
+    * - designated vcpu's yield_count matches value from call
+    *
+    * of all are ok, then set conferred value and enter scheduler
+    */
+
+    if (vcpu > MAX_VIRT_CPUS)
+        return 0; 
+
+    if (d->exec_domain[vcpu] == NULL)
+        return 0;
+
+    if (!test_bit(_VCPUF_canconfer, &current->vcpu_flags))
+        return 0;
+
+    /* even counts indicate a running vcpu, odd is preempted/conferred */
+    /* don't confer if holder is currently running */
+    if ((d->exec_domain[vcpu]->vcpu_info->yield_count & 1) == 0)
+        return 0;
+
+    if (d->exec_domain[vcpu]->vcpu_info->yield_count != yield_count)
+        return 0;
+
+    /*
+     * set current's state to conferring, wake target
+     */
+    clear_bit(_VCPUF_canconfer, &current->vcpu_flags);
+    set_bit(_VCPUF_conferring, &current->vcpu_flags);
+    domain_wake(d->exec_domain[vcpu]);
+
+    /* request scheduling for woken domain */
+    raise_softirq(SCHEDULE_SOFTIRQ);
+
+    /* give up my timeslice */
+    do_yield();
+
+    return 0;
+}
+
 /*
  * Demultiplex scheduler-related hypercalls.
  */
@@ -441,7 +494,15 @@
 
     r_time = next_slice.time;
     next = next_slice.task;
-    
+
+    /*
+     * always clear conferred state so this vcpu can confer during its slice
+     * since it can confer, clear all other confer state
+     */
+    set_bit(_VCPUF_canconfer,   &next->vcpu_flags);
+    clear_bit(_VCPUF_conferring, &next->vcpu_flags);
+    clear_bit(_VCPUF_conferred,  &next->vcpu_flags);
+
     schedule_data[cpu].curr = next;
     
     next->lastschd = now;
@@ -455,6 +516,12 @@
 
     spin_unlock_irq(&schedule_data[cpu].schedule_lock);
 
+    /* bump vcpu yield_count when controlling domain is not-idle */
+    if ( !is_idle_task(prev->domain) )
+        prev->vcpu_info->yield_count++;
+    if ( !is_idle_task(next->domain) )
+        next->vcpu_info->yield_count++;
+
     if ( unlikely(prev == next) ) {
 #ifdef ADV_SCHED_HISTO
         adv_sched_hist_to_stop(cpu);
diff -urN b/xen/include/public/xen.h confer/xen/include/public/xen.h
--- b/xen/include/public/xen.h	2005-05-19 22:20:11.000000000 -0500
+++ confer/xen/include/public/xen.h	2005-05-20 10:37:58.368756744 -0500
@@ -58,6 +58,7 @@
 #define __HYPERVISOR_boot_vcpu            24
 #define __HYPERVISOR_set_segment_base     25 /* x86/64 only */
 #define __HYPERVISOR_mmuext_op            26
+#define __HYPERVISOR_confer               27
 
 /*
  * MULTICALLS
@@ -334,8 +335,11 @@
     u8 evtchn_upcall_mask;              /* 1 */
     u8 pad0, pad1;
     u32 evtchn_pending_sel;             /* 4 */
-    arch_vcpu_info_t arch;              /* 8 */
-} PACKED vcpu_info_t;                   /* 8 + arch */
+    /* Even when vcpu is running, Odd when it is preempted/conferred */
+    u32 yield_count;                    /* 8 */
+    u32 pad2;                           /* 12 */
+    arch_vcpu_info_t arch;              /* 16 */
+} PACKED vcpu_info_t;                   /* 16 + arch */
 
 /*
  * Xen/kernel shared data -- pointer provided in start_info.
@@ -347,6 +351,9 @@
 
     u32 n_vcpu;
 
+    /* set if domains' vcpus share physical cpus */
+    int shproc; 
+
     /*
      * A domain can have up to 1024 "event channels" on which it can send
      * and receive asynchronous event notifications. There are three classes
diff -urN b/xen/include/xen/sched.h confer/xen/include/xen/sched.h
--- b/xen/include/xen/sched.h	2005-05-19 22:20:07.000000000 -0500
+++ confer/xen/include/xen/sched.h	2005-05-20 10:37:58.378755224 -0500
@@ -358,6 +358,15 @@
  /* Initialization completed. */
 #define _VCPUF_initialised     8
 #define VCPUF_initialised      (1UL<<_VCPUF_initialised)
+ /* Able to give time slice to another vcpu */
+#define _VCPUF_canconfer       9
+#define VCPUF_canconfer        (1UL<<_VCPUF_canconfer)
+ /* Currently giving time slice to another vcpu */
+#define _VCPUF_conferring      10
+#define VCPUF_conferring       (1UL<<_VCPUF_conferring)
+ /* Already given time slice to another vcpu */
+#define _VCPUF_conferred       11
+#define VCPUF_conferred        (1UL<<_VCPUF_conferred)
 
 /*
  * Per-domain flags (domain_flags).

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] xen, xen-sparse: modify spinlocks to use directed yield
  2005-05-20 16:54 [PATCH] xen, xen-sparse: modify spinlocks to use directed yield Ryan Harper
@ 2005-05-20 18:16 ` Ryan Harper
  2005-05-20 18:17 ` Ryan Harper
  2005-06-03 19:40 ` [PATCH] Yield to VCPU hcall, spinlock yielding Ryan Harper
  2 siblings, 0 replies; 4+ messages in thread
From: Ryan Harper @ 2005-05-20 18:16 UTC (permalink / raw)
  To: Ryan Harper; +Cc: xen-devel

* Ryan Harper <ryanh@us.ibm.com> [2005-05-20 11:55]:
> The following patch creates a new hypercall, do_confer() which allows a

Oops.  I left in fixes to my domU config which doesnt exist in the main
tree.  I've got that part removed in this version.

--
Ryan Harper
Software Engineer; Linux Technology Center
IBM Corp., Austin, Tx
(512) 838-9253   T/L: 678-9253
ryanh@us.ibm.com


diffstat output:
 linux-2.6.11-xen-sparse/arch/i386/lib/Makefile               |   11 
 linux-2.6.11-xen-sparse/arch/i386/lib/locks.c                |   76 +++++
 linux-2.6.11-xen-sparse/arch/xen/i386/kernel/entry.S         |    2 
 linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/hypercall.h |   16 +
 linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/spinlock.h  |  140 ++++++++---
 xen/arch/x86/domain.c                                        |    2 
 xen/arch/x86/x86_32/entry.S                                  |    1 
 xen/common/domain.c                                          |    1 
 xen/common/schedule.c                                        |   69 +++++
 xen/include/public/xen.h                                     |   11 
 xen/include/xen/sched.h                                      |    9 
 11 files changed, 300 insertions(+), 38 deletions(-)

Signed-off-by: Ryan Harper <ryanh@us.ibm.com>
---
diff -urN b/linux-2.6.11-xen-sparse/arch/i386/lib/locks.c confer/linux-2.6.11-xen-sparse/arch/i386/lib/locks.c
--- b/linux-2.6.11-xen-sparse/arch/i386/lib/locks.c	1969-12-31 18:00:00.000000000 -0600
+++ confer/linux-2.6.11-xen-sparse/arch/i386/lib/locks.c	2005-05-20 10:37:58.300767080 -0500
@@ -0,0 +1,76 @@
+/*
+ * Spin and read/write lock operations.
+ *
+ * Copyright (C) 2001-2004 Paul Mackerras <paulus@au.ibm.com>, IBM
+ * Copyright (C) 2001 Anton Blanchard <anton@au.ibm.com>, IBM
+ * Copyright (C) 2002 Dave Engebretsen <engebret@us.ibm.com>, IBM
+ *   Rework to support virtual processors
+ * Copyright (C) 2005 Ryan Harper <ryanh@us.ibm.com>, IBM
+ *   Rework for Xen on x86
+ *
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/spinlock.h>
+#include <linux/module.h>
+#include <linux/stringify.h>
+#include <asm/hypercall.h>
+#include <asm/processor.h>
+
+/* waiting for a spinlock... */
+#if defined(CONFIG_XEN) && defined(CONFIG_SMP)
+void __spin_yield(spinlock_t *lock)
+{
+	unsigned int lock_value, holder_cpu, yield_count;
+   shared_info_t *s = HYPERVISOR_shared_info;
+
+	lock_value = lock->slock;
+	if (lock_value == 1)
+		return;
+	holder_cpu = lock->cpu;
+	BUG_ON(holder_cpu >= NR_CPUS);
+	yield_count = s->vcpu_data[holder_cpu].yield_count;
+	if ((yield_count & 1) == 0)
+		return;		/* virtual cpu is currently running */
+	rmb();
+	if (lock->slock != lock_value)
+		return;		/* something has changed */
+   HYPERVISOR_confer(holder_cpu, yield_count);
+}
+
+void __rw_yield(rwlock_t *rw)
+{
+	unsigned int lock_value, holder_cpu, yield_count;
+   shared_info_t *s = HYPERVISOR_shared_info;
+
+	lock_value = rw->lock;
+	if (lock_value == RW_LOCK_BIAS)
+		return;
+	holder_cpu = rw->cpu;
+	BUG_ON(holder_cpu >= NR_CPUS);
+	yield_count = s->vcpu_data[holder_cpu].yield_count;
+	if ((yield_count & 1) == 0)
+		return;		/* virtual cpu is currently running */
+	rmb();
+	if (rw->lock != lock_value)
+		return;		/* something has changed */
+   HYPERVISOR_confer(holder_cpu, yield_count);
+}
+
+void spin_unlock_wait(spinlock_t *lock)
+{
+	while (spin_is_locked(lock)) {
+		cpu_relax();
+		if (SHARED_PROCESSOR)
+			__spin_yield(lock);
+	}
+	cpu_relax();
+}
+EXPORT_SYMBOL(spin_unlock_wait);
+#endif
diff -urN b/linux-2.6.11-xen-sparse/arch/i386/lib/Makefile confer/linux-2.6.11-xen-sparse/arch/i386/lib/Makefile
--- b/linux-2.6.11-xen-sparse/arch/i386/lib/Makefile	1969-12-31 18:00:00.000000000 -0600
+++ confer/linux-2.6.11-xen-sparse/arch/i386/lib/Makefile	2005-05-20 10:37:58.301766928 -0500
@@ -0,0 +1,11 @@
+#
+# Makefile for i386-specific library files..
+#
+
+
+lib-y = checksum.o delay.o usercopy.o getuser.o memcpy.o strstr.o \
+	bitops.o 
+
+lib-$(CONFIG_X86_USE_3DNOW) += mmx.o
+lib-$(CONFIG_HAVE_DEC_LOCK) += dec_and_lock.o
+lib-$(CONFIG_XEN) += locks.o
diff -urN b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/entry.S confer/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/entry.S
--- b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/entry.S	2005-05-19 22:20:32.000000000 -0500
+++ confer/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/entry.S	2005-05-20 10:37:58.304766472 -0500
@@ -80,7 +80,7 @@
 #define evtchn_upcall_pending		/* 0 */
 #define evtchn_upcall_mask		1
 
-#define sizeof_vcpu_shift		3
+#define sizeof_vcpu_shift		4
 
 #ifdef CONFIG_SMP
 #define preempt_disable(reg)	incl TI_preempt_count(reg)
diff -urN b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/hypercall.h confer/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/hypercall.h
--- b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/hypercall.h	2005-05-19 22:20:32.000000000 -0500
+++ confer/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/hypercall.h	2005-05-20 10:37:58.306766168 -0500
@@ -517,4 +517,20 @@
     return ret;
 }
 
+static inline int
+HYPERVISOR_confer(
+	unsigned int vcpu, unsigned int yield_count)
+{
+    int ret;
+    unsigned long ign1, ign2;
+
+    __asm__ __volatile__ (
+        TRAP_INSTR
+        : "=a" (ret), "=b" (ign1), "=c" (ign2)
+	: "0" (__HYPERVISOR_confer), "1" (vcpu), "2" (yield_count)
+	: "memory");
+
+    return ret;
+}
+
 #endif /* __HYPERCALL_H__ */
diff -urN b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/spinlock.h confer/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/spinlock.h
--- b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/spinlock.h	2005-05-19 22:20:14.000000000 -0500
+++ confer/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/spinlock.h	2005-05-20 10:37:58.307766016 -0500
@@ -22,10 +22,36 @@
 #ifdef CONFIG_PREEMPT
 	unsigned int break_lock;
 #endif
+#if defined(CONFIG_XEN) && defined(CONFIG_SMP)
+	unsigned int cpu;
+#endif
 } spinlock_t;
 
 #define SPINLOCK_MAGIC	0xdead4ead
 
+/*
+ * Read-write spinlocks, allowing multiple readers
+ * but only one writer.
+ *
+ * NOTE! it is quite common to have readers in interrupts
+ * but no interrupt writers. For those circumstances we
+ * can "mix" irq-safe locks - any writer needs to get a
+ * irq-safe write-lock, but readers can get non-irqsafe
+ * read-locks.
+ */
+typedef struct {
+	volatile unsigned int lock;
+#ifdef CONFIG_DEBUG_SPINLOCK
+	unsigned magic;
+#endif
+#ifdef CONFIG_PREEMPT
+	unsigned int break_lock;
+#endif
+#if defined(CONFIG_XEN) && defined(CONFIG_SMP)
+	unsigned int cpu;
+#endif
+} rwlock_t;
+
 #ifdef CONFIG_DEBUG_SPINLOCK
 #define SPINLOCK_MAGIC_INIT	, SPINLOCK_MAGIC
 #else
@@ -44,7 +70,20 @@
  */
 
 #define spin_is_locked(x)	(*(volatile signed char *)(&(x)->slock) <= 0)
+#if defined(CONFIG_XEN) && defined(CONFIG_SMP)
+#include <linux/smp.h>
+#define SPINLOCK_CPU (smp_processor_id())
+/* We only yield to the hypervisor if we are in shared processor mode */
+#define SHARED_PROCESSOR (HYPERVISOR_shared_info->shproc == 0)
+extern void __spin_yield(spinlock_t *lock);
+extern void __rw_yield(rwlock_t *rw);
+extern void spin_unlock_wait(spinlock_t *lock);
+#else
+#define __spin_yield(x) barrier()
+#define __rw_yield(x) barrier()
+#define SHARED_PROCESSOR 0
 #define spin_unlock_wait(x)	do { barrier(); } while(spin_is_locked(x))
+#endif
 
 #define spin_lock_string \
 	"\n1:\t" \
@@ -125,6 +164,9 @@
 		"xchgb %b0,%1"
 		:"=q" (oldval), "=m" (lock->slock)
 		:"0" (0) : "memory");
+#if defined(CONFIG_XEN) && defined(CONFIG_SMP)
+	lock->cpu = SPINLOCK_CPU;
+#endif
 	return oldval > 0;
 }
 
@@ -136,43 +178,55 @@
 		BUG();
 	}
 #endif
-	__asm__ __volatile__(
-		spin_lock_string
-		:"=m" (lock->slock) : : "memory");
+#if defined(CONFIG_XEN) && defined(CONFIG_SMP)
+	while (1) {
+		if ( likely(_raw_spin_trylock(lock)) )
+			break;
+		do {
+			cpu_relax();
+			if (SHARED_PROCESSOR)
+				__spin_yield(lock);
+		} while (likely(spin_is_locked(lock)));
+		cpu_relax();
+	}
+#else
+   __asm__ __volatile__(
+      spin_lock_string
+      :"=m" (lock->slock) : : "memory");
+#endif
 }
 
 static inline void _raw_spin_lock_flags (spinlock_t *lock, unsigned long flags)
 {
+#if defined(CONFIG_XEN) && defined(CONFIG_SMP)
+	unsigned long flags_dis;
+#endif
 #ifdef CONFIG_DEBUG_SPINLOCK
 	if (unlikely(lock->magic != SPINLOCK_MAGIC)) {
 		printk("eip: %p\n", __builtin_return_address(0));
 		BUG();
 	}
 #endif
-	__asm__ __volatile__(
-		spin_lock_string_flags
-		:"=m" (lock->slock) : "r" (flags) : "memory");
-}
-
-/*
- * Read-write spinlocks, allowing multiple readers
- * but only one writer.
- *
- * NOTE! it is quite common to have readers in interrupts
- * but no interrupt writers. For those circumstances we
- * can "mix" irq-safe locks - any writer needs to get a
- * irq-safe write-lock, but readers can get non-irqsafe
- * read-locks.
- */
-typedef struct {
-	volatile unsigned int lock;
-#ifdef CONFIG_DEBUG_SPINLOCK
-	unsigned magic;
-#endif
-#ifdef CONFIG_PREEMPT
-	unsigned int break_lock;
+#if defined(CONFIG_XEN) && defined(CONFIG_SMP)
+	while (1) {
+		if ( likely(_raw_spin_trylock(lock)) )
+			break;
+		local_save_flags(flags_dis);
+		local_irq_restore(flags);
+		do {
+			cpu_relax();
+			if (SHARED_PROCESSOR)
+				__spin_yield(lock);
+		} while (likely(spin_is_locked(lock)));
+		cpu_relax();
+		local_irq_restore(flags_dis);
+	}
+#else
+   __asm__ __volatile__(
+      spin_lock_string_flags
+      :"=m" (lock->slock) : "r" (flags) : "memory");
 #endif
-} rwlock_t;
+}
 
 #define RWLOCK_MAGIC	0xdeaf1eed
 
@@ -198,6 +252,18 @@
  */
 #define write_can_lock(x) ((x)->lock == RW_LOCK_BIAS)
 
+static inline int _raw_write_trylock(rwlock_t *lock)
+{
+	atomic_t *count = (atomic_t *)lock;
+	if (atomic_sub_and_test(RW_LOCK_BIAS, count)) {
+#if defined(CONFIG_XEN) && defined(CONFIG_SMP)
+      lock->cpu = SPINLOCK_CPU;
+#endif
+		return 1;
+   }
+	atomic_add(RW_LOCK_BIAS, count);
+	return 0;
+}
 /*
  * On x86, we implement read-write locks as a 32-bit counter
  * with the high bit (sign) being the "contended" bit.
@@ -222,7 +288,20 @@
 #ifdef CONFIG_DEBUG_SPINLOCK
 	BUG_ON(rw->magic != RWLOCK_MAGIC);
 #endif
+#if defined(CONFIG_XEN) && defined(CONFIG_SMP)
+	while (1) {
+		if ( likely(_raw_write_trylock(rw)) )
+			break;
+		do {
+			cpu_relax();
+			if (SHARED_PROCESSOR)
+				__rw_yield(rw);
+		} while ( likely(!write_can_lock(rw)));
+		cpu_relax();
+	}
+#else
 	__build_write_lock(rw, "__write_lock_failed");
+#endif
 }
 
 #define _raw_read_unlock(rw)		asm volatile("lock ; incl %0" :"=m" ((rw)->lock) : : "memory")
@@ -238,13 +317,6 @@
 	return 0;
 }
 
-static inline int _raw_write_trylock(rwlock_t *lock)
-{
-	atomic_t *count = (atomic_t *)lock;
-	if (atomic_sub_and_test(RW_LOCK_BIAS, count))
-		return 1;
-	atomic_add(RW_LOCK_BIAS, count);
-	return 0;
-}
+
 
 #endif /* __ASM_SPINLOCK_H */
diff -urN b/xen/arch/x86/domain.c confer/xen/arch/x86/domain.c
--- b/xen/arch/x86/domain.c	2005-05-19 22:20:28.000000000 -0500
+++ confer/xen/arch/x86/domain.c	2005-05-20 10:38:29.187071648 -0500
@@ -253,6 +253,8 @@
     memset(d->shared_info, 0, PAGE_SIZE);
     ed->vcpu_info = &d->shared_info->vcpu_data[ed->vcpu_id];
     ed->cpumap = CPUMAP_RUNANYWHERE;
+    /* default vcpus to sharing physical cpus */
+    d->shared_info->shproc = 1;
     SHARE_PFN_WITH_DOMAIN(virt_to_page(d->shared_info), d);
     machine_to_phys_mapping[virt_to_phys(d->shared_info) >> 
                            PAGE_SHIFT] = INVALID_M2P_ENTRY;
diff -urN b/xen/arch/x86/x86_32/entry.S confer/xen/arch/x86/x86_32/entry.S
--- b/xen/arch/x86/x86_32/entry.S	2005-05-19 22:20:33.000000000 -0500
+++ confer/xen/arch/x86/x86_32/entry.S	2005-05-20 10:37:58.353759024 -0500
@@ -749,6 +749,7 @@
         .long do_boot_vcpu
         .long do_ni_hypercall       /* 25 */
         .long do_mmuext_op
+        .long do_confer
         .rept NR_hypercalls-((.-hypercall_table)/4)
         .long do_ni_hypercall
         .endr
diff -urN b/xen/common/domain.c confer/xen/common/domain.c
--- b/xen/common/domain.c	2005-05-19 22:20:15.000000000 -0500
+++ confer/xen/common/domain.c	2005-05-20 10:37:58.354758872 -0500
@@ -289,6 +289,7 @@
 
     atomic_set(&ed->pausecnt, 0);
     ed->cpumap = CPUMAP_RUNANYWHERE;
+    set_bit(_VCPUF_canconfer, &ed->vcpu_flags);
 
     memcpy(&ed->arch, &idle0_exec_domain.arch, sizeof(ed->arch));
 
diff -urN b/xen/common/schedule.c confer/xen/common/schedule.c
--- b/xen/common/schedule.c	2005-05-19 22:20:30.000000000 -0500
+++ confer/xen/common/schedule.c	2005-05-20 10:45:41.493351104 -0500
@@ -224,6 +224,11 @@
     spin_lock_irqsave(&schedule_data[ed->processor].schedule_lock, flags);
     if ( likely(domain_runnable(ed)) )
     {
+        /* mark current's confer state */
+        if ( test_bit(_VCPUF_conferring, &current->vcpu_flags) ) {
+            clear_bit(_VCPUF_conferring, &current->vcpu_flags);
+            set_bit(_VCPUF_conferred, &current->vcpu_flags);
+        }
         SCHED_OP(wake, ed);
 #ifdef WAKE_HISTO
         ed->wokenup = NOW();
@@ -273,6 +278,54 @@
     return 0;
 }
 
+/* Confer control to another vcpu */
+long do_confer(unsigned int vcpu, unsigned int yield_count)
+{
+    struct domain *d = current->domain;
+   
+    /* Validate CONFER prereqs:
+    * - vcpu is within bounds
+    * - vcpu is a valid in this domain
+    * - current has not already conferred its slice to vcpu
+    * - vcpu is not already running
+    * - designated vcpu's yield_count matches value from call
+    *
+    * of all are ok, then set conferred value and enter scheduler
+    */
+
+    if (vcpu > MAX_VIRT_CPUS)
+        return 0; 
+
+    if (d->exec_domain[vcpu] == NULL)
+        return 0;
+
+    if (!test_bit(_VCPUF_canconfer, &current->vcpu_flags))
+        return 0;
+
+    /* even counts indicate a running vcpu, odd is preempted/conferred */
+    /* don't confer if holder is currently running */
+    if ((d->exec_domain[vcpu]->vcpu_info->yield_count & 1) == 0)
+        return 0;
+
+    if (d->exec_domain[vcpu]->vcpu_info->yield_count != yield_count)
+        return 0;
+
+    /*
+     * set current's state to conferring, wake target
+     */
+    clear_bit(_VCPUF_canconfer, &current->vcpu_flags);
+    set_bit(_VCPUF_conferring, &current->vcpu_flags);
+    domain_wake(d->exec_domain[vcpu]);
+
+    /* request scheduling for woken domain */
+    raise_softirq(SCHEDULE_SOFTIRQ);
+
+    /* give up my timeslice */
+    do_yield();
+
+    return 0;
+}
+
 /*
  * Demultiplex scheduler-related hypercalls.
  */
@@ -441,7 +494,15 @@
 
     r_time = next_slice.time;
     next = next_slice.task;
-    
+
+    /*
+     * always clear conferred state so this vcpu can confer during its slice
+     * since it can confer, clear all other confer state
+     */
+    set_bit(_VCPUF_canconfer,   &next->vcpu_flags);
+    clear_bit(_VCPUF_conferring, &next->vcpu_flags);
+    clear_bit(_VCPUF_conferred,  &next->vcpu_flags);
+
     schedule_data[cpu].curr = next;
     
     next->lastschd = now;
@@ -455,6 +516,12 @@
 
     spin_unlock_irq(&schedule_data[cpu].schedule_lock);
 
+    /* bump vcpu yield_count when controlling domain is not-idle */
+    if ( !is_idle_task(prev->domain) )
+        prev->vcpu_info->yield_count++;
+    if ( !is_idle_task(next->domain) )
+        next->vcpu_info->yield_count++;
+
     if ( unlikely(prev == next) ) {
 #ifdef ADV_SCHED_HISTO
         adv_sched_hist_to_stop(cpu);
diff -urN b/xen/include/public/xen.h confer/xen/include/public/xen.h
--- b/xen/include/public/xen.h	2005-05-19 22:20:11.000000000 -0500
+++ confer/xen/include/public/xen.h	2005-05-20 10:37:58.368756744 -0500
@@ -58,6 +58,7 @@
 #define __HYPERVISOR_boot_vcpu            24
 #define __HYPERVISOR_set_segment_base     25 /* x86/64 only */
 #define __HYPERVISOR_mmuext_op            26
+#define __HYPERVISOR_confer               27
 
 /*
  * MULTICALLS
@@ -334,8 +335,11 @@
     u8 evtchn_upcall_mask;              /* 1 */
     u8 pad0, pad1;
     u32 evtchn_pending_sel;             /* 4 */
-    arch_vcpu_info_t arch;              /* 8 */
-} PACKED vcpu_info_t;                   /* 8 + arch */
+    /* Even when vcpu is running, Odd when it is preempted/conferred */
+    u32 yield_count;                    /* 8 */
+    u32 pad2;                           /* 12 */
+    arch_vcpu_info_t arch;              /* 16 */
+} PACKED vcpu_info_t;                   /* 16 + arch */
 
 /*
  * Xen/kernel shared data -- pointer provided in start_info.
@@ -347,6 +351,9 @@
 
     u32 n_vcpu;
 
+    /* set if domains' vcpus share physical cpus */
+    int shproc; 
+
     /*
      * A domain can have up to 1024 "event channels" on which it can send
      * and receive asynchronous event notifications. There are three classes
diff -urN b/xen/include/xen/sched.h confer/xen/include/xen/sched.h
--- b/xen/include/xen/sched.h	2005-05-19 22:20:07.000000000 -0500
+++ confer/xen/include/xen/sched.h	2005-05-20 10:37:58.378755224 -0500
@@ -358,6 +358,15 @@
  /* Initialization completed. */
 #define _VCPUF_initialised     8
 #define VCPUF_initialised      (1UL<<_VCPUF_initialised)
+ /* Able to give time slice to another vcpu */
+#define _VCPUF_canconfer       9
+#define VCPUF_canconfer        (1UL<<_VCPUF_canconfer)
+ /* Currently giving time slice to another vcpu */
+#define _VCPUF_conferring      10
+#define VCPUF_conferring       (1UL<<_VCPUF_conferring)
+ /* Already given time slice to another vcpu */
+#define _VCPUF_conferred       11
+#define VCPUF_conferred        (1UL<<_VCPUF_conferred)
 
 /*
  * Per-domain flags (domain_flags).

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] xen, xen-sparse: modify spinlocks to use directed yield
  2005-05-20 16:54 [PATCH] xen, xen-sparse: modify spinlocks to use directed yield Ryan Harper
  2005-05-20 18:16 ` Ryan Harper
@ 2005-05-20 18:17 ` Ryan Harper
  2005-06-03 19:40 ` [PATCH] Yield to VCPU hcall, spinlock yielding Ryan Harper
  2 siblings, 0 replies; 4+ messages in thread
From: Ryan Harper @ 2005-05-20 18:17 UTC (permalink / raw)
  To: Ryan Harper; +Cc: xen-devel

* Ryan Harper <ryanh@us.ibm.com> [2005-05-20 11:55]:
> The following patch creates a new hypercall, do_confer() which allows a

Here is a debug patch I've been using to go along with it. Comment out
the printks in the do_confer() routine if you are measuring performance.

--
Ryan Harper
Software Engineer; Linux Technology Center
IBM Corp., Austin, Tx
(512) 838-9253   T/L: 678-9253
ryanh@us.ibm.com


diffstat output:
 common/keyhandler.c |   30 ++++++++++++++++++++++++++++++
 common/schedule.c   |   19 ++++++++++++++++---
 include/xen/sched.h |    3 +++
 3 files changed, 49 insertions(+), 3 deletions(-)

Signed-off-by: Ryan Harper <ryanh@us.ibm.com>
---
diff -urN confer/xen/common/keyhandler.c debug/xen/common/keyhandler.c
--- confer/xen/common/keyhandler.c	2005-05-19 22:20:27.000000000 -0500
+++ debug/xen/common/keyhandler.c	2005-05-20 10:39:03.565845280 -0500
@@ -11,6 +11,7 @@
 #include <xen/sched.h>
 #include <xen/softirq.h>
 #include <asm/debugger.h>
+#include <public/xen.h>
 
 #define KEY_MAX 256
 #define STR_MAX  64
@@ -138,6 +139,33 @@
     read_unlock(&domlist_lock);
 }
 
+static void do_dump_confer(unsigned char key)
+{
+    struct domain *d;
+    struct exec_domain *ed;
+    s_time_t       now = NOW();
+
+    printk("'%c' pressed -> dumping confer stats (now=0x%X:%08X)\n", key,
+           (u32)(now>>32), (u32)now);
+
+    read_lock(&domlist_lock);
+    for_each_domain ( d )
+    {
+        for_each_exec_domain ( d, ed )
+        {
+            printk("Xen: DOM %d, VCPU %d, CPU %d,"
+                   " confers %d, confer_out %d,"
+                   " confer_in %d, yield_count %d\n",
+                   d->domain_id, ed->vcpu_id, ed->processor,
+                   ed->confercnt, ed->confer_out, 
+                   ed->confer_in, ed->vcpu_info->yield_count
+                  );
+        }
+
+    }
+    read_unlock(&domlist_lock);
+}
+
 extern void dump_runq(unsigned char key);
 extern void print_sched_histo(unsigned char key);
 extern void reset_sched_histo(unsigned char key);
@@ -183,6 +211,8 @@
     register_keyhandler(
         'q', do_task_queues, "dump task queues + guest state");
     register_keyhandler(
+        'c', do_dump_confer, "dump confer stats");
+    register_keyhandler(
         'r', dump_runq,      "dump run queues");
     register_irq_keyhandler(
         'R', halt_machine,   "reboot machine"); 
diff -urN confer/xen/common/schedule.c debug/xen/common/schedule.c
--- confer/xen/common/schedule.c	2005-05-20 10:37:58.367756896 -0500
+++ debug/xen/common/schedule.c	2005-05-20 10:39:03.575843760 -0500
@@ -228,7 +228,11 @@
         if ( test_bit(_VCPUF_conferring, &current->vcpu_flags) ) {
             clear_bit(_VCPUF_conferring, &current->vcpu_flags);
             set_bit(_VCPUF_conferred, &current->vcpu_flags);
+            /* increment confer counters */
+            current->confer_out++;
+            ed->confer_in++;
         }
+
         SCHED_OP(wake, ed);
 #ifdef WAKE_HISTO
         ed->wokenup = NOW();
@@ -283,6 +287,9 @@
 {
     struct domain *d = current->domain;
    
+    /* count hcalls */
+    current->confercnt++;
+
     /* Validate CONFER prereqs:
     * - vcpu is within bounds
     * - vcpu is a valid in this domain
@@ -299,16 +306,22 @@
     if (d->exec_domain[vcpu] == NULL)
         return 0;
 
-    if (!test_bit(_VCPUF_canconfer, &current->vcpu_flags))
+    if (!test_bit(_VCPUF_canconfer, &current->vcpu_flags)) {
+        printk("confer: canconfer not set, %d->vcpu-flags = 0x%08lx\n", current->vcpu_id, current->vcpu_flags);
         return 0;
+    }
 
     /* even counts indicate a running vcpu, odd is preempted/conferred */
     /* don't confer if holder is currently running */
-    if ((d->exec_domain[vcpu]->vcpu_info->yield_count & 1) == 0)
+    if ((d->exec_domain[vcpu]->vcpu_info->yield_count & 1) == 0) {
+        printk("confer: vcpu %d already running\n", vcpu);
         return 0;
+    }
 
-    if (d->exec_domain[vcpu]->vcpu_info->yield_count != yield_count)
+    if (d->exec_domain[vcpu]->vcpu_info->yield_count != yield_count) {
+        printk("confer: yield_count mismatch\n");
         return 0;
+    }
 
     /*
      * set current's state to conferring, wake target
diff -urN confer/xen/include/xen/sched.h debug/xen/include/xen/sched.h
--- confer/xen/include/xen/sched.h	2005-05-20 10:37:58.378755224 -0500
+++ debug/xen/include/xen/sched.h	2005-05-20 10:39:03.577843456 -0500
@@ -87,6 +87,9 @@
     atomic_t         pausecnt;
 
     cpumap_t         cpumap;        /* which cpus this domain can run on */
+    u32              confer_out;    /* inc when conferring to another vcpu */
+    u32              confer_in;     /* inc when conferred from another vcpu */
+    u32              confercnt;     /* # of do_confer hcalls */
 
     struct arch_exec_domain arch;
 };

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] Yield to VCPU hcall, spinlock yielding
  2005-05-20 16:54 [PATCH] xen, xen-sparse: modify spinlocks to use directed yield Ryan Harper
  2005-05-20 18:16 ` Ryan Harper
  2005-05-20 18:17 ` Ryan Harper
@ 2005-06-03 19:40 ` Ryan Harper
  2 siblings, 0 replies; 4+ messages in thread
From: Ryan Harper @ 2005-06-03 19:40 UTC (permalink / raw)
  To: xen-devel

[-- Attachment #1: Type: text/plain, Size: 478 bytes --]

* Ryan Harper <ryanh@us.ibm.com> [2005-05-20 11:55]:
> The following patch creates a new hypercall, do_confer() which allows a

I've not recieved any feedback on this.  Following this patch up with
one that applies against current.  Builds, but haven't tested it since
current SMP domains don't run.  

--
Ryan Harper
Software Engineer; Linux Technology Center
IBM Corp., Austin, Tx
(512) 838-9253   T/L: 678-9253
ryanh@us.ibm.com

Signed-off-by: Ryan Harper <ryanh@us.ibm.com>

[-- Attachment #2: confer.patch --]
[-- Type: text/plain, Size: 16792 bytes --]

diff -urN b/linux-2.6.11-xen-sparse/arch/i386/lib/locks.c c/linux-2.6.11-xen-sparse/arch/i386/lib/locks.c
--- b/linux-2.6.11-xen-sparse/arch/i386/lib/locks.c	1969-12-31 18:00:00.000000000 -0600
+++ c/linux-2.6.11-xen-sparse/arch/i386/lib/locks.c	2005-06-03 09:41:10.933009544 -0500
@@ -0,0 +1,76 @@
+/*
+ * Spin and read/write lock operations.
+ *
+ * Copyright (C) 2001-2004 Paul Mackerras <paulus@au.ibm.com>, IBM
+ * Copyright (C) 2001 Anton Blanchard <anton@au.ibm.com>, IBM
+ * Copyright (C) 2002 Dave Engebretsen <engebret@us.ibm.com>, IBM
+ *   Rework to support virtual processors
+ * Copyright (C) 2005 Ryan Harper <ryanh@us.ibm.com>, IBM
+ *   Rework for Xen on x86
+ *
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/spinlock.h>
+#include <linux/module.h>
+#include <linux/stringify.h>
+#include <asm/hypercall.h>
+#include <asm/processor.h>
+
+/* waiting for a spinlock... */
+#if defined(CONFIG_XEN) && defined(CONFIG_SMP)
+void __spin_yield(spinlock_t *lock)
+{
+	unsigned int lock_value, holder_cpu, yield_count;
+   shared_info_t *s = HYPERVISOR_shared_info;
+
+	lock_value = lock->slock;
+	if (lock_value == 1)
+		return;
+	holder_cpu = lock->cpu;
+	BUG_ON(holder_cpu >= NR_CPUS);
+	yield_count = s->vcpu_data[holder_cpu].yield_count;
+	if ((yield_count & 1) == 0)
+		return;		/* virtual cpu is currently running */
+	rmb();
+	if (lock->slock != lock_value)
+		return;		/* something has changed */
+   HYPERVISOR_confer(holder_cpu, yield_count);
+}
+
+void __rw_yield(rwlock_t *rw)
+{
+	unsigned int lock_value, holder_cpu, yield_count;
+   shared_info_t *s = HYPERVISOR_shared_info;
+
+	lock_value = rw->lock;
+	if (lock_value == RW_LOCK_BIAS)
+		return;
+	holder_cpu = rw->cpu;
+	BUG_ON(holder_cpu >= NR_CPUS);
+	yield_count = s->vcpu_data[holder_cpu].yield_count;
+	if ((yield_count & 1) == 0)
+		return;		/* virtual cpu is currently running */
+	rmb();
+	if (rw->lock != lock_value)
+		return;		/* something has changed */
+   HYPERVISOR_confer(holder_cpu, yield_count);
+}
+
+void spin_unlock_wait(spinlock_t *lock)
+{
+	while (spin_is_locked(lock)) {
+		cpu_relax();
+		if (SHARED_PROCESSOR)
+			__spin_yield(lock);
+	}
+	cpu_relax();
+}
+EXPORT_SYMBOL(spin_unlock_wait);
+#endif
diff -urN b/linux-2.6.11-xen-sparse/arch/i386/lib/Makefile c/linux-2.6.11-xen-sparse/arch/i386/lib/Makefile
--- b/linux-2.6.11-xen-sparse/arch/i386/lib/Makefile	1969-12-31 18:00:00.000000000 -0600
+++ c/linux-2.6.11-xen-sparse/arch/i386/lib/Makefile	2005-06-03 09:41:10.948007446 -0500
@@ -0,0 +1,11 @@
+#
+# Makefile for i386-specific library files..
+#
+
+
+lib-y = checksum.o delay.o usercopy.o getuser.o memcpy.o strstr.o \
+	bitops.o 
+
+lib-$(CONFIG_X86_USE_3DNOW) += mmx.o
+lib-$(CONFIG_HAVE_DEC_LOCK) += dec_and_lock.o
+lib-$(CONFIG_XEN) += locks.o
diff -urN b/linux-2.6.11-xen-sparse/arch/xen/configs/xenU-smp_defconfig_x86_32 c/linux-2.6.11-xen-sparse/arch/xen/configs/xenU-smp_defconfig_x86_32
--- b/linux-2.6.11-xen-sparse/arch/xen/configs/xenU-smp_defconfig_x86_32	2005-06-03 09:02:36.837705157 -0500
+++ c/linux-2.6.11-xen-sparse/arch/xen/configs/xenU-smp_defconfig_x86_32	2005-06-03 09:41:10.949007306 -0500
@@ -117,8 +117,8 @@
 CONFIG_SMP=y
 CONFIG_NR_CPUS=8
 # CONFIG_SCHED_SMT is not set
-CONFIG_PREEMPT=y
-CONFIG_PREEMPT_BKL=y
+# CONFIG_PREEMPT is not set
+# CONFIG_PREEMPT_BKL is not set
 CONFIG_X86_CPUID=y
 
 #
diff -urN b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/entry.S c/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/entry.S
--- b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/entry.S	2005-06-02 22:21:42.000000000 -0500
+++ c/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/entry.S	2005-06-03 09:41:10.953006747 -0500
@@ -80,7 +80,7 @@
 #define evtchn_upcall_pending		/* 0 */
 #define evtchn_upcall_mask		1
 
-#define sizeof_vcpu_shift		3
+#define sizeof_vcpu_shift		4
 
 #ifdef CONFIG_SMP
 #define preempt_disable(reg)	incl TI_preempt_count(reg)
diff -urN b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/hypercall.h c/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/hypercall.h
--- b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/hypercall.h	2005-06-02 22:21:42.000000000 -0500
+++ c/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/hypercall.h	2005-06-03 09:41:10.954006607 -0500
@@ -517,4 +517,20 @@
     return ret;
 }
 
+static inline int
+HYPERVISOR_confer(
+	unsigned int vcpu, unsigned int yield_count)
+{
+    int ret;
+    unsigned long ign1, ign2;
+
+    __asm__ __volatile__ (
+        TRAP_INSTR
+        : "=a" (ret), "=b" (ign1), "=c" (ign2)
+	: "0" (__HYPERVISOR_confer), "1" (vcpu), "2" (yield_count)
+	: "memory");
+
+    return ret;
+}
+
 #endif /* __HYPERCALL_H__ */
diff -urN b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/spinlock.h c/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/spinlock.h
--- b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/spinlock.h	2005-06-02 22:21:37.000000000 -0500
+++ c/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/spinlock.h	2005-06-03 09:41:10.975003670 -0500
@@ -22,10 +22,36 @@
 #ifdef CONFIG_PREEMPT
 	unsigned int break_lock;
 #endif
+#if defined(CONFIG_XEN) && defined(CONFIG_SMP)
+	unsigned int cpu;
+#endif
 } spinlock_t;
 
 #define SPINLOCK_MAGIC	0xdead4ead
 
+/*
+ * Read-write spinlocks, allowing multiple readers
+ * but only one writer.
+ *
+ * NOTE! it is quite common to have readers in interrupts
+ * but no interrupt writers. For those circumstances we
+ * can "mix" irq-safe locks - any writer needs to get a
+ * irq-safe write-lock, but readers can get non-irqsafe
+ * read-locks.
+ */
+typedef struct {
+	volatile unsigned int lock;
+#ifdef CONFIG_DEBUG_SPINLOCK
+	unsigned magic;
+#endif
+#ifdef CONFIG_PREEMPT
+	unsigned int break_lock;
+#endif
+#if defined(CONFIG_XEN) && defined(CONFIG_SMP)
+	unsigned int cpu;
+#endif
+} rwlock_t;
+
 #ifdef CONFIG_DEBUG_SPINLOCK
 #define SPINLOCK_MAGIC_INIT	, SPINLOCK_MAGIC
 #else
@@ -44,7 +70,20 @@
  */
 
 #define spin_is_locked(x)	(*(volatile signed char *)(&(x)->slock) <= 0)
+#if defined(CONFIG_XEN) && defined(CONFIG_SMP)
+#include <linux/smp.h>
+#define SPINLOCK_CPU (smp_processor_id())
+/* We only yield to the hypervisor if we are in shared processor mode */
+#define SHARED_PROCESSOR (HYPERVISOR_shared_info->shproc == 0)
+extern void __spin_yield(spinlock_t *lock);
+extern void __rw_yield(rwlock_t *rw);
+extern void spin_unlock_wait(spinlock_t *lock);
+#else
+#define __spin_yield(x) barrier()
+#define __rw_yield(x) barrier()
+#define SHARED_PROCESSOR 0
 #define spin_unlock_wait(x)	do { barrier(); } while(spin_is_locked(x))
+#endif
 
 #define spin_lock_string \
 	"\n1:\t" \
@@ -125,6 +164,9 @@
 		"xchgb %b0,%1"
 		:"=q" (oldval), "=m" (lock->slock)
 		:"0" (0) : "memory");
+#if defined(CONFIG_XEN) && defined(CONFIG_SMP)
+	lock->cpu = SPINLOCK_CPU;
+#endif
 	return oldval > 0;
 }
 
@@ -136,43 +178,55 @@
 		BUG();
 	}
 #endif
-	__asm__ __volatile__(
-		spin_lock_string
-		:"=m" (lock->slock) : : "memory");
+#if defined(CONFIG_XEN) && defined(CONFIG_SMP)
+	while (1) {
+		if ( likely(_raw_spin_trylock(lock)) )
+			break;
+		do {
+			cpu_relax();
+			if (SHARED_PROCESSOR)
+				__spin_yield(lock);
+		} while (likely(spin_is_locked(lock)));
+		cpu_relax();
+	}
+#else
+   __asm__ __volatile__(
+      spin_lock_string
+      :"=m" (lock->slock) : : "memory");
+#endif
 }
 
 static inline void _raw_spin_lock_flags (spinlock_t *lock, unsigned long flags)
 {
+#if defined(CONFIG_XEN) && defined(CONFIG_SMP)
+	unsigned long flags_dis;
+#endif
 #ifdef CONFIG_DEBUG_SPINLOCK
 	if (unlikely(lock->magic != SPINLOCK_MAGIC)) {
 		printk("eip: %p\n", __builtin_return_address(0));
 		BUG();
 	}
 #endif
-	__asm__ __volatile__(
-		spin_lock_string_flags
-		:"=m" (lock->slock) : "r" (flags) : "memory");
-}
-
-/*
- * Read-write spinlocks, allowing multiple readers
- * but only one writer.
- *
- * NOTE! it is quite common to have readers in interrupts
- * but no interrupt writers. For those circumstances we
- * can "mix" irq-safe locks - any writer needs to get a
- * irq-safe write-lock, but readers can get non-irqsafe
- * read-locks.
- */
-typedef struct {
-	volatile unsigned int lock;
-#ifdef CONFIG_DEBUG_SPINLOCK
-	unsigned magic;
-#endif
-#ifdef CONFIG_PREEMPT
-	unsigned int break_lock;
+#if defined(CONFIG_XEN) && defined(CONFIG_SMP)
+	while (1) {
+		if ( likely(_raw_spin_trylock(lock)) )
+			break;
+		local_save_flags(flags_dis);
+		local_irq_restore(flags);
+		do {
+			cpu_relax();
+			if (SHARED_PROCESSOR)
+				__spin_yield(lock);
+		} while (likely(spin_is_locked(lock)));
+		cpu_relax();
+		local_irq_restore(flags_dis);
+	}
+#else
+   __asm__ __volatile__(
+      spin_lock_string_flags
+      :"=m" (lock->slock) : "r" (flags) : "memory");
 #endif
-} rwlock_t;
+}
 
 #define RWLOCK_MAGIC	0xdeaf1eed
 
@@ -198,6 +252,18 @@
  */
 #define write_can_lock(x) ((x)->lock == RW_LOCK_BIAS)
 
+static inline int _raw_write_trylock(rwlock_t *lock)
+{
+	atomic_t *count = (atomic_t *)lock;
+	if (atomic_sub_and_test(RW_LOCK_BIAS, count)) {
+#if defined(CONFIG_XEN) && defined(CONFIG_SMP)
+      lock->cpu = SPINLOCK_CPU;
+#endif
+		return 1;
+   }
+	atomic_add(RW_LOCK_BIAS, count);
+	return 0;
+}
 /*
  * On x86, we implement read-write locks as a 32-bit counter
  * with the high bit (sign) being the "contended" bit.
@@ -222,7 +288,20 @@
 #ifdef CONFIG_DEBUG_SPINLOCK
 	BUG_ON(rw->magic != RWLOCK_MAGIC);
 #endif
+#if defined(CONFIG_XEN) && defined(CONFIG_SMP)
+	while (1) {
+		if ( likely(_raw_write_trylock(rw)) )
+			break;
+		do {
+			cpu_relax();
+			if (SHARED_PROCESSOR)
+				__rw_yield(rw);
+		} while ( likely(!write_can_lock(rw)));
+		cpu_relax();
+	}
+#else
 	__build_write_lock(rw, "__write_lock_failed");
+#endif
 }
 
 #define _raw_read_unlock(rw)		asm volatile("lock ; incl %0" :"=m" ((rw)->lock) : : "memory")
@@ -238,13 +317,6 @@
 	return 0;
 }
 
-static inline int _raw_write_trylock(rwlock_t *lock)
-{
-	atomic_t *count = (atomic_t *)lock;
-	if (atomic_sub_and_test(RW_LOCK_BIAS, count))
-		return 1;
-	atomic_add(RW_LOCK_BIAS, count);
-	return 0;
-}
+
 
 #endif /* __ASM_SPINLOCK_H */
diff -urN b/xen/arch/x86/domain.c c/xen/arch/x86/domain.c
--- b/xen/arch/x86/domain.c	2005-06-02 22:21:41.000000000 -0500
+++ c/xen/arch/x86/domain.c	2005-06-03 09:42:37.487868084 -0500
@@ -240,6 +240,8 @@
     memset(d->shared_info, 0, PAGE_SIZE);
     v->vcpu_info = &d->shared_info->vcpu_data[v->vcpu_id];
     v->cpumap = CPUMAP_RUNANYWHERE;
+    /* default vcpus to sharing physical cpus */
+    d->shared_info->shproc = 1;
     SHARE_PFN_WITH_DOMAIN(virt_to_page(d->shared_info), d);
     machine_to_phys_mapping[virt_to_phys(d->shared_info) >> 
                            PAGE_SHIFT] = INVALID_M2P_ENTRY;
diff -urN b/xen/arch/x86/x86_32/entry.S c/xen/arch/x86/x86_32/entry.S
--- b/xen/arch/x86/x86_32/entry.S	2005-06-02 22:21:43.000000000 -0500
+++ c/xen/arch/x86/x86_32/entry.S	2005-06-03 09:41:11.000000173 -0500
@@ -751,6 +751,7 @@
         .long do_boot_vcpu
         .long do_ni_hypercall       /* 25 */
         .long do_mmuext_op
+        .long do_confer
         .rept NR_hypercalls-((.-hypercall_table)/4)
         .long do_ni_hypercall
         .endr
diff -urN b/xen/common/domain.c c/xen/common/domain.c
--- b/xen/common/domain.c	2005-06-02 22:21:37.000000000 -0500
+++ c/xen/common/domain.c	2005-06-03 09:42:09.839752947 -0500
@@ -392,6 +392,7 @@
 
     atomic_set(&v->pausecnt, 0);
     v->cpumap = CPUMAP_RUNANYWHERE;
+    set_bit(_VCPUF_canconfer, &v->vcpu_flags);
 
     memcpy(&v->arch, &idle0_vcpu.arch, sizeof(v->arch));
 
diff -urN b/xen/common/schedule.c c/xen/common/schedule.c
--- b/xen/common/schedule.c	2005-06-02 22:21:42.000000000 -0500
+++ c/xen/common/schedule.c	2005-06-03 09:41:49.540601494 -0500
@@ -219,6 +219,11 @@
     spin_lock_irqsave(&schedule_data[v->processor].schedule_lock, flags);
     if ( likely(domain_runnable(v)) )
     {
+        /* mark current's confer state */
+        if ( test_bit(_VCPUF_conferring, &current->vcpu_flags) ) {
+            clear_bit(_VCPUF_conferring, &current->vcpu_flags);
+            set_bit(_VCPUF_conferred, &current->vcpu_flags);
+        }
         SCHED_OP(wake, v);
 #ifdef WAKE_HISTO
         v->wokenup = NOW();
@@ -260,6 +265,51 @@
     return 0;
 }
 
+/* Confer control to another vcpu */
+long do_confer(unsigned int vcpu, unsigned int yield_count)
+{
+    struct domain *d = current->domain;
+   
+    /* Validate CONFER prereqs:
+    * - vcpu is within bounds
+    * - vcpu is a valid in this domain
+    * - current has not already conferred its slice to vcpu
+    * - vcpu is not already running
+    * - designated vcpu's yield_count matches value from call
+    *
+    * of all are ok, then set conferred value and enter scheduler
+    */
+
+    if (unlikely(vcpu > MAX_VIRT_CPUS))
+        return 0; 
+
+    if (unlikely(d->vcpu[vcpu] == NULL))
+        return 0;
+
+    if (unlikely(!test_bit(_VCPUF_canconfer, &current->vcpu_flags)))
+        return 0;
+
+    /* even counts indicate a running vcpu, odd is preempted/conferred */
+    /* don't confer if holder is currently running */
+    if (unlikely((d->vcpu[vcpu]->vcpu_info->yield_count & 1) == 0))
+        return 0;
+
+    if (unlikely(d->vcpu[vcpu]->vcpu_info->yield_count != yield_count))
+        return 0;
+
+    /*
+     * set current's state to conferring, wake target
+     */
+    clear_bit(_VCPUF_canconfer, &current->vcpu_flags);
+    set_bit(_VCPUF_conferring, &current->vcpu_flags);
+    domain_wake(d->vcpu[vcpu]);
+
+    /* give up my timeslice */
+    do_yield();
+
+    return 0;
+}
+
 /*
  * Demultiplex scheduler-related hypercalls.
  */
@@ -422,7 +472,15 @@
 
     r_time = next_slice.time;
     next = next_slice.task;
-    
+
+    /*
+     * always clear conferred state so this vcpu can confer during its slice
+     * since it can confer, clear all other confer state
+     */
+    set_bit(_VCPUF_canconfer,   &next->vcpu_flags);
+    clear_bit(_VCPUF_conferring, &next->vcpu_flags);
+    clear_bit(_VCPUF_conferred,  &next->vcpu_flags);
+
     schedule_data[cpu].curr = next;
     
     next->lastschd = now;
@@ -434,6 +492,12 @@
 
     spin_unlock_irq(&schedule_data[cpu].schedule_lock);
 
+     /* bump vcpu yield_count when controlling domain is not-idle */
+     if ( !is_idle_task(prev->domain) )
+         prev->vcpu_info->yield_count++;
+     if ( !is_idle_task(next->domain) )
+         next->vcpu_info->yield_count++;
+ 
     if ( unlikely(prev == next) )
         return continue_running(prev);
 
diff -urN b/xen/include/public/xen.h c/xen/include/public/xen.h
--- b/xen/include/public/xen.h	2005-06-02 22:21:41.000000000 -0500
+++ c/xen/include/public/xen.h	2005-06-03 09:41:11.040994437 -0500
@@ -58,6 +58,7 @@
 #define __HYPERVISOR_boot_vcpu            24
 #define __HYPERVISOR_set_segment_base     25 /* x86/64 only */
 #define __HYPERVISOR_mmuext_op            26
+#define __HYPERVISOR_confer               27
 
 /* 
  * VIRTUAL INTERRUPTS
@@ -324,8 +325,11 @@
     u8 evtchn_upcall_mask;              /* 1 */
     u8 pad0, pad1;
     u32 evtchn_pending_sel;             /* 4 */
-    arch_vcpu_info_t arch;              /* 8 */
-} PACKED vcpu_info_t;                   /* 8 + arch */
+    /* Even when vcpu is running, Odd when it is preempted/conferred */
+    u32 yield_count;                    /* 8 */
+    u32 pad2;                           /* 12 */
+    arch_vcpu_info_t arch;              /* 16 */
+} PACKED vcpu_info_t;                   /* 16 + arch */
 
 /*
  * Xen/kernel shared data -- pointer provided in start_info.
@@ -337,6 +341,9 @@
 
     u32 n_vcpu;
 
+    /* set if domains' vcpus share physical cpus */
+    int shproc; 
+
     /*
      * A domain can have up to 1024 "event channels" on which it can send
      * and receive asynchronous event notifications. There are three classes
diff -urN b/xen/include/xen/sched.h c/xen/include/xen/sched.h
--- b/xen/include/xen/sched.h	2005-06-02 22:21:36.000000000 -0500
+++ c/xen/include/xen/sched.h	2005-06-03 09:41:11.042994158 -0500
@@ -342,6 +342,15 @@
  /* Initialization completed. */
 #define _VCPUF_initialised     8
 #define VCPUF_initialised      (1UL<<_VCPUF_initialised)
+ /* Able to give time slice to another vcpu */
+#define _VCPUF_canconfer       9
+#define VCPUF_canconfer        (1UL<<_VCPUF_canconfer)
+ /* Currently giving time slice to another vcpu */
+#define _VCPUF_conferring      10
+#define VCPUF_conferring       (1UL<<_VCPUF_conferring)
+ /* Already given time slice to another vcpu */
+#define _VCPUF_conferred       11
+#define VCPUF_conferred        (1UL<<_VCPUF_conferred)
 
 /*
  * Per-domain flags (domain_flags).

[-- Attachment #3: confer_debug.patch --]
[-- Type: text/plain, Size: 4031 bytes --]

diff -urN c/xen/common/keyhandler.c d/xen/common/keyhandler.c
--- c/xen/common/keyhandler.c	2005-06-02 22:21:41.000000000 -0500
+++ d/xen/common/keyhandler.c	2005-06-03 09:48:15.075135457 -0500
@@ -12,6 +12,7 @@
 #include <xen/softirq.h>
 #include <xen/domain.h>
 #include <asm/debugger.h>
+#include <public/xen.h>
 
 #define KEY_MAX 256
 #define STR_MAX  64
@@ -139,6 +140,33 @@
     read_unlock(&domlist_lock);
 }
 
+static void do_dump_confer(unsigned char key)
+{
+    struct domain *d;
+    struct vcpu *v;
+    s_time_t       now = NOW();
+
+    printk("'%c' pressed -> dumping confer stats (now=0x%X:%08X)\n", key,
+           (u32)(now>>32), (u32)now);
+
+    read_lock(&domlist_lock);
+    for_each_domain ( d )
+    {
+        for_each_vcpu ( d, v )
+        {
+            printk("Xen: D%d, V%d, C%d,"
+                   " cnfs %d, c_out %d,"
+                   " c_in %d, ycnt %d\n",
+                   d->domain_id, v->vcpu_id, v->processor,
+                   v->confercnt, v->confer_out, 
+                   v->confer_in, v->vcpu_info->yield_count
+                  );
+        }
+
+    }
+    read_unlock(&domlist_lock);
+}
+
 extern void dump_runq(unsigned char key);
 extern void print_sched_histo(unsigned char key);
 extern void reset_sched_histo(unsigned char key);
@@ -184,6 +212,8 @@
     register_keyhandler(
         'q', do_task_queues, "dump task queues + guest state");
     register_keyhandler(
+        'c', do_dump_confer, "dump confer stats");
+    register_keyhandler(
         'r', dump_runq,      "dump run queues");
     register_irq_keyhandler(
         'R', halt_machine,   "reboot machine"); 
diff -urN c/xen/common/schedule.c d/xen/common/schedule.c
--- c/xen/common/schedule.c	2005-06-03 09:41:49.540601494 -0500
+++ d/xen/common/schedule.c	2005-06-03 09:46:01.768032249 -0500
@@ -223,6 +223,9 @@
         if ( test_bit(_VCPUF_conferring, &current->vcpu_flags) ) {
             clear_bit(_VCPUF_conferring, &current->vcpu_flags);
             set_bit(_VCPUF_conferred, &current->vcpu_flags);
+            /* increment confer counters */
+            current->confer_out++;
+            v->confer_in++;
         }
         SCHED_OP(wake, v);
 #ifdef WAKE_HISTO
@@ -270,6 +273,9 @@
 {
     struct domain *d = current->domain;
    
+    /* count hcalls */
+    current->confercnt++;
+
     /* Validate CONFER prereqs:
     * - vcpu is within bounds
     * - vcpu is a valid in this domain
@@ -286,16 +292,22 @@
     if (unlikely(d->vcpu[vcpu] == NULL))
         return 0;
 
-    if (unlikely(!test_bit(_VCPUF_canconfer, &current->vcpu_flags)))
+    if (unlikely(!test_bit(_VCPUF_canconfer, &current->vcpu_flags))) {
+         printk("confer: canconfer not set, %d->vcpu-flags = 0x%08lx\n", current->vcpu_id, current->vcpu_flags);
         return 0;
+    }
 
     /* even counts indicate a running vcpu, odd is preempted/conferred */
     /* don't confer if holder is currently running */
-    if (unlikely((d->vcpu[vcpu]->vcpu_info->yield_count & 1) == 0))
+    if (unlikely((d->vcpu[vcpu]->vcpu_info->yield_count & 1) == 0)) {
+        printk("confer: vcpu %d already running\n", vcpu);
         return 0;
+    }
 
-    if (unlikely(d->vcpu[vcpu]->vcpu_info->yield_count != yield_count))
+    if (unlikely(d->vcpu[vcpu]->vcpu_info->yield_count != yield_count)) {
+        printk("confer: yield_count mismatch\n");
         return 0;
+    }
 
     /*
      * set current's state to conferring, wake target
diff -urN c/xen/include/xen/sched.h d/xen/include/xen/sched.h
--- c/xen/include/xen/sched.h	2005-06-03 09:41:11.042994158 -0500
+++ d/xen/include/xen/sched.h	2005-06-03 09:43:52.881249246 -0500
@@ -76,6 +76,9 @@
     atomic_t         pausecnt;
 
     cpumap_t         cpumap;        /* which cpus this domain can run on */
+    u32              confer_out;    /* inc when conferring to another vcpu */
+    u32              confer_in;     /* inc when conferred from another vcpu */
+    u32              confercnt;     /* # of do_confer hcalls */
 
     struct arch_vcpu arch;
 };

[-- Attachment #4: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2005-06-03 19:40 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2005-05-20 16:54 [PATCH] xen, xen-sparse: modify spinlocks to use directed yield Ryan Harper
2005-05-20 18:16 ` Ryan Harper
2005-05-20 18:17 ` Ryan Harper
2005-06-03 19:40 ` [PATCH] Yield to VCPU hcall, spinlock yielding Ryan Harper

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.