[RFC] PLE's performance enhancement through improving scheduler
From: Zhang, Xiantao @ 2010-08-18  5:51 UTC
  To: xen-devel@lists.xensource.com; +Cc: Dong, Eddie, Keir Fraser

[-- Attachment #1: Type: text/plain, Size: 1158 bytes --]

The attached patch is for RFC only, not for check-in. We have recently been working on getting more benefit from the hardware PLE (Pause-Loop Exiting) feature by improving the scheduler in Xen, and the attached patch improves system throughput significantly: with a standard virtualization benchmark (vConsolidate), our testing shows a ~20% performance gain. The implementation enhances the scheduler at two points.
The first is that when a PLE vmexit occurs, the scheduler de-schedules the vcpu and puts it at the second position of the runq, instead of moving it to the tail, so that it is re-scheduled after a very short time. This improves the scheduler's fairness and gives PLE-sensitive guests a reasonable timeslice. The second is to boost the priority of the other vcpus of the same guest, by moving them to the head of the runq, when one vcpu of the guest triggers a PLE vmexit. We are still working on making the implementation more robust and more pervasive, but before that work is done we would like to collect your ideas and suggestions. Any comments are much appreciated. Thanks!
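
To make the runq placement concrete, here is a minimal standalone sketch (toy types only -- toy_vcpu, yield_to_second, boost_to_head are illustrative names, not the Xen code or API) of the two list operations the patch relies on: list_add(&v->runq_elem, runq->next) puts a yielding vcpu at the second position, while list_del() plus list_add(&v->runq_elem, runq) moves a boosted vcpu to the head.

#include <stddef.h>
#include <stdio.h>

/* Toy circular doubly-linked list with Xen list.h semantics. */
struct list_head { struct list_head *prev, *next; };

#define container_of(ptr, type, member) \
    ((type *)((char *)(ptr) - offsetof(type, member)))

static void list_init(struct list_head *h) { h->prev = h->next = h; }

/* Insert n between prev and next. */
static void list_insert_between(struct list_head *n, struct list_head *prev,
                                struct list_head *next)
{
    next->prev = n; n->next = next; n->prev = prev; prev->next = n;
}

/* list_add(n, head): insert n right after head. */
static void list_add(struct list_head *n, struct list_head *head)
{
    list_insert_between(n, head, head->next);
}

static void list_del(struct list_head *n)
{
    n->prev->next = n->next; n->next->prev = n->prev;
}

struct toy_vcpu { int id; struct list_head runq_elem; };

/* Yield on PLE exit: enqueue at the *second* position, i.e. right
 * after the current head element, so the vcpu runs again after just
 * one other vcpu instead of waiting for a full pass of the runq. */
static void yield_to_second(struct toy_vcpu *v, struct list_head *runq)
{
    list_add(&v->runq_elem, runq->next);
}

/* Boost a sibling vcpu: move it to the *head* of the runq so it is
 * picked next and can release the contended lock quickly. */
static void boost_to_head(struct toy_vcpu *v, struct list_head *runq)
{
    list_del(&v->runq_elem);
    list_add(&v->runq_elem, runq);
}

static void print_runq(const char *tag, struct list_head *runq)
{
    struct list_head *e;
    printf("%s:", tag);
    for ( e = runq->next; e != runq; e = e->next )
        printf(" v%d", container_of(e, struct toy_vcpu, runq_elem)->id);
    printf("\n");
}

int main(void)
{
    struct list_head runq;
    struct toy_vcpu a = { 1 }, b = { 2 }, c = { 3 }, d = { 4 };

    list_init(&runq);
    list_add(&c.runq_elem, &runq);      /* runq: v3          */
    list_add(&b.runq_elem, &runq);      /* runq: v2 v3       */
    list_add(&a.runq_elem, &runq);      /* runq: v1 v2 v3    */

    yield_to_second(&d, &runq);         /* runq: v1 v4 v2 v3 */
    print_runq("after yield_to_second(v4)", &runq);

    boost_to_head(&c, &runq);           /* runq: v3 v1 v4 v2 */
    print_runq("after boost_to_head(v3)", &runq);
    return 0;
}

Compiled standalone, this prints the runq order after each operation, matching the placement the patch implements in __runq_insert() and csched_vcpu_boost() below.
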
Xiantao

[-- Attachment #2: sched-ple.patch --]
[-- Type: application/octet-stream, Size: 5912 bytes --]

diff -r 8bb9cfaca402 xen/arch/x86/hvm/vmx/vmx.c
--- a/xen/arch/x86/hvm/vmx/vmx.c	Wed Aug 11 18:26:47 2010 +0100
+++ b/xen/arch/x86/hvm/vmx/vmx.c	Wed Aug 18 21:15:34 2010 +0800
@@ -2263,7 +2263,7 @@ asmlinkage void vmx_vmexit_handler(struc
 {
     unsigned int exit_reason, idtv_info, intr_info = 0, vector = 0;
     unsigned long exit_qualification, inst_len = 0;
-    struct vcpu *v = current;
+    struct vcpu *v = current, *v2;
 
     if ( paging_mode_hap(v->domain) && hvm_paging_enabled(v) )
         v->arch.hvm_vcpu.guest_cr[3] = v->arch.hvm_vcpu.hw_cr[3] =
@@ -2633,6 +2633,12 @@ asmlinkage void vmx_vmexit_handler(struc
 
     case EXIT_REASON_PAUSE_INSTRUCTION:
         perfc_incr(pauseloop_exits);
+        /* The pausing vcpu is probably spinning on a lock held by
+         * another vcpu of the same domain: boost its siblings so the
+         * lock holder can run and release the lock sooner. */
+        for_each_vcpu ( v->domain, v2 )
+            if ( v2 != v )
+                vcpu_boost(v2);
         do_sched_op_compat(SCHEDOP_yield, 0);
         break;
 
diff -r 8bb9cfaca402 xen/common/sched_credit.c
--- a/xen/common/sched_credit.c	Wed Aug 11 18:26:47 2010 +0100
+++ b/xen/common/sched_credit.c	Wed Aug 18 21:17:36 2010 +0800
@@ -145,6 +145,7 @@ struct csched_vcpu {
         uint32_t state_idle;
         uint32_t migrate_q;
         uint32_t migrate_r;
+        uint32_t boost;
     } stats;
 #endif
 };
@@ -202,23 +203,22 @@ __runq_insert(unsigned int cpu, struct c
     BUG_ON( __vcpu_on_runq(svc) );
     BUG_ON( cpu != svc->vcpu->processor );
 
+    /* A yielding vcpu of at least UNDER priority goes to the second
+     * position of the runq: it is de-scheduled now, but will be
+     * re-scheduled after just one other vcpu rather than a full pass. */
+    if ( (svc->flags & CSCHED_FLAG_VCPU_YIELD)
+         && svc->pri >= CSCHED_PRI_TS_UNDER
+         && __runq_elem(runq->next)->pri > CSCHED_PRI_IDLE )
+    {
+        list_add(&svc->runq_elem, runq->next);
+        return;
+    }
+
     list_for_each( iter, runq )
     {
         const struct csched_vcpu * const iter_svc = __runq_elem(iter);
         if ( svc->pri > iter_svc->pri )
             break;
-    }
-
-    /* If the vcpu yielded, try to put it behind one lower-priority
-     * runnable vcpu if we can.  The next runq_sort will bring it forward
-     * within 30ms if the queue too long. */
-    if ( svc->flags & CSCHED_FLAG_VCPU_YIELD
-         && __runq_elem(iter)->pri > CSCHED_PRI_IDLE )
-    {
-        iter=iter->next;
-
-        /* Some sanity checks */
-        BUG_ON(iter == runq);
     }
 
     list_add_tail(&svc->runq_elem, iter);
@@ -779,6 +774,39 @@ csched_vcpu_yield(const struct scheduler
     }
 }
 
+static void
+csched_vcpu_boost(const struct scheduler *ops, struct vcpu *vc)
+{
+    int cpu = vc->processor;
+    struct csched_vcpu * const svc = CSCHED_VCPU(vc);
+    struct csched_vcpu *svc_elem;
+    struct list_head *runq, *elem;
+
+    /* Only boost a queued vcpu of UNDER priority that is not running,
+     * not migrating, and not itself yielding. */
+    if ( vc->is_running
+         || test_bit(_VPF_migrating, &vc->pause_flags)
+         || (svc->flags & CSCHED_FLAG_VCPU_YIELD)
+         || svc->pri != CSCHED_PRI_TS_UNDER )
+        return;
+
+    /* Move the vcpu up to the head of the runq so it is picked next. */
+    runq = RUNQ(cpu);
+    for ( elem = runq->next; elem != runq; elem = elem->next )
+    {
+        svc_elem = __runq_elem(elem);
+        if ( svc_elem == svc )
+        {
+            list_del(elem);
+            list_add(elem, runq);
+            break;
+        }
+    }
+
+    CSCHED_VCPU_STAT_CRANK(svc, boost);
+    cpu_raise_softirq(cpu, SCHEDULE_SOFTIRQ);
+}
+
 static int
 csched_dom_cntl(
     const struct scheduler *ops,
@@ -1377,11 +1400,12 @@ csched_dump_vcpu(struct csched_vcpu *svc
     {
         printk(" credit=%i [w=%u]", atomic_read(&svc->credit), sdom->weight);
 #ifdef CSCHED_STATS
-        printk(" (%d+%u) {a/i=%u/%u m=%u+%u}",
+        printk(" (%d+%u) {a/i=%u/%u bst=%u m=%u+%u}",
                 svc->stats.credit_last,
                 svc->stats.credit_incr,
                 svc->stats.state_active,
                 svc->stats.state_idle,
+                svc->stats.boost,
                 svc->stats.migrate_q,
                 svc->stats.migrate_r);
 #endif
@@ -1549,6 +1573,7 @@ const struct scheduler sched_credit_def 
     .sleep          = csched_vcpu_sleep,
     .wake           = csched_vcpu_wake,
     .yield          = csched_vcpu_yield,
+    .boost          = csched_vcpu_boost,
 
     .adjust         = csched_dom_cntl,
 
diff -r 8bb9cfaca402 xen/common/schedule.c
--- a/xen/common/schedule.c	Wed Aug 11 18:26:47 2010 +0100
+++ b/xen/common/schedule.c	Wed Aug 18 21:15:34 2010 +0800
@@ -361,6 +361,24 @@ void vcpu_wake(struct vcpu *v)
     TRACE_2D(TRC_SCHED_WAKE, v->domain->domain_id, v->vcpu_id);
 }
 
+void vcpu_boost(struct vcpu *v)
+{
+    unsigned long flags;
+
+    /* Boosting is only an optimisation: rather than spinning, bail out
+     * if the target cpu's schedule lock is already held. */
+    local_irq_save(flags);
+    if ( spin_is_locked(per_cpu(schedule_data, v->processor).schedule_lock) )
+    {
+        local_irq_restore(flags);
+        return;
+    }
+    vcpu_schedule_lock(v);
+    if ( likely(vcpu_runnable(v)) )
+        SCHED_OP(VCPU2OP(v), boost, v);
+    vcpu_schedule_unlock_irqrestore(v, flags);
+}
+
 void vcpu_unblock(struct vcpu *v)
 {
     if ( !test_and_clear_bit(_VPF_blocked, &v->pause_flags) )
diff -r 8bb9cfaca402 xen/include/xen/sched-if.h
--- a/xen/include/xen/sched-if.h	Wed Aug 11 18:26:47 2010 +0100
+++ b/xen/include/xen/sched-if.h	Wed Aug 18 21:15:34 2010 +0800
@@ -109,6 +109,7 @@ struct scheduler {
     void         (*wake)           (const struct scheduler *, struct vcpu *);
     void         (*yield)          (const struct scheduler *, struct vcpu *);
     void         (*context_saved)  (const struct scheduler *, struct vcpu *);
+    void         (*boost)          (const struct scheduler *, struct vcpu *);
 
     struct task_slice (*do_schedule) (const struct scheduler *, s_time_t,
                                       bool_t tasklet_work_scheduled);
diff -r 8bb9cfaca402 xen/include/xen/sched.h
--- a/xen/include/xen/sched.h	Wed Aug 11 18:26:47 2010 +0100
+++ b/xen/include/xen/sched.h	Wed Aug 18 21:15:34 2010 +0800
@@ -498,6 +498,7 @@ void sched_tick_suspend(void);
 void sched_tick_suspend(void);
 void sched_tick_resume(void);
 void vcpu_wake(struct vcpu *d);
+void vcpu_boost(struct vcpu *d);
 void vcpu_sleep_nosync(struct vcpu *d);
 void vcpu_sleep_sync(struct vcpu *d);
 

[-- Attachment #3: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
