[PATCH v3 11/11] xen: sched: implement vcpu hard affinity in Credit2

xen-devel.lists.xenproject.org archive mirror
 help / color / mirror / Atom feed

From: Dario Faggioli <dario.faggioli@citrix.com>
To: xen-devel@lists.xenproject.org
Cc: Justin Weaver <jtweaver@hawaii.edu>,
	George Dunlap <george.dunlap@citrix.com>
Subject: [PATCH v3 11/11] xen: sched: implement vcpu hard affinity in Credit2
Date: Fri, 08 Apr 2016 03:24:46 +0200	[thread overview]
Message-ID: <20160408012445.10762.40064.stgit@Solace.fritz.box> (raw)
In-Reply-To: <20160408011204.10762.14241.stgit@Solace.fritz.box>

From: Justin Weaver <jtweaver@hawaii.edu>

Implement vcpu hard affinity in Credit2, as it was still missing.

Note that this patch "only" implements hard affinity,
i.e., the possibility of specifying on what pCPUs a
certain vCPU can run. Soft affinity (which expresses a
preference for vCPUs to run on certain pCPUs) is still
not supported by Credit2, even after this patch.

Signed-off-by: Justin Weaver <jtweaver@hawaii.edu>
Signed-off-by: Dario Faggioli <dario.faggioli@citrix.com>
Acked-by: George Dunlap <george.dunlap@citrix.com>
---
 xen/common/sched_credit2.c |  131 ++++++++++++++++++++++++++++++++++----------
 1 file changed, 102 insertions(+), 29 deletions(-)

diff --git a/xen/common/sched_credit2.c b/xen/common/sched_credit2.c
index 8617c9b..59eb3db 100644
--- a/xen/common/sched_credit2.c
+++ b/xen/common/sched_credit2.c
@@ -318,6 +318,36 @@ struct csched2_dom {
     uint16_t nr_vcpus;
 };
 
+/*
+ * When a hard affinity change occurs, we may not be able to check some
+ * (any!) of the other runqueues, when looking for the best new processor
+ * for svc (as trylock-s in choose_cpu() can fail). If that happens, we
+ * pick, in order of decreasing preference:
+ *  - svc's current pcpu;
+ *  - another pcpu from svc's current runq;
+ *  - any pcpu of svc's cpupool that is in svc's hard affinity.
+ */
+static int get_fallback_cpu(struct csched2_vcpu *svc)
+{
+    int cpu;
+
+    if ( likely(cpumask_test_cpu(svc->vcpu->processor,
+                                 svc->vcpu->cpu_hard_affinity)) )
+        return svc->vcpu->processor;
+
+    cpumask_and(cpumask_scratch, svc->vcpu->cpu_hard_affinity,
+                &svc->rqd->active);
+    cpu = cpumask_first(cpumask_scratch);
+    if ( likely(cpu < nr_cpu_ids) )
+        return cpu;
+
+    cpumask_and(cpumask_scratch, svc->vcpu->cpu_hard_affinity,
+                cpupool_domain_cpumask(svc->vcpu->domain));
+
+    ASSERT(!cpumask_empty(cpumask_scratch));
+
+    return cpumask_first(cpumask_scratch);
+}
 
 /*
  * Time-to-credit, credit-to-time.
@@ -551,8 +581,9 @@ runq_tickle(const struct scheduler *ops, unsigned int cpu, struct csched2_vcpu *
         goto tickle;
     }
     
-    /* Get a mask of idle, but not tickled */
+    /* Get a mask of idle, but not tickled, that new is allowed to run on. */
     cpumask_andnot(&mask, &rqd->idle, &rqd->tickled);
+    cpumask_and(&mask, &mask, new->vcpu->cpu_hard_affinity);
     
     /* If it's not empty, choose one */
     i = cpumask_cycle(cpu, &mask);
@@ -563,9 +594,11 @@ runq_tickle(const struct scheduler *ops, unsigned int cpu, struct csched2_vcpu *
     }
 
     /* Otherwise, look for the non-idle cpu with the lowest credit,
-     * skipping cpus which have been tickled but not scheduled yet */
+     * skipping cpus which have been tickled but not scheduled yet,
+     * that new is allowed to run on. */
     cpumask_andnot(&mask, &rqd->active, &rqd->idle);
     cpumask_andnot(&mask, &mask, &rqd->tickled);
+    cpumask_and(&mask, &mask, new->vcpu->cpu_hard_affinity);
 
     for_each_cpu(i, &mask)
     {
@@ -1115,9 +1148,8 @@ choose_cpu(const struct scheduler *ops, struct vcpu *vc)
             d2printk("%pv -\n", svc->vcpu);
             clear_bit(__CSFLAG_runq_migrate_request, &svc->flags);
         }
-        /* Leave it where it is for now.  When we actually pay attention
-         * to affinity we'll have to figure something out... */
-        return vc->processor;
+
+        return get_fallback_cpu(svc);
     }
 
     /* First check to see if we're here because someone else suggested a place
@@ -1128,45 +1160,56 @@ choose_cpu(const struct scheduler *ops, struct vcpu *vc)
         {
             printk("%s: Runqueue migrate aborted because target runqueue disappeared!\n",
                    __func__);
-            /* Fall-through to normal cpu pick */
         }
         else
         {
-            d2printk("%pv +\n", svc->vcpu);
-            new_cpu = cpumask_cycle(vc->processor, &svc->migrate_rqd->active);
-            goto out_up;
+            cpumask_and(cpumask_scratch, vc->cpu_hard_affinity,
+                        &svc->migrate_rqd->active);
+            new_cpu = cpumask_any(cpumask_scratch);
+            if ( new_cpu < nr_cpu_ids )
+            {
+                d2printk("%pv +\n", svc->vcpu);
+                goto out_up;
+            }
         }
+        /* Fall-through to normal cpu pick */
     }
 
-    /* FIXME: Pay attention to cpu affinity */                                                                                      
-
     min_avgload = MAX_LOAD;
 
     /* Find the runqueue with the lowest instantaneous load */
     for_each_cpu(i, &prv->active_queues)
     {
         struct csched2_runqueue_data *rqd;
-        s_time_t rqd_avgload;
+        s_time_t rqd_avgload = MAX_LOAD;
 
         rqd = prv->rqd + i;
 
-        /* If checking a different runqueue, grab the lock,
-         * read the avg, and then release the lock.
+        /*
+         * If checking a different runqueue, grab the lock, check hard
+         * affinity, read the avg, and then release the lock.
          *
          * If on our own runqueue, don't grab or release the lock;
          * but subtract our own load from the runqueue load to simulate
-         * impartiality */
+         * impartiality.
+         *
+         * Note that, if svc's hard affinity has changed, this is the
+         * first time we see such a change, so it is indeed possible
+         * that none of the cpus in svc's current runqueue is in our
+         * (new) hard affinity!
+         */
         if ( rqd == svc->rqd )
         {
-            rqd_avgload = rqd->b_avgload - svc->avgload;
+            if ( cpumask_intersects(vc->cpu_hard_affinity, &rqd->active) )
+                rqd_avgload = rqd->b_avgload - svc->avgload;
         }
         else if ( spin_trylock(&rqd->lock) )
         {
-            rqd_avgload = rqd->b_avgload;
+            if ( cpumask_intersects(vc->cpu_hard_affinity, &rqd->active) )
+                rqd_avgload = rqd->b_avgload;
+
             spin_unlock(&rqd->lock);
         }
-        else
-            continue;
 
         if ( rqd_avgload < min_avgload )
         {
@@ -1175,12 +1218,14 @@ choose_cpu(const struct scheduler *ops, struct vcpu *vc)
         }
     }
 
-    /* We didn't find anyone (most likely because of spinlock contention); leave it where it is */
+    /* We didn't find anyone (most likely because of spinlock contention). */
     if ( min_rqi == -1 )
-        new_cpu = vc->processor;
+        new_cpu = get_fallback_cpu(svc);
     else
     {
-        new_cpu = cpumask_cycle(vc->processor, &prv->rqd[min_rqi].active);
+        cpumask_and(cpumask_scratch, vc->cpu_hard_affinity,
+                    &prv->rqd[min_rqi].active);
+        new_cpu = cpumask_any(cpumask_scratch);
         BUG_ON(new_cpu >= nr_cpu_ids);
     }
 
@@ -1260,7 +1305,12 @@ static void migrate(const struct scheduler *ops,
             on_runq=1;
         }
         __runq_deassign(svc);
-        svc->vcpu->processor = cpumask_any(&trqd->active);
+
+        cpumask_and(cpumask_scratch, svc->vcpu->cpu_hard_affinity,
+                    &trqd->active);
+        svc->vcpu->processor = cpumask_any(cpumask_scratch);
+        BUG_ON(svc->vcpu->processor >= nr_cpu_ids);
+
         __runq_assign(svc, trqd);
         if ( on_runq )
         {
@@ -1274,6 +1324,17 @@ static void migrate(const struct scheduler *ops,
     }
 }
 
+/*
+ * It makes sense to consider migrating svc to rqd if:
+ *  - svc is not already flagged to migrate, and
+ *  - svc is allowed to run on at least one of the pcpus of rqd.
+ */
+static bool_t vcpu_is_migrateable(struct csched2_vcpu *svc,
+                                  struct csched2_runqueue_data *rqd)
+{
+    return !(svc->flags & CSFLAG_runq_migrate_request) &&
+           cpumask_intersects(svc->vcpu->cpu_hard_affinity, &rqd->active);
+}
 
 static void balance_load(const struct scheduler *ops, int cpu, s_time_t now)
 {
@@ -1382,8 +1443,7 @@ retry:
 
         __update_svc_load(ops, push_svc, 0, now);
 
-        /* Skip this one if it's already been flagged to migrate */
-        if ( push_svc->flags & CSFLAG_runq_migrate_request )
+        if ( !vcpu_is_migrateable(push_svc, st.orqd) )
             continue;
 
         list_for_each( pull_iter, &st.orqd->svc )
@@ -1395,8 +1455,7 @@ retry:
                 __update_svc_load(ops, pull_svc, 0, now);
             }
         
-            /* Skip this one if it's already been flagged to migrate */
-            if ( pull_svc->flags & CSFLAG_runq_migrate_request )
+            if ( !vcpu_is_migrateable(pull_svc, st.lrqd) )
                 continue;
 
             consider(&st, push_svc, pull_svc);
@@ -1412,8 +1471,7 @@ retry:
     {
         struct csched2_vcpu * pull_svc = list_entry(pull_iter, struct csched2_vcpu, rqd_elem);
         
-        /* Skip this one if it's already been flagged to migrate */
-        if ( pull_svc->flags & CSFLAG_runq_migrate_request )
+        if ( !vcpu_is_migrateable(pull_svc, st.lrqd) )
             continue;
 
         /* Consider pull only */
@@ -1452,11 +1510,22 @@ csched2_vcpu_migrate(
 
     /* Check if new_cpu is valid */
     BUG_ON(!cpumask_test_cpu(new_cpu, &CSCHED2_PRIV(ops)->initialized));
+    ASSERT(cpumask_test_cpu(new_cpu, vc->cpu_hard_affinity));
 
     trqd = RQD(ops, new_cpu);
 
+    /*
+     * Do the actual movement toward new_cpu, and update vc->processor.
+     * If we are changing runqueue, migrate() takes care of everything.
+     * If we are not changing runqueue, we need to update vc->processor
+     * here. In fact, if, for instance, we are here because the vcpu's
+     * hard affinity changed, we don't want to risk leaving vc->processor
+     * pointing to a pcpu where we can't run any longer.
+     */
     if ( trqd != svc->rqd )
         migrate(ops, svc, trqd, NOW());
+    else
+        vc->processor = new_cpu;
 }
 
 static int
@@ -1679,6 +1748,10 @@ runq_candidate(struct csched2_runqueue_data *rqd,
     {
         struct csched2_vcpu * svc = list_entry(iter, struct csched2_vcpu, runq_elem);
 
+        /* Only consider vcpus that are allowed to run on this processor. */
+        if ( !cpumask_test_cpu(cpu, svc->vcpu->cpu_hard_affinity) )
+            continue;
+
         /* If this is on a different processor, don't pull it unless
          * its credit is at least CSCHED2_MIGRATE_RESIST higher. */
         if ( svc->vcpu->processor != cpu


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

     prev parent reply	other threads:[~2016-04-08  1:24 UTC|newest]

Thread overview: 25+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-04-08  1:23 [PATCH v3 00/11] Fixes and improvement (including hard affinity!) for Credit2 Dario Faggioli
2016-04-08  1:23 ` [PATCH v3 01/11] xen: sched: make implementing .alloc_pdata optional Dario Faggioli
2016-04-08 14:03   ` Robert VanVossen
2016-04-08  1:23 ` [PATCH v3 02/11] xen: sched: implement .init_pdata in Credit, Credit2 and RTDS Dario Faggioli
2016-04-08  1:23 ` [PATCH v3 03/11] xen: sched: move pCPU initialization in an helper Dario Faggioli
2016-04-08  1:23 ` [PATCH v3 04/11] xen: sched: close potential races when switching scheduler to CPUs Dario Faggioli
2016-04-08 12:52   ` George Dunlap
2016-04-08 13:00     ` George Dunlap
2016-04-08 13:11       ` Dario Faggioli
2016-04-08 14:00         ` Robert VanVossen
2016-04-11 14:43         ` Konrad Rzeszutek Wilk
2016-04-08  1:23 ` [PATCH v3 05/11] xen: sched: improve credit2 bootparams' scope, placement and signedness Dario Faggioli
2016-04-08  1:24 ` [PATCH v3 06/11] xen: sched: on Credit2, don't reprogram the timer if idle Dario Faggioli
2016-04-08  1:24 ` [PATCH v3 07/11] xen: sched: fix per-socket runqueue creation in credit2 Dario Faggioli
2016-04-08  1:24 ` [PATCH v3 08/11] xen: sched: allow for choosing credit2 runqueues configuration at boot Dario Faggioli
2016-04-08  4:18   ` Juergen Gross
2016-04-08  7:35     ` Dario Faggioli
2016-04-08  7:39       ` Juergen Gross
2016-04-08 10:03         ` Dario Faggioli
2016-04-08 13:10       ` George Dunlap
2016-04-08 15:13       ` [PATCH v3 00/11] Fixes and improvement (including hard affinity!) for Credit2 [and 1 more messages] Ian Jackson
2016-04-11 14:43         ` Konrad Rzeszutek Wilk
2016-04-08  1:24 ` [PATCH v3 09/11] xen: sched: per-core runqueues as default in credit2 Dario Faggioli
2016-04-08  1:24 ` [PATCH v3 10/11] xen: sched: privde some scratch space for not putting cpumasks on stack Dario Faggioli
2016-04-08  1:24 ` Dario Faggioli [this message]

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:8617c9b dfblob:59eb3db )
 OR (
bs:"[PATCH v3 11/11] xen: sched: implement vcpu hard affinity in Credit2" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20160408012445.10762.40064.stgit@Solace.fritz.box \
    --to=dario.faggioli@citrix.com \
    --cc=george.dunlap@citrix.com \
    --cc=jtweaver@hawaii.edu \
    --cc=xen-devel@lists.xenproject.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).