[PATCH v3 11/11] xen: sched: implement vcpu hard affinity in Credit2

All of lore.kernel.org
 help / color / mirror / Atom feed

From: Dario Faggioli <dario.faggioli@citrix.com>
To: xen-devel@lists.xenproject.org
Cc: Justin Weaver <jtweaver@hawaii.edu>,
	George Dunlap <george.dunlap@citrix.com>
Subject: [PATCH v3 11/11] xen: sched: implement vcpu hard affinity in Credit2
Date: Fri, 08 Apr 2016 03:24:46 +0200	[thread overview]
Message-ID: <20160408012445.10762.40064.stgit@Solace.fritz.box> (raw)
In-Reply-To: <20160408011204.10762.14241.stgit@Solace.fritz.box>

From: Justin Weaver <jtweaver@hawaii.edu>

Implement vcpu hard affinity in Credit2, as it was still missing.

Note that this patch "only" implements hard affinity,
i.e., the possibility of specifying on what pCPUs a
certain vCPU can run. Soft affinity (which expresses a
preference for vCPUs to run on certain pCPUs) is still
not supported by Credit2, even after this patch.

Signed-off-by: Justin Weaver <jtweaver@hawaii.edu>
Signed-off-by: Dario Faggioli <dario.faggioli@citrix.com>
Acked-by: George Dunlap <george.dunlap@citrix.com>
---
 xen/common/sched_credit2.c |  131 ++++++++++++++++++++++++++++++++++----------
 1 file changed, 102 insertions(+), 29 deletions(-)

diff --git a/xen/common/sched_credit2.c b/xen/common/sched_credit2.c
index 8617c9b..59eb3db 100644
--- a/xen/common/sched_credit2.c
+++ b/xen/common/sched_credit2.c
@@ -318,6 +318,36 @@ struct csched2_dom {
     uint16_t nr_vcpus;
 };
 
+/*
+ * When a hard affinity change occurs, we may not be able to check some
+ * (any!) of the other runqueues, when looking for the best new processor
+ * for svc (as trylock-s in choose_cpu() can fail). If that happens, we
+ * pick, in order of decreasing preference:
+ *  - svc's current pcpu;
+ *  - another pcpu from svc's current runq;
+ *  - any cpu.
+ */
+static int get_fallback_cpu(struct csched2_vcpu *svc)
+{
+    int cpu;
+
+    if ( likely(cpumask_test_cpu(svc->vcpu->processor,
+                                 svc->vcpu->cpu_hard_affinity)) )
+        return svc->vcpu->processor;
+
+    cpumask_and(cpumask_scratch, svc->vcpu->cpu_hard_affinity,
+                &svc->rqd->active);
+    cpu = cpumask_first(cpumask_scratch);
+    if ( likely(cpu < nr_cpu_ids) )
+        return cpu;
+
+    cpumask_and(cpumask_scratch, svc->vcpu->cpu_hard_affinity,
+                cpupool_domain_cpumask(svc->vcpu->domain));
+
+    ASSERT(!cpumask_empty(cpumask_scratch));
+
+    return cpumask_first(cpumask_scratch);
+}
 
 /*
  * Time-to-credit, credit-to-time.
@@ -551,8 +581,9 @@ runq_tickle(const struct scheduler *ops, unsigned int cpu, struct csched2_vcpu *
         goto tickle;
     }
     
-    /* Get a mask of idle, but not tickled */
+    /* Get a mask of idle, but not tickled, that new is allowed to run on. */
     cpumask_andnot(&mask, &rqd->idle, &rqd->tickled);
+    cpumask_and(&mask, &mask, new->vcpu->cpu_hard_affinity);
     
     /* If it's not empty, choose one */
     i = cpumask_cycle(cpu, &mask);
@@ -563,9 +594,11 @@ runq_tickle(const struct scheduler *ops, unsigned int cpu, struct csched2_vcpu *
     }
 
     /* Otherwise, look for the non-idle cpu with the lowest credit,
-     * skipping cpus which have been tickled but not scheduled yet */
+     * skipping cpus which have been tickled but not scheduled yet,
+     * that new is allowed to run on. */
     cpumask_andnot(&mask, &rqd->active, &rqd->idle);
     cpumask_andnot(&mask, &mask, &rqd->tickled);
+    cpumask_and(&mask, &mask, new->vcpu->cpu_hard_affinity);
 
     for_each_cpu(i, &mask)
     {
@@ -1115,9 +1148,8 @@ choose_cpu(const struct scheduler *ops, struct vcpu *vc)
             d2printk("%pv -\n", svc->vcpu);
             clear_bit(__CSFLAG_runq_migrate_request, &svc->flags);
         }
-        /* Leave it where it is for now.  When we actually pay attention
-         * to affinity we'll have to figure something out... */
-        return vc->processor;
+
+        return get_fallback_cpu(svc);
     }
 
     /* First check to see if we're here because someone else suggested a place
@@ -1128,45 +1160,56 @@ choose_cpu(const struct scheduler *ops, struct vcpu *vc)
         {
             printk("%s: Runqueue migrate aborted because target runqueue disappeared!\n",
                    __func__);
-            /* Fall-through to normal cpu pick */
         }
         else
         {
-            d2printk("%pv +\n", svc->vcpu);
-            new_cpu = cpumask_cycle(vc->processor, &svc->migrate_rqd->active);
-            goto out_up;
+            cpumask_and(cpumask_scratch, vc->cpu_hard_affinity,
+                        &svc->migrate_rqd->active);
+            new_cpu = cpumask_any(cpumask_scratch);
+            if ( new_cpu < nr_cpu_ids )
+            {
+                d2printk("%pv +\n", svc->vcpu);
+                goto out_up;
+            }
         }
+        /* Fall-through to normal cpu pick */
     }
 
-    /* FIXME: Pay attention to cpu affinity */                                                                                      
-
     min_avgload = MAX_LOAD;
 
     /* Find the runqueue with the lowest instantaneous load */
     for_each_cpu(i, &prv->active_queues)
     {
         struct csched2_runqueue_data *rqd;
-        s_time_t rqd_avgload;
+        s_time_t rqd_avgload = MAX_LOAD;
 
         rqd = prv->rqd + i;
 
-        /* If checking a different runqueue, grab the lock,
-         * read the avg, and then release the lock.
+        /*
+         * If checking a different runqueue, grab the lock, check hard
+         * affinity, read the avg, and then release the lock.
          *
          * If on our own runqueue, don't grab or release the lock;
          * but subtract our own load from the runqueue load to simulate
-         * impartiality */
+         * impartiality.
+         *
+         * Note that, if svc's hard affinity has changed, this is the
+         * first time when we see such change, so it is indeed possible
+         * that none of the cpus in svc's current runqueue is in our
+         * (new) hard affinity!
+         */
         if ( rqd == svc->rqd )
         {
-            rqd_avgload = rqd->b_avgload - svc->avgload;
+            if ( cpumask_intersects(vc->cpu_hard_affinity, &rqd->active) )
+                rqd_avgload = rqd->b_avgload - svc->avgload;
         }
         else if ( spin_trylock(&rqd->lock) )
         {
-            rqd_avgload = rqd->b_avgload;
+            if ( cpumask_intersects(vc->cpu_hard_affinity, &rqd->active) )
+                rqd_avgload = rqd->b_avgload;
+
             spin_unlock(&rqd->lock);
         }
-        else
-            continue;
 
         if ( rqd_avgload < min_avgload )
         {
@@ -1175,12 +1218,14 @@ choose_cpu(const struct scheduler *ops, struct vcpu *vc)
         }
     }
 
-    /* We didn't find anyone (most likely because of spinlock contention); leave it where it is */
+    /* We didn't find anyone (most likely because of spinlock contention). */
     if ( min_rqi == -1 )
-        new_cpu = vc->processor;
+        new_cpu = get_fallback_cpu(svc);
     else
     {
-        new_cpu = cpumask_cycle(vc->processor, &prv->rqd[min_rqi].active);
+        cpumask_and(cpumask_scratch, vc->cpu_hard_affinity,
+                    &prv->rqd[min_rqi].active);
+        new_cpu = cpumask_any(cpumask_scratch);
         BUG_ON(new_cpu >= nr_cpu_ids);
     }
 
@@ -1260,7 +1305,12 @@ static void migrate(const struct scheduler *ops,
             on_runq=1;
         }
         __runq_deassign(svc);
-        svc->vcpu->processor = cpumask_any(&trqd->active);
+
+        cpumask_and(cpumask_scratch, svc->vcpu->cpu_hard_affinity,
+                    &trqd->active);
+        svc->vcpu->processor = cpumask_any(cpumask_scratch);
+        BUG_ON(svc->vcpu->processor >= nr_cpu_ids);
+
         __runq_assign(svc, trqd);
         if ( on_runq )
         {
@@ -1274,6 +1324,17 @@ static void migrate(const struct scheduler *ops,
     }
 }
 
+/*
+ * It makes sense to consider migrating svc to rqd if:
+ *  - svc is not already flagged to migrate, and
+ *  - svc is allowed to run on at least one of the pcpus of rqd.
+ */
+static bool_t vcpu_is_migrateable(struct csched2_vcpu *svc,
+                                  struct csched2_runqueue_data *rqd)
+{
+    return !(svc->flags & CSFLAG_runq_migrate_request) &&
+           cpumask_intersects(svc->vcpu->cpu_hard_affinity, &rqd->active);
+}
 
 static void balance_load(const struct scheduler *ops, int cpu, s_time_t now)
 {
@@ -1382,8 +1443,7 @@ retry:
 
         __update_svc_load(ops, push_svc, 0, now);
 
-        /* Skip this one if it's already been flagged to migrate */
-        if ( push_svc->flags & CSFLAG_runq_migrate_request )
+        if ( !vcpu_is_migrateable(push_svc, st.orqd) )
             continue;
 
         list_for_each( pull_iter, &st.orqd->svc )
@@ -1395,8 +1455,7 @@ retry:
                 __update_svc_load(ops, pull_svc, 0, now);
             }
         
-            /* Skip this one if it's already been flagged to migrate */
-            if ( pull_svc->flags & CSFLAG_runq_migrate_request )
+            if ( !vcpu_is_migrateable(pull_svc, st.lrqd) )
                 continue;
 
             consider(&st, push_svc, pull_svc);
@@ -1412,8 +1471,7 @@ retry:
     {
         struct csched2_vcpu * pull_svc = list_entry(pull_iter, struct csched2_vcpu, rqd_elem);
         
-        /* Skip this one if it's already been flagged to migrate */
-        if ( pull_svc->flags & CSFLAG_runq_migrate_request )
+        if ( !vcpu_is_migrateable(pull_svc, st.lrqd) )
             continue;
 
         /* Consider pull only */
@@ -1452,11 +1510,22 @@ csched2_vcpu_migrate(
 
     /* Check if new_cpu is valid */
     BUG_ON(!cpumask_test_cpu(new_cpu, &CSCHED2_PRIV(ops)->initialized));
+    ASSERT(cpumask_test_cpu(new_cpu, vc->cpu_hard_affinity));
 
     trqd = RQD(ops, new_cpu);
 
+    /*
+     * Do the actual movement toward new_cpu, and update vc->processor.
+     * If we are changing runqueue, migrate() takes care of everything.
+     * If we are not changing runqueue, we need to update vc->processor
+     * here. In fact, if, for instance, we are here because the vcpu's
+     * hard affinity changed, we don't want to risk leaving vc->processor
+     * pointing to a pcpu where we can't run any longer.
+     */
     if ( trqd != svc->rqd )
         migrate(ops, svc, trqd, NOW());
+    else
+        vc->processor = new_cpu;
 }
 
 static int
@@ -1679,6 +1748,10 @@ runq_candidate(struct csched2_runqueue_data *rqd,
     {
         struct csched2_vcpu * svc = list_entry(iter, struct csched2_vcpu, runq_elem);
 
+        /* Only consider vcpus that are allowed to run on this processor. */
+        if ( !cpumask_test_cpu(cpu, svc->vcpu->cpu_hard_affinity) )
+            continue;
+
         /* If this is on a different processor, don't pull it unless
          * its credit is at least CSCHED2_MIGRATE_RESIST higher. */
         if ( svc->vcpu->processor != cpu


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

     prev parent reply	other threads:[~2016-04-08  1:24 UTC|newest]

Thread overview: 25+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-04-08  1:23 [PATCH v3 00/11] Fixes and improvement (including hard affinity!) for Credit2 Dario Faggioli
2016-04-08  1:23 ` [PATCH v3 01/11] xen: sched: make implementing .alloc_pdata optional Dario Faggioli
2016-04-08 14:03   ` Robert VanVossen
2016-04-08  1:23 ` [PATCH v3 02/11] xen: sched: implement .init_pdata in Credit, Credit2 and RTDS Dario Faggioli
2016-04-08  1:23 ` [PATCH v3 03/11] xen: sched: move pCPU initialization in an helper Dario Faggioli
2016-04-08  1:23 ` [PATCH v3 04/11] xen: sched: close potential races when switching scheduler to CPUs Dario Faggioli
2016-04-08 12:52   ` George Dunlap
2016-04-08 13:00     ` George Dunlap
2016-04-08 13:11       ` Dario Faggioli
2016-04-08 14:00         ` Robert VanVossen
2016-04-11 14:43         ` Konrad Rzeszutek Wilk
2016-04-08  1:23 ` [PATCH v3 05/11] xen: sched: improve credit2 bootparams' scope, placement and signedness Dario Faggioli
2016-04-08  1:24 ` [PATCH v3 06/11] xen: sched: on Credit2, don't reprogram the timer if idle Dario Faggioli
2016-04-08  1:24 ` [PATCH v3 07/11] xen: sched: fix per-socket runqueue creation in credit2 Dario Faggioli
2016-04-08  1:24 ` [PATCH v3 08/11] xen: sched: allow for choosing credit2 runqueues configuration at boot Dario Faggioli
2016-04-08  4:18   ` Juergen Gross
2016-04-08  7:35     ` Dario Faggioli
2016-04-08  7:39       ` Juergen Gross
2016-04-08 10:03         ` Dario Faggioli
2016-04-08 13:10       ` George Dunlap
2016-04-08 15:13       ` [PATCH v3 00/11] Fixes and improvement (including hard affinity!) for Credit2 [and 1 more messages] Ian Jackson
2016-04-11 14:43         ` Konrad Rzeszutek Wilk
2016-04-08  1:24 ` [PATCH v3 09/11] xen: sched: per-core runqueues as default in credit2 Dario Faggioli
2016-04-08  1:24 ` [PATCH v3 10/11] xen: sched: privde some scratch space for not putting cpumasks on stack Dario Faggioli
2016-04-08  1:24 ` Dario Faggioli [this message]

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:8617c9b dfblob:59eb3db )
 OR (
bs:"[PATCH v3 11/11] xen: sched: implement vcpu hard affinity in Credit2" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20160408012445.10762.40064.stgit@Solace.fritz.box \
    --to=dario.faggioli@citrix.com \
    --cc=george.dunlap@citrix.com \
    --cc=jtweaver@hawaii.edu \
    --cc=xen-devel@lists.xenproject.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.