xen-devel.lists.xenproject.org archive mirror
* [PATCH v2 1/3] xen: Some code motion to avoid having to do forward-declaration
@ 2016-07-25 15:28 George Dunlap
  2016-07-25 15:28 ` [PATCH v2 2/3] xen: Have schedulers revise initial placement George Dunlap
                   ` (2 more replies)
  0 siblings, 3 replies; 9+ messages in thread
From: George Dunlap @ 2016-07-25 15:28 UTC (permalink / raw)
  To: xen-devel; +Cc: Dario Faggioli, George Dunlap, Anshul Makkar, Meng Xu

For sched_credit2, move the vcpu insert / remove / free functions near the domain
insert / remove / alloc / free functions (and after cpu_pick).

For sched_rt, move rt_cpu_pick() further up.

This is pure code motion; no functional change.

Signed-off-by: George Dunlap <george.dunlap@citrix.com>
Reviewed-by: Meng Xu <mengxu@cis.upenn.edu>
---
v2:
- Rebase on top of Dario's series

I've retained Meng Xu's Reviewed-by since the sched_rt part didn't change.
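
For reference, this is the kind of pattern the reordering avoids
(a minimal illustrative C sketch with made-up helper names, not code
from the Xen tree):

    /* Before: the callee is defined after its caller, so a forward
     * declaration is needed to keep the compiler happy. */
    static int helper(int x);                        /* forward declaration */
    static int caller(int x) { return helper(x) + 1; }
    static int helper(int x) { return x * 2; }

    /* After: defining the callee above its first use makes the
     * forward declaration unnecessary. */
    static int helper2(int x) { return x * 2; }
    static int caller2(int x) { return helper2(x) + 1; }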

CC: Dario Faggioli <dario.faggioli@citrix.com>
CC: Anshul Makkar <anshul.makkar@citrix.com>
CC: Meng Xu <mengxu@cis.upenn.edu>
---
 xen/common/sched_credit2.c | 106 ++++++++++++++++++++++-----------------------
 xen/common/sched_rt.c      |  46 ++++++++++----------
 2 files changed, 76 insertions(+), 76 deletions(-)

diff --git a/xen/common/sched_credit2.c b/xen/common/sched_credit2.c
index 1d79de0..3d2716a 100644
--- a/xen/common/sched_credit2.c
+++ b/xen/common/sched_credit2.c
@@ -1348,59 +1348,6 @@ runq_deassign(const struct scheduler *ops, struct vcpu *vc)
 }
 
 static void
-csched2_vcpu_insert(const struct scheduler *ops, struct vcpu *vc)
-{
-    struct csched2_vcpu *svc = vc->sched_priv;
-    struct csched2_dom * const sdom = svc->sdom;
-    spinlock_t *lock;
-
-    ASSERT(!is_idle_vcpu(vc));
-    ASSERT(list_empty(&svc->runq_elem));
-
-    /* Add vcpu to runqueue of initial processor */
-    lock = vcpu_schedule_lock_irq(vc);
-
-    runq_assign(ops, vc);
-
-    vcpu_schedule_unlock_irq(lock, vc);
-
-    sdom->nr_vcpus++;
-
-    SCHED_STAT_CRANK(vcpu_insert);
-
-    CSCHED2_VCPU_CHECK(vc);
-}
-
-static void
-csched2_free_vdata(const struct scheduler *ops, void *priv)
-{
-    struct csched2_vcpu *svc = priv;
-
-    xfree(svc);
-}
-
-static void
-csched2_vcpu_remove(const struct scheduler *ops, struct vcpu *vc)
-{
-    struct csched2_vcpu * const svc = CSCHED2_VCPU(vc);
-    spinlock_t *lock;
-
-    ASSERT(!is_idle_vcpu(vc));
-    ASSERT(list_empty(&svc->runq_elem));
-
-    SCHED_STAT_CRANK(vcpu_remove);
-
-    /* Remove from runqueue */
-    lock = vcpu_schedule_lock_irq(vc);
-
-    runq_deassign(ops, vc);
-
-    vcpu_schedule_unlock_irq(lock, vc);
-
-    svc->sdom->nr_vcpus--;
-}
-
-static void
 csched2_vcpu_sleep(const struct scheduler *ops, struct vcpu *vc)
 {
     struct csched2_vcpu * const svc = CSCHED2_VCPU(vc);
@@ -2098,6 +2045,59 @@ csched2_dom_destroy(const struct scheduler *ops, struct domain *dom)
     csched2_free_domdata(ops, CSCHED2_DOM(dom));
 }
 
+static void
+csched2_vcpu_insert(const struct scheduler *ops, struct vcpu *vc)
+{
+    struct csched2_vcpu *svc = vc->sched_priv;
+    struct csched2_dom * const sdom = svc->sdom;
+    spinlock_t *lock;
+
+    ASSERT(!is_idle_vcpu(vc));
+    ASSERT(list_empty(&svc->runq_elem));
+
+    /* Add vcpu to runqueue of initial processor */
+    lock = vcpu_schedule_lock_irq(vc);
+
+    runq_assign(ops, vc);
+
+    vcpu_schedule_unlock_irq(lock, vc);
+
+    sdom->nr_vcpus++;
+
+    SCHED_STAT_CRANK(vcpu_insert);
+
+    CSCHED2_VCPU_CHECK(vc);
+}
+
+static void
+csched2_free_vdata(const struct scheduler *ops, void *priv)
+{
+    struct csched2_vcpu *svc = priv;
+
+    xfree(svc);
+}
+
+static void
+csched2_vcpu_remove(const struct scheduler *ops, struct vcpu *vc)
+{
+    struct csched2_vcpu * const svc = CSCHED2_VCPU(vc);
+    spinlock_t *lock;
+
+    ASSERT(!is_idle_vcpu(vc));
+    ASSERT(list_empty(&svc->runq_elem));
+
+    SCHED_STAT_CRANK(vcpu_remove);
+
+    /* Remove from runqueue */
+    lock = vcpu_schedule_lock_irq(vc);
+
+    runq_deassign(ops, vc);
+
+    vcpu_schedule_unlock_irq(lock, vc);
+
+    svc->sdom->nr_vcpus--;
+}
+
 /* How long should we let this vcpu run for? */
 static s_time_t
 csched2_runtime(const struct scheduler *ops, int cpu, struct csched2_vcpu *snext)
diff --git a/xen/common/sched_rt.c b/xen/common/sched_rt.c
index 98524a6..bd3a2a0 100644
--- a/xen/common/sched_rt.c
+++ b/xen/common/sched_rt.c
@@ -582,6 +582,29 @@ replq_reinsert(const struct scheduler *ops, struct rt_vcpu *svc)
 }
 
 /*
+ * Pick a valid CPU for the vcpu vc
+ * Valid CPU of a vcpu is intersection of vcpu's affinity
+ * and available cpus
+ */
+static int
+rt_cpu_pick(const struct scheduler *ops, struct vcpu *vc)
+{
+    cpumask_t cpus;
+    cpumask_t *online;
+    int cpu;
+
+    online = cpupool_domain_cpumask(vc->domain);
+    cpumask_and(&cpus, online, vc->cpu_hard_affinity);
+
+    cpu = cpumask_test_cpu(vc->processor, &cpus)
+            ? vc->processor
+            : cpumask_cycle(vc->processor, &cpus);
+    ASSERT( !cpumask_empty(&cpus) && cpumask_test_cpu(cpu, &cpus) );
+
+    return cpu;
+}
+
+/*
  * Init/Free related code
  */
 static int
@@ -894,29 +917,6 @@ rt_vcpu_remove(const struct scheduler *ops, struct vcpu *vc)
 }
 
 /*
- * Pick a valid CPU for the vcpu vc
- * Valid CPU of a vcpu is intersection of vcpu's affinity
- * and available cpus
- */
-static int
-rt_cpu_pick(const struct scheduler *ops, struct vcpu *vc)
-{
-    cpumask_t cpus;
-    cpumask_t *online;
-    int cpu;
-
-    online = cpupool_domain_cpumask(vc->domain);
-    cpumask_and(&cpus, online, vc->cpu_hard_affinity);
-
-    cpu = cpumask_test_cpu(vc->processor, &cpus)
-            ? vc->processor
-            : cpumask_cycle(vc->processor, &cpus);
-    ASSERT( !cpumask_empty(&cpus) && cpumask_test_cpu(cpu, &cpus) );
-
-    return cpu;
-}
-
-/*
  * Burn budget in nanosecond granularity
  */
 static void
-- 
2.1.4


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


* [PATCH v2 2/3] xen: Have schedulers revise initial placement
  2016-07-25 15:28 [PATCH v2 1/3] xen: Some code motion to avoid having to do forward-declaration George Dunlap
@ 2016-07-25 15:28 ` George Dunlap
  2016-07-25 17:34   ` Meng Xu
  2016-07-26  9:30   ` Dario Faggioli
  2016-07-25 15:28 ` [PATCH v2 3/3] xen: Remove buggy initial placement algorithm George Dunlap
  2016-07-26  9:32 ` [PATCH v2 1/3] xen: Some code motion to avoid having to do forward-declaration Dario Faggioli
  2 siblings, 2 replies; 9+ messages in thread
From: George Dunlap @ 2016-07-25 15:28 UTC (permalink / raw)
  To: xen-devel
  Cc: Dario Faggioli, Jan Beulich, George Dunlap, Anshul Makkar,
	Meng Xu

The generic domain creation logic in
xen/common/domctl.c:default_vcpu0_location() attempts to do
initial placement load-balancing by placing vcpu 0 on the least-busy
non-primary hyperthread available.  Unfortunately, the logic can end
up picking a pcpu that's not in the online mask.  When this is passed
to a scheduler which assumes that the initial assignment is
valid, it causes a null pointer dereference looking up the runqueue.

Furthermore, this initial placement doesn't take into account hard or
soft affinity, or any scheduler-specific knowledge (such as historic
runqueue load, as in credit2).

To solve this, when inserting a vcpu, always call the per-scheduler
"pick" function to revise the initial placement.  This will
automatically take all knowledge the scheduler has into account.

csched2_cpu_pick ASSERTs that the vcpu's pcpu scheduler lock has been
taken.  Grab and release the lock to minimize time spent with irqs
disabled.

Signed-off-by: George Dunlap <george.dunlap@citrix.com>
---
v2:
- Actually grab lock before calling vcpu_schedule_lock() to avoid
  tripping over a new ASSERT
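
For clarity, the shape of the credit2 change (a condensed sketch of
the hunk below, reusing its function names; not a drop-in version of
it):

    lock = vcpu_schedule_lock_irq(vc);           /* pick expects the pcpu lock */
    vc->processor = csched2_cpu_pick(ops, vc);   /* may change vc->processor */
    spin_unlock_irq(lock);

    /* Re-lock: vcpu_schedule_lock_irq() follows vc->processor, so it may
     * now take a different runqueue lock than the one just dropped. */
    lock = vcpu_schedule_lock_irq(vc);
    runq_assign(ops, vc);                        /* queue on the chosen runqueue */
    vcpu_schedule_unlock_irq(lock, vc);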

CC: Dario Faggioli <dario.faggioli@citrix.com>
CC: Anshul Makkar <anshul.makkar@citrix.com>
CC: Meng Xu <mengxu@cis.upenn.edu>
CC: Jan Beulich <jbeulich@suse.com>
---
 xen/common/sched_credit.c  |  3 +++
 xen/common/sched_credit2.c | 11 ++++++++++-
 xen/common/sched_rt.c      |  3 +++
 3 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/xen/common/sched_credit.c b/xen/common/sched_credit.c
index d547716..220ff0d 100644
--- a/xen/common/sched_credit.c
+++ b/xen/common/sched_credit.c
@@ -998,6 +998,9 @@ csched_vcpu_insert(const struct scheduler *ops, struct vcpu *vc)
 
     BUG_ON( is_idle_vcpu(vc) );
 
+    /* This is safe because vc isn't yet being scheduled */
+    vc->processor = csched_cpu_pick(ops, vc);
+
     lock = vcpu_schedule_lock_irq(vc);
 
     if ( !__vcpu_on_runq(svc) && vcpu_runnable(vc) && !vc->is_running )
diff --git a/xen/common/sched_credit2.c b/xen/common/sched_credit2.c
index 3d2716a..fb9dd98 100644
--- a/xen/common/sched_credit2.c
+++ b/xen/common/sched_credit2.c
@@ -2055,12 +2055,21 @@ csched2_vcpu_insert(const struct scheduler *ops, struct vcpu *vc)
     ASSERT(!is_idle_vcpu(vc));
     ASSERT(list_empty(&svc->runq_elem));
 
-    /* Add vcpu to runqueue of initial processor */
+    /* csched2_cpu_pick() expects the pcpu lock to be held */
     lock = vcpu_schedule_lock_irq(vc);
 
+    vc->processor = csched2_cpu_pick(ops, vc);
+
+    spin_unlock_irq(lock);
+
+    lock = vcpu_schedule_lock_irq(vc);
+
+    /* Add vcpu to runqueue of initial processor */
     runq_assign(ops, vc);
 
     vcpu_schedule_unlock_irq(lock, vc);
+    
+    local_irq_enable();
 
     sdom->nr_vcpus++;
 
diff --git a/xen/common/sched_rt.c b/xen/common/sched_rt.c
index bd3a2a0..41c61a7 100644
--- a/xen/common/sched_rt.c
+++ b/xen/common/sched_rt.c
@@ -874,6 +874,9 @@ rt_vcpu_insert(const struct scheduler *ops, struct vcpu *vc)
 
     BUG_ON( is_idle_vcpu(vc) );
 
+    /* This is safe because vc isn't yet being scheduled */
+    vc->processor = rt_cpu_pick(ops, vc);
+
     lock = vcpu_schedule_lock_irq(vc);
 
     now = NOW();
-- 
2.1.4



* [PATCH v2 3/3] xen: Remove buggy initial placement algorithm
  2016-07-25 15:28 [PATCH v2 1/3] xen: Some code motion to avoid having to do forward-declaration George Dunlap
  2016-07-25 15:28 ` [PATCH v2 2/3] xen: Have schedulers revise initial placement George Dunlap
@ 2016-07-25 15:28 ` George Dunlap
  2016-07-26  9:14   ` Dario Faggioli
  2016-07-26  9:22   ` Andrew Cooper
  2016-07-26  9:32 ` [PATCH v2 1/3] xen: Some code motion to avoid having to do forward-declaration Dario Faggioli
  2 siblings, 2 replies; 9+ messages in thread
From: George Dunlap @ 2016-07-25 15:28 UTC (permalink / raw)
  To: xen-devel
  Cc: Stefano Stabellini, Wei Liu, Andrew Cooper, Dario Faggioli,
	Ian Jackson, George Dunlap, Tim Deegan, Meng Xu, Jan Beulich,
	Anshul Makkar

The initial placement algorithm sometimes picks cpus outside of the
mask it's given, does a lot of unnecessary bitmasking, does its own
separate load calculation, and completely ignores vcpu hard and soft
affinities.  Just get rid of it and rely on the schedulers to do
initial placement.

Signed-off-by: George Dunlap <george.dunlap@citrix.com>
---
v2:
- Use cpumask_any() to avoid a bias towards low pcpus on schedulers
that prefer to leave the cpu where it is.

The core problem with default_vcpu0_location() is that it chooses its
initial cpu based on the sibling of pcpu 0, not the first available
sibling in the online mask; so if pcpu 1 ends up being less "busy"
than all the cpus in the pool, then it ends up being chosen even
though it's not in the pool.

Fixing the algorithm would involve starting with the sibling map of
cpumask_first(online) rather than 0, and then having all sibling
checks not only test that the result of cpumask_next() < nr_cpu_ids,
but also that the result is in the online mask.
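
Sketched out (hypothetically, since the function is being removed
rather than fixed), that first part of the fix would look roughly
like:

    /* Start from the sibling map of the first *online* cpu, not cpu 0. */
    cpumask_copy(&cpu_exclude_map,
                 per_cpu(cpu_sibling_mask, cpumask_first(online)));
    cpu = cpumask_first(&cpu_exclude_map);
    i = cpumask_next(cpu, &cpu_exclude_map);
    if ( i < nr_cpu_ids && cpumask_test_cpu(i, online) )
        cpu = i;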

Additionally, as far as I can tell, the cpumask_test_cpu(i,
&cpu_exclude_map) at the top of the for_each_cpu() loop can never
return false; so both this test and the cpumask_or() are
unnecessary and should be removed.

CC: Dario Faggioli <dario.faggioli@citrix.com>
CC: Anshul Makkar <anshul.makkar@citrix.com>
CC: Meng Xu <mengxu@cis.upenn.edu>
CC: Andrew Cooper <andrew.cooper3@citrix.com>
CC: Ian Jackson <ian.jackson@eu.citrix.com>
CC: Jan Beulich <jbeulich@suse.com>
CC: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
CC: Stefano Stabellini <sstabellini@kernel.org>
CC: Tim Deegan <tim@xen.org>
CC: Wei Liu <wei.liu2@citrix.com>
---
 xen/common/domctl.c | 50 +-------------------------------------------------
 1 file changed, 1 insertion(+), 49 deletions(-)

diff --git a/xen/common/domctl.c b/xen/common/domctl.c
index b784e6c..87640b6 100644
--- a/xen/common/domctl.c
+++ b/xen/common/domctl.c
@@ -217,54 +217,6 @@ void getdomaininfo(struct domain *d, struct xen_domctl_getdomaininfo *info)
     memcpy(info->handle, d->handle, sizeof(xen_domain_handle_t));
 }
 
-static unsigned int default_vcpu0_location(cpumask_t *online)
-{
-    struct domain *d;
-    struct vcpu   *v;
-    unsigned int   i, cpu, nr_cpus, *cnt;
-    cpumask_t      cpu_exclude_map;
-
-    /* Do an initial CPU placement. Pick the least-populated CPU. */
-    nr_cpus = cpumask_last(&cpu_online_map) + 1;
-    cnt = xzalloc_array(unsigned int, nr_cpus);
-    if ( cnt )
-    {
-        rcu_read_lock(&domlist_read_lock);
-        for_each_domain ( d )
-            for_each_vcpu ( d, v )
-                if ( !(v->pause_flags & VPF_down)
-                     && ((cpu = v->processor) < nr_cpus) )
-                    cnt[cpu]++;
-        rcu_read_unlock(&domlist_read_lock);
-    }
-
-    /*
-     * If we're on a HT system, we only auto-allocate to a non-primary HT. We
-     * favour high numbered CPUs in the event of a tie.
-     */
-    cpumask_copy(&cpu_exclude_map, per_cpu(cpu_sibling_mask, 0));
-    cpu = cpumask_first(&cpu_exclude_map);
-    i = cpumask_next(cpu, &cpu_exclude_map);
-    if ( i < nr_cpu_ids )
-        cpu = i;
-    for_each_cpu(i, online)
-    {
-        if ( cpumask_test_cpu(i, &cpu_exclude_map) )
-            continue;
-        if ( (i == cpumask_first(per_cpu(cpu_sibling_mask, i))) &&
-             (cpumask_next(i, per_cpu(cpu_sibling_mask, i)) < nr_cpu_ids) )
-            continue;
-        cpumask_or(&cpu_exclude_map, &cpu_exclude_map,
-                   per_cpu(cpu_sibling_mask, i));
-        if ( !cnt || cnt[i] <= cnt[cpu] )
-            cpu = i;
-    }
-
-    xfree(cnt);
-
-    return cpu;
-}
-
 bool_t domctl_lock_acquire(void)
 {
     /*
@@ -691,7 +643,7 @@ long do_domctl(XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl)
                 continue;
 
             cpu = (i == 0) ?
-                default_vcpu0_location(online) :
+                cpumask_any(online) :
                 cpumask_cycle(d->vcpu[i-1]->processor, online);
 
             if ( alloc_vcpu(d, i, cpu) == NULL )
-- 
2.1.4



* Re: [PATCH v2 2/3] xen: Have schedulers revise initial placement
  2016-07-25 15:28 ` [PATCH v2 2/3] xen: Have schedulers revise initial placement George Dunlap
@ 2016-07-25 17:34   ` Meng Xu
  2016-07-26  9:30   ` Dario Faggioli
  1 sibling, 0 replies; 9+ messages in thread
From: Meng Xu @ 2016-07-25 17:34 UTC (permalink / raw)
  To: George Dunlap
  Cc: xen-devel@lists.xenproject.org, Dario Faggioli, Anshul Makkar,
	Jan Beulich

On Mon, Jul 25, 2016 at 11:28 AM, George Dunlap
<george.dunlap@citrix.com> wrote:
> The generic domain creation logic in
> xen/common/domctl.c:default_vcpu0_location() attempts to do
> initial placement load-balancing by placing vcpu 0 on the least-busy
> non-primary hyperthread available.  Unfortunately, the logic can end
> up picking a pcpu that's not in the online mask.  When this is passed
> to a scheduler which assumes that the initial assignment is
> valid, it causes a null pointer dereference looking up the runqueue.
>
> Furthermore, this initial placement doesn't take into account hard or
> soft affinity, or any scheduler-specific knowledge (such as historic
> runqueue load, as in credit2).
>
> To solve this, when inserting a vcpu, always call the per-scheduler
> "pick" function to revise the initial placement.  This will
> automatically take all knowledge the scheduler has into account.
>
> csched2_cpu_pick ASSERTs that the vcpu's pcpu scheduler lock has been
> taken.  Grab and release the lock to minimize time spent with irqs
> disabled.
>
> Signed-off-by: George Dunlap <george.dunlap@citrix.com>
> ---
> v2:
> - Actually grab lock before calling vcpu_schedule_lock() to avoid
>   tripping over a new ASSERT
>
> CC: Dario Faggioli <dario.faggioli@citrix.com>
> CC: Anshul Makkar <anshul.makkar@citrix.com>
> CC: Meng Xu <mengxu@cis.upenn.edu>
> CC: Jan Beulich <jbeulich@suse.com>
> ---
>  xen/common/sched_credit.c  |  3 +++
>  xen/common/sched_credit2.c | 11 ++++++++++-
>  xen/common/sched_rt.c      |  3 +++
>  3 files changed, 16 insertions(+), 1 deletion(-)
>


As to sched_rt.c,

Reviewed-by: Meng Xu <mengxu@cis.upenn.edu>

-----------
Meng Xu
PhD Student in Computer and Information Science
University of Pennsylvania
http://www.cis.upenn.edu/~mengxu/


* Re: [PATCH v2 3/3] xen: Remove buggy initial placement algorithm
  2016-07-25 15:28 ` [PATCH v2 3/3] xen: Remove buggy initial placement algorithm George Dunlap
@ 2016-07-26  9:14   ` Dario Faggioli
  2016-07-26  9:22   ` Andrew Cooper
  1 sibling, 0 replies; 9+ messages in thread
From: Dario Faggioli @ 2016-07-26  9:14 UTC (permalink / raw)
  To: George Dunlap, xen-devel
  Cc: Stefano Stabellini, Wei Liu, Andrew Cooper, Anshul Makkar,
	Ian Jackson, Tim Deegan, Meng Xu, Jan Beulich



On Mon, 2016-07-25 at 16:28 +0100, George Dunlap wrote:
> The initial placement algorithm sometimes picks cpus outside of the
> mask it's given, does a lot of unnecessary bitmasking, does its own
> separate load calculation, and completely ignores vcpu hard and soft
> affinities.  Just get rid of it and rely on the schedulers to do
> initial placement.
> 
> Signed-off-by: George Dunlap <george.dunlap@citrix.com>
>
Reviewed-by: Dario Faggioli <dario.faggioli@citrix.com>

Regards,
Dario
-- 
<<This happens because I choose it to happen!>> (Raistlin Majere)
-----------------------------------------------------------------
Dario Faggioli, Ph.D, http://about.me/dario.faggioli
Senior Software Engineer, Citrix Systems R&D Ltd., Cambridge (UK)



* Re: [PATCH v2 3/3] xen: Remove buggy initial placement algorithm
  2016-07-25 15:28 ` [PATCH v2 3/3] xen: Remove buggy initial placement algorithm George Dunlap
  2016-07-26  9:14   ` Dario Faggioli
@ 2016-07-26  9:22   ` Andrew Cooper
  1 sibling, 0 replies; 9+ messages in thread
From: Andrew Cooper @ 2016-07-26  9:22 UTC (permalink / raw)
  To: George Dunlap, xen-devel
  Cc: Stefano Stabellini, Wei Liu, Tim Deegan, Dario Faggioli,
	Ian Jackson, Meng Xu, Jan Beulich, Anshul Makkar

On 25/07/16 16:28, George Dunlap wrote:
> The initial placement algorithm sometimes picks cpus outside of the
> mask it's given, does a lot of unnecessary bitmasking, does its own
> separate load calculation, and completely ignores vcpu hard and soft
> affinities.  Just get rid of it and rely on the schedulers to do
> initial placement.
>
> Signed-off-by: George Dunlap <george.dunlap@citrix.com>
> ---
> v2:
> - Use cpumask_any() to avoid a bias towards low pcpus on schedulers
> that prefer to leave the cpu where it is.
>
> The core problem with default_vcpu0_location() is that it chooses its
> initial cpu based on the sibling of pcpu 0, not the first available
> sibling in the online mask; so if pcpu 1 ends up being less "busy"
> than all the cpus in the pool, then it ends up being chosen even
> though it's not in the pool.
>
> Fixing the algorithm would involve starting with the sibling map of
> cpumask_first(online) rather than 0, and then having all sibling
> checks not only test that the result of cpumask_next() < nr_cpu_ids,
> but that the result is in online.
>
> Additionally, as far as I can tell, the cpumask_test_cpu(i,
> &cpu_exclude_map) at the top of the for_each_cpu() loop can never
> return false; so both this test and the cpumask_or() are
> unnecessary and should be removed.
>
> CC: Dario Faggioli <dario.faggioli@citrix.com>
> CC: Anshul Makkar <anshul.makkar@citrix.com>
> CC: Meng Xu <mengxu@cis.upenn.edu>
> CC: Andrew Cooper <andrew.cooper3@citrix.com>
> CC: Ian Jackson <ian.jackson@eu.citrix.com>
> CC: Jan Beulich <jbeulich@suse.com>
> CC: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
> CC: Stefano Stabellini <sstabellini@kernel.org>
> CC: Tim Deegan <tim@xen.org>
> CC: Wei Liu <wei.liu2@citrix.com>

Acked-by: Andrew Cooper <andrew.cooper3@citrix.com> from a REST point of
view. 


* Re: [PATCH v2 2/3] xen: Have schedulers revise initial placement
  2016-07-25 15:28 ` [PATCH v2 2/3] xen: Have schedulers revise initial placement George Dunlap
  2016-07-25 17:34   ` Meng Xu
@ 2016-07-26  9:30   ` Dario Faggioli
  2016-07-26  9:35     ` George Dunlap
  1 sibling, 1 reply; 9+ messages in thread
From: Dario Faggioli @ 2016-07-26  9:30 UTC (permalink / raw)
  To: George Dunlap, xen-devel; +Cc: Anshul Makkar, Meng Xu, Jan Beulich



On Mon, 2016-07-25 at 16:28 +0100, George Dunlap wrote:
> The generic domain creation logic in
> xen/common/domctl.c:default_vcpu0_location() attempts to do
> initial placement load-balancing by placing vcpu 0 on the least-busy
> non-primary hyperthread available.  Unfortunately, the logic can end
> up picking a pcpu that's not in the online mask.  When this is passed
> to a scheduler which assumes that the initial assignment is
> valid, it causes a null pointer dereference looking up the runqueue.
> 
Looking more at the Credit2 code, I think there are a couple of missing
checks that a cpu about to be used for something is actually in the
online mask.

However, that is orthogonal with this patch and...

> Furthermore, this initial placement doesn't take into account hard or
> soft affinity, or any scheduler-specific knowledge (such as historic
> runqueue load, as in credit2).
> 
... using pick_cpu() here is, IMO, a really really really good idea, so
I think this patch should go in (and I'll work on it and, if I am right,
add the missing checks).

> Signed-off-by: George Dunlap <george.dunlap@citrix.com>
> ---
> v2:
> - Actually grab lock before calling vcpu_schedule_lock() to avoid
>   tripping over a new ASSERT
> 
Ah, yes... sorry! :-P

Just one thing:

> --- a/xen/common/sched_credit2.c
> +++ b/xen/common/sched_credit2.c
> @@ -2055,12 +2055,21 @@ csched2_vcpu_insert(const struct scheduler
> *ops, struct vcpu *vc)
>      ASSERT(!is_idle_vcpu(vc));
>      ASSERT(list_empty(&svc->runq_elem));
>  
> -    /* Add vcpu to runqueue of initial processor */
> +    /* csched2_cpu_pick() expects the pcpu lock to be held */
>      lock = vcpu_schedule_lock_irq(vc);
>  
> +    vc->processor = csched2_cpu_pick(ops, vc);
> +
> +    spin_unlock_irq(lock);
> +
> +    lock = vcpu_schedule_lock_irq(vc);
> +
> +    /* Add vcpu to runqueue of initial processor */
>      runq_assign(ops, vc);
>  
>      vcpu_schedule_unlock_irq(lock, vc);
> +    
> +    local_irq_enable();
> 
This local_irq_enable() is not necessary any longer, is it?

With that off (and I'd be fine if you want to do that while
committing):

Reviewed-by: Dario Faggioli <dario.faggioli@citrix.com>

Regards,
Dario
-- 
<<This happens because I choose it to happen!>> (Raistlin Majere)
-----------------------------------------------------------------
Dario Faggioli, Ph.D, http://about.me/dario.faggioli
Senior Software Engineer, Citrix Systems R&D Ltd., Cambridge (UK)



* Re: [PATCH v2 1/3] xen: Some code motion to avoid having to do forward-declaration
  2016-07-25 15:28 [PATCH v2 1/3] xen: Some code motion to avoid having to do forward-declaration George Dunlap
  2016-07-25 15:28 ` [PATCH v2 2/3] xen: Have schedulers revise initial placement George Dunlap
  2016-07-25 15:28 ` [PATCH v2 3/3] xen: Remove buggy initial placement algorithm George Dunlap
@ 2016-07-26  9:32 ` Dario Faggioli
  2 siblings, 0 replies; 9+ messages in thread
From: Dario Faggioli @ 2016-07-26  9:32 UTC (permalink / raw)
  To: George Dunlap, xen-devel; +Cc: Anshul Makkar, Meng Xu



On Mon, 2016-07-25 at 16:28 +0100, George Dunlap wrote:
> This is pure code motion; no functional change.
> 
> Signed-off-by: George Dunlap <george.dunlap@citrix.com>
> Reviewed-by: Meng Xu <mengxu@cis.upenn.edu>
>
Acked-by: Dario Faggioli <dario.faggioli@citrix.com>

> v2:
> - Rebase on top of Dario's series
> 
Thanks and Regards,
Dario
-- 
<<This happens because I choose it to happen!>> (Raistlin Majere)
-----------------------------------------------------------------
Dario Faggioli, Ph.D, http://about.me/dario.faggioli
Senior Software Engineer, Citrix Systems R&D Ltd., Cambridge (UK)



* Re: [PATCH v2 2/3] xen: Have schedulers revise initial placement
  2016-07-26  9:30   ` Dario Faggioli
@ 2016-07-26  9:35     ` George Dunlap
  0 siblings, 0 replies; 9+ messages in thread
From: George Dunlap @ 2016-07-26  9:35 UTC (permalink / raw)
  To: Dario Faggioli, xen-devel; +Cc: Anshul Makkar, Meng Xu, Jan Beulich

On 26/07/16 10:30, Dario Faggioli wrote:
> On Mon, 2016-07-25 at 16:28 +0100, George Dunlap wrote:
>> The generic domain creation logic in
>> xen/common/domctl.c:default_vcpu0_location() attempts to do
>> initial placement load-balancing by placing vcpu 0 on the least-busy
>> non-primary hyperthread available.  Unfortunately, the logic can end
>> up picking a pcpu that's not in the online mask.  When this is passed
>> to a scheduler which assumes that the initial assignment is
>> valid, it causes a null pointer dereference looking up the runqueue.
>>
> Looking more at Credit2 code, I think there are a couple of missing
> checks that a cpu that is about to be used for something, is actually
> in online.
> 
> However, that is orthogonal with this patch and...
> 
>> Furthermore, this initial placement doesn't take into account hard or
>> soft affinity, or any scheduler-specific knowledge (such as historic
>> runqueue load, as in credit2).
>>
> ... using pick_cpu() here is, IMO, a really really really good idea, so
> I think this patch should go in (and I'll work on and, if I am right,
> add the missing checks).
> 
>> Signed-off-by: George Dunlap <george.dunlap@citrix.com>
>> ---
>> v2:
>> - Actually grab lock before calling vcpu_schedule_lock() to avoid
>>   tripping over a new ASSERT
>>
> Ah, yes... sorry! :-P

Haha, no worries.

> 
> Just one thing:
> 
>> --- a/xen/common/sched_credit2.c
>> +++ b/xen/common/sched_credit2.c
>> @@ -2055,12 +2055,21 @@ csched2_vcpu_insert(const struct scheduler
>> *ops, struct vcpu *vc)
>>      ASSERT(!is_idle_vcpu(vc));
>>      ASSERT(list_empty(&svc->runq_elem));
>>  
>> -    /* Add vcpu to runqueue of initial processor */
>> +    /* csched2_cpu_pick() expects the pcpu lock to be held */
>>      lock = vcpu_schedule_lock_irq(vc);
>>  
>> +    vc->processor = csched2_cpu_pick(ops, vc);
>> +
>> +    spin_unlock_irq(lock);
>> +
>> +    lock = vcpu_schedule_lock_irq(vc);
>> +
>> +    /* Add vcpu to runqueue of initial processor */
>>      runq_assign(ops, vc);
>>  
>>      vcpu_schedule_unlock_irq(lock, vc);
>> +    
>> +    local_irq_enable();
>>
> This local_irq_enable() is not necessary any longer, is it?

Ah, yes -- thanks for catching that. :-)

> With that off (and I'd be fine if you want to do that while
> committing):
> 
> Reviwed-by: Dario Faggioli <dario.faggioli@citrix.com>

Great, thanks.

 -George

