From mboxrd@z Thu Jan  1 00:00:00 1970
From: Dario Faggioli <raistlin@linux.it>
Subject: [RFC/RFT][PATCH 1 of 3] Move locking into pluggable
	schedulers.
Date: Wed, 23 Nov 2011 16:07:56 +0100
Message-ID: <1322060876.30168.18.camel@Abyss>
References: <1322060131.30168.15.camel@Abyss>
Mime-Version: 1.0
Content-Type: multipart/mixed; boundary="===============4988314752036575231=="
Return-path: <xen-devel-bounces@lists.xensource.com>
In-Reply-To: <1322060131.30168.15.camel@Abyss>
List-Unsubscribe: <http://lists.xensource.com/mailman/options/xen-devel>,
	<mailto:xen-devel-request@lists.xensource.com?subject=unsubscribe>
List-Post: <mailto:xen-devel@lists.xensource.com>
List-Help: <mailto:xen-devel-request@lists.xensource.com?subject=help>
List-Subscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>,
	<mailto:xen-devel-request@lists.xensource.com?subject=subscribe>
Sender: xen-devel-bounces@lists.xensource.com
Errors-To: xen-devel-bounces@lists.xensource.com
To: xen-devel@lists.xensource.com
Cc: George Dunlap <George.Dunlap@eu.citrix.com>, Juergen Gross <juergen.gross@ts.fujitsu.com>, Keir Fraser <keir@xen.org>
List-Id: xen-devel@lists.xenproject.org


--===============4988314752036575231==
Content-Type: multipart/signed; micalg="pgp-sha1"; protocol="application/pgp-signature";
	boundary="=-YFk3Qd7bSF4SLMiJwwzE"


--=-YFk3Qd7bSF4SLMiJwwzE
Content-Type: text/plain; charset="UTF-8"
Content-Transfer-Encoding: quoted-printable

Pluggable schedulers' code knows what (and when) to lock better than
generic code, let's delegate to them all the concurrency related issues.

Signed-off-by: Dario Faggioli <dario.faggioli@citrix.com>

diff -r 84b3e46aa7d2 xen/common/sched_credit.c
--- a/xen/common/sched_credit.c	Wed Nov 23 12:03:37 2011 +0000
+++ b/xen/common/sched_credit.c	Wed Nov 23 15:09:14 2011 +0100
@@ -161,6 +161,7 @@ struct csched_dom {
  * System-wide private data
  */
 struct csched_private {
+    /* lock for the whole pluggable scheduler, nests inside cpupool_lock *=
/
     spinlock_t lock;
     struct list_head active_sdom;
     uint32_t ncpus;
@@ -800,6 +801,10 @@ csched_dom_cntl(
     struct csched_private *prv =3D CSCHED_PRIV(ops);
     unsigned long flags;
=20
+    /* Protect both get and put branches with the pluggable scheduler
+     * lock. Runq lock not needed anywhere in here. */
+    spin_lock_irqsave(&prv->lock, flags);
+
     if ( op->cmd =3D=3D XEN_DOMCTL_SCHEDOP_getinfo )
     {
         op->u.credit.weight =3D sdom->weight;
@@ -809,8 +814,6 @@ csched_dom_cntl(
     {
         ASSERT(op->cmd =3D=3D XEN_DOMCTL_SCHEDOP_putinfo);
=20
-        spin_lock_irqsave(&prv->lock, flags);
-
         if ( op->u.credit.weight !=3D 0 )
         {
             if ( !list_empty(&sdom->active_sdom_elem) )
@@ -824,9 +827,10 @@ csched_dom_cntl(
         if ( op->u.credit.cap !=3D (uint16_t)~0U )
             sdom->cap =3D op->u.credit.cap;
=20
-        spin_unlock_irqrestore(&prv->lock, flags);
     }
=20
+    spin_unlock_irqrestore(&prv->lock, flags);
+
     return 0;
 }
=20
diff -r 84b3e46aa7d2 xen/common/sched_credit2.c
--- a/xen/common/sched_credit2.c	Wed Nov 23 12:03:37 2011 +0000
+++ b/xen/common/sched_credit2.c	Wed Nov 23 15:09:14 2011 +0100
@@ -1384,6 +1384,10 @@ csched_dom_cntl(
     struct csched_private *prv =3D CSCHED_PRIV(ops);
     unsigned long flags;
=20
+    /* Must hold csched_priv lock to read and update sdom,
+     * runq lock to update csvcs. */
+    spin_lock_irqsave(&prv->lock, flags);
+
     if ( op->cmd =3D=3D XEN_DOMCTL_SCHEDOP_getinfo )
     {
         op->u.credit2.weight =3D sdom->weight;
@@ -1397,10 +1401,6 @@ csched_dom_cntl(
             struct list_head *iter;
             int old_weight;
=20
-            /* Must hold csched_priv lock to update sdom, runq lock to
-             * update csvcs. */
-            spin_lock_irqsave(&prv->lock, flags);
-
             old_weight =3D sdom->weight;
=20
             sdom->weight =3D op->u.credit2.weight;
@@ -1411,22 +1411,23 @@ csched_dom_cntl(
                 struct csched_vcpu *svc =3D list_entry(iter, struct csched=
_vcpu, sdom_elem);
=20
                 /* NB: Locking order is important here.  Because we grab t=
his lock here, we
-                 * must never lock csched_priv.lock if we're holding a run=
queue
-                 * lock. */
-                vcpu_schedule_lock_irq(svc->vcpu);
+                 * must never lock csched_priv.lock if we're holding a run=
queue lock.
+                 * Also, calling vcpu_schedule_lock() is enough, since IRQ=
s have already
+                 * been disabled. */
+                vcpu_schedule_lock(svc->vcpu);
=20
                 BUG_ON(svc->rqd !=3D RQD(ops, svc->vcpu->processor));
=20
                 svc->weight =3D sdom->weight;
                 update_max_weight(svc->rqd, svc->weight, old_weight);
=20
-                vcpu_schedule_unlock_irq(svc->vcpu);
+                vcpu_schedule_unlock(svc->vcpu);
             }
-
-            spin_unlock_irqrestore(&prv->lock, flags);
         }
     }
=20
+    spin_unlock_irqrestore(&prv->lock, flags);
+
     return 0;
 }
=20
diff -r 84b3e46aa7d2 xen/common/sched_sedf.c
--- a/xen/common/sched_sedf.c	Wed Nov 23 12:03:37 2011 +0000
+++ b/xen/common/sched_sedf.c	Wed Nov 23 15:09:14 2011 +0100
@@ -1397,18 +1397,28 @@ static int sedf_adjust_weights(struct cp
 static int sedf_adjust(const struct scheduler *ops, struct domain *p, stru=
ct xen_domctl_scheduler_op *op)
 {
     struct vcpu *v;
-    int rc;
+    int rc =3D 0;
=20
     PRINT(2,"sedf_adjust was called, domain-id %i new period %"PRIu64" "
           "new slice %"PRIu64"\nlatency %"PRIu64" extra:%s\n",
           p->domain_id, op->u.sedf.period, op->u.sedf.slice,
           op->u.sedf.latency, (op->u.sedf.extratime)?"yes":"no");
=20
+    /* Serialize everything against runq lock to prevent concurrent update
+     * (notice all non-current VCPUs of the domain have been paused in the
+     * caller). */
+    if ( p =3D=3D current->domain )
+        vcpu_schedule_lock_irq(current);
+
     if ( op->cmd =3D=3D XEN_DOMCTL_SCHEDOP_putinfo )
     {
         /* Check for sane parameters. */
         if ( !op->u.sedf.period && !op->u.sedf.weight )
-            return -EINVAL;
+        {
+            rc =3D -EINVAL;
+            goto out;
+        }
+
         if ( op->u.sedf.weight )
         {
             if ( (op->u.sedf.extratime & EXTRA_AWARE) &&
@@ -1443,7 +1453,11 @@ static int sedf_adjust(const struct sche
                      (op->u.sedf.period < PERIOD_MIN) ||
                      (op->u.sedf.slice  > op->u.sedf.period) ||
                      (op->u.sedf.slice  < SLICE_MIN) )
-                    return -EINVAL;
+                {
+                    rc =3D -EINVAL;
+                    goto out;
+                }
+
                 EDOM_INFO(v)->weight =3D 0;
                 EDOM_INFO(v)->extraweight =3D 0;
                 EDOM_INFO(v)->period_orig =3D=20
@@ -1455,7 +1469,7 @@ static int sedf_adjust(const struct sche
=20
         rc =3D sedf_adjust_weights(p->cpupool, op);
         if ( rc )
-            return rc;
+            goto out;
=20
         for_each_vcpu ( p, v )
         {
@@ -1469,7 +1483,11 @@ static int sedf_adjust(const struct sche
     else if ( op->cmd =3D=3D XEN_DOMCTL_SCHEDOP_getinfo )
     {
         if ( p->vcpu[0] =3D=3D NULL )
-            return -EINVAL;
+        {
+            rc =3D -EINVAL;
+            goto out;
+        }
+
         op->u.sedf.period    =3D EDOM_INFO(p->vcpu[0])->period;
         op->u.sedf.slice     =3D EDOM_INFO(p->vcpu[0])->slice;
         op->u.sedf.extratime =3D EDOM_INFO(p->vcpu[0])->status & EXTRA_AWA=
RE;
@@ -1477,8 +1495,12 @@ static int sedf_adjust(const struct sche
         op->u.sedf.weight    =3D EDOM_INFO(p->vcpu[0])->weight;
     }
=20
-    PRINT(2,"sedf_adjust_finished\n");
-    return 0;
+out:
+    if ( p =3D=3D current->domain )
+        vcpu_schedule_unlock_irq(current);
+
+    PRINT(2,"sedf_adjust_finished with return code %d\n", rc);
+    return rc;
 }
=20
 const struct scheduler sched_sedf_def =3D {
diff -r 84b3e46aa7d2 xen/common/schedule.c
--- a/xen/common/schedule.c	Wed Nov 23 12:03:37 2011 +0000
+++ b/xen/common/schedule.c	Wed Nov 23 15:09:14 2011 +0100
@@ -1015,15 +1015,8 @@ long sched_adjust(struct domain *d, stru
=20
     /*
      * Most VCPUs we can simply pause. If we are adjusting this VCPU then
-     * we acquire the local schedule_lock to guard against concurrent upda=
tes.
-     *
-     * We only acquire the local schedule lock after we have paused all ot=
her
-     * VCPUs in this domain. There are two reasons for this:
-     * 1- We don't want to hold up interrupts as pausing a VCPU can
-     *    trigger a tlb shootdown.
-     * 2- Pausing other VCPUs involves briefly locking the schedule
-     *    lock of the CPU they are running on. This CPU could be the
-     *    same as ours.
+     * concurrent updates shall be prevented within the actual pluggable
+     * scheduler.
      */
=20
     for_each_vcpu ( d, v )
@@ -1032,15 +1025,9 @@ long sched_adjust(struct domain *d, stru
             vcpu_pause(v);
     }
=20
-    if ( d =3D=3D current->domain )
-        vcpu_schedule_lock_irq(current);
-
     if ( (ret =3D SCHED_OP(DOM2OP(d), adjust, d, op)) =3D=3D 0 )
         TRACE_1D(TRC_SCHED_ADJDOM, d->domain_id);
=20
-    if ( d =3D=3D current->domain )
-        vcpu_schedule_unlock_irq(current);
-
     for_each_vcpu ( d, v )
     {
         if ( v !=3D current )

--=20
<<This happens because I choose it to happen!>> (Raistlin Majere)
-------------------------------------------------------------------
Dario Faggioli, http://retis.sssup.it/people/faggioli
Senior Software Engineer, Citrix Systems R&D Ltd., Cambridge (UK)
PhD Candidate, ReTiS Lab, Scuola Superiore Sant'Anna, Pisa (Italy)


--=-YFk3Qd7bSF4SLMiJwwzE
Content-Type: application/pgp-signature; name="signature.asc"
Content-Description: This is a digitally signed message part
Content-Transfer-Encoding: 7bit

-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.4.11 (GNU/Linux)

iEYEABECAAYFAk7NDEwACgkQk4XaBE3IOsSeCQCgoJCJsYia/X/oVQMLDtYBGLCi
sckAnR9wYQJXIJ1KE9lISJkdrg5g3oOl
=vyX5
-----END PGP SIGNATURE-----

--=-YFk3Qd7bSF4SLMiJwwzE--


--===============4988314752036575231==
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Content-Disposition: inline

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

--===============4988314752036575231==--