All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] MCE: Fix race condition in mctelem_reserve
@ 2014-01-22 10:50 Frediano Ziglio
  2014-01-22 10:56 ` David Vrabel
                   ` (2 more replies)
  0 siblings, 3 replies; 18+ messages in thread
From: Frediano Ziglio @ 2014-01-22 10:50 UTC (permalink / raw)
  To: Liu Jinsong, Christoph Egger; +Cc: David Vrabel, Jan Beulich, xen-devel

These lines (in mctelem_reserve)


        newhead = oldhead->mcte_next;
        if (cmpxchgptr(freelp, oldhead, newhead) == oldhead) {

are racy. After you read the newhead pointer it can happen that another
flow (thread or recursive invocation) change all the list but set head
with same value. So oldhead is the same as *freelp but you are setting
a new head that could point to whatever element (even already used).

This patch use instead a bit array and atomic bit operations.

Actually it use unsigned long instead of bitmap type as testing for
all zeroes is easier.

Signed-off-by: Frediano Ziglio <frediano.ziglio@citrix.com>
---
 xen/arch/x86/cpu/mcheck/mctelem.c |   52 ++++++++++++++++++++++---------------
 1 file changed, 31 insertions(+), 21 deletions(-)

diff --git a/xen/arch/x86/cpu/mcheck/mctelem.c b/xen/arch/x86/cpu/mcheck/mctelem.c
index 895ce1a..e56b6fb 100644
--- a/xen/arch/x86/cpu/mcheck/mctelem.c
+++ b/xen/arch/x86/cpu/mcheck/mctelem.c
@@ -69,6 +69,11 @@ struct mctelem_ent {
 #define	MC_URGENT_NENT		10
 #define	MC_NONURGENT_NENT	20
 
+/* Check if we can fit enough bits in the free bit array */
+#if MC_URGENT_NENT + MC_NONURGENT_NENT > BITS_PER_LONG
+#error Too much elements
+#endif
+
 #define	MC_NCLASSES		(MC_NONURGENT + 1)
 
 #define	COOKIE2MCTE(c)		((struct mctelem_ent *)(c))
@@ -77,11 +82,9 @@ struct mctelem_ent {
 static struct mc_telem_ctl {
 	/* Linked lists that thread the array members together.
 	 *
-	 * The free lists are singly-linked via mcte_next, and we allocate
-	 * from them by atomically unlinking an element from the head.
-	 * Consumed entries are returned to the head of the free list.
-	 * When an entry is reserved off the free list it is not linked
-	 * on any list until it is committed or dismissed.
+	 * The free lists is a bit array where bit 1 means free.
+	 * This as element number is quite small and is easy to
+	 * atomically allocate that way.
 	 *
 	 * The committed list grows at the head and we do not maintain a
 	 * tail pointer; insertions are performed atomically.  The head
@@ -101,7 +104,7 @@ static struct mc_telem_ctl {
 	 * we can lock it for updates.  The head of the processing list
 	 * always has the oldest telemetry, and we append (as above)
 	 * at the tail of the processing list. */
-	struct mctelem_ent *mctc_free[MC_NCLASSES];
+	unsigned long mctc_free[MC_NCLASSES];
 	struct mctelem_ent *mctc_committed[MC_NCLASSES];
 	struct mctelem_ent *mctc_processing_head[MC_NCLASSES];
 	struct mctelem_ent *mctc_processing_tail[MC_NCLASSES];
@@ -214,7 +217,10 @@ static void mctelem_free(struct mctelem_ent *tep)
 	BUG_ON(MCTE_STATE(tep) != MCTE_F_STATE_FREE);
 
 	tep->mcte_prev = NULL;
-	mctelem_xchg_head(&mctctl.mctc_free[target], &tep->mcte_next, tep);
+	tep->mcte_next = NULL;
+
+	/* set free in array */
+	set_bit(tep - mctctl.mctc_elems, &mctctl.mctc_free[target]);
 }
 
 /* Increment the reference count of an entry that is not linked on to
@@ -284,7 +290,7 @@ void mctelem_init(int reqdatasz)
 	}
 
 	for (i = 0; i < MC_URGENT_NENT + MC_NONURGENT_NENT; i++) {
-		struct mctelem_ent *tep, **tepp;
+		struct mctelem_ent *tep;
 
 		tep = mctctl.mctc_elems + i;
 		tep->mcte_flags = MCTE_F_STATE_FREE;
@@ -292,16 +298,15 @@ void mctelem_init(int reqdatasz)
 		tep->mcte_data = datarr + i * datasz;
 
 		if (i < MC_URGENT_NENT) {
-			tepp = &mctctl.mctc_free[MC_URGENT];
-			tep->mcte_flags |= MCTE_F_HOME_URGENT;
+			__set_bit(i, &mctctl.mctc_free[MC_URGENT]);
+			tep->mcte_flags = MCTE_F_HOME_URGENT;
 		} else {
-			tepp = &mctctl.mctc_free[MC_NONURGENT];
-			tep->mcte_flags |= MCTE_F_HOME_NONURGENT;
+			__set_bit(i, &mctctl.mctc_free[MC_NONURGENT]);
+			tep->mcte_flags = MCTE_F_HOME_NONURGENT;
 		}
 
-		tep->mcte_next = *tepp;
+		tep->mcte_next = NULL;
 		tep->mcte_prev = NULL;
-		*tepp = tep;
 	}
 }
 
@@ -310,18 +315,21 @@ static int mctelem_drop_count;
 
 /* Reserve a telemetry entry, or return NULL if none available.
  * If we return an entry then the caller must subsequently call exactly one of
- * mctelem_unreserve or mctelem_commit for that entry.
+ * mctelem_dismiss or mctelem_commit for that entry.
  */
 mctelem_cookie_t mctelem_reserve(mctelem_class_t which)
 {
-	struct mctelem_ent **freelp;
-	struct mctelem_ent *oldhead, *newhead;
+	unsigned long *freelp;
+	unsigned long oldfree;
+	unsigned bit;
 	mctelem_class_t target = (which == MC_URGENT) ?
 	    MC_URGENT : MC_NONURGENT;
 
 	freelp = &mctctl.mctc_free[target];
 	for (;;) {
-		if ((oldhead = *freelp) == NULL) {
+		oldfree = *freelp;
+
+		if (oldfree == 0) {
 			if (which == MC_URGENT && target == MC_URGENT) {
 				/* raid the non-urgent freelist */
 				target = MC_NONURGENT;
@@ -333,9 +341,11 @@ mctelem_cookie_t mctelem_reserve(mctelem_class_t which)
 			}
 		}
 
-		newhead = oldhead->mcte_next;
-		if (cmpxchgptr(freelp, oldhead, newhead) == oldhead) {
-			struct mctelem_ent *tep = oldhead;
+		/* try to allocate, atomically clear free bit */
+		bit = find_first_set_bit(oldfree);
+		if (test_and_clear_bit(bit, freelp)) {
+			/* return element we got */
+			struct mctelem_ent *tep = mctctl.mctc_elems + bit;
 
 			mctelem_hold(tep);
 			MCTE_TRANSITION_STATE(tep, FREE, UNCOMMITTED);
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 18+ messages in thread

end of thread, other threads:[~2014-02-19 10:35 UTC | newest]

Thread overview: 18+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2014-01-22 10:50 [PATCH] MCE: Fix race condition in mctelem_reserve Frediano Ziglio
2014-01-22 10:56 ` David Vrabel
2014-01-22 11:00   ` Frediano Ziglio
2014-01-22 13:05     ` David Vrabel
2014-01-22 12:21 ` Jan Beulich
2014-01-22 17:17   ` [PATCH v2] " Frediano Ziglio
2014-01-30  9:20     ` Frediano Ziglio
2014-01-30  9:55       ` Jan Beulich
2014-01-31 13:37     ` Frediano Ziglio
2014-02-14 16:23       ` Frediano Ziglio
2014-02-14 16:54         ` Jan Beulich
2014-02-16 14:44           ` Liu, Jinsong
2014-02-18 12:47     ` George Dunlap
2014-02-19  9:53       ` Frediano Ziglio
2014-02-19 10:35         ` George Dunlap
2014-02-18  8:42 ` [PATCH] " Liu, Jinsong
2014-02-18 11:10   ` Frediano Ziglio
2014-02-18 11:25     ` Liu, Jinsong

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.