All of lore.kernel.org
 help / color / mirror / Atom feed
From: Daniel Walker <dwalker@mvista.com>
To: linux-kernel@vger.kernel.org
Cc: Ulrich Drepper <drepper@gmail.com>,
	Thomas Gleixner <tglx@linutronix.de>,
	Arjan van de Ven <arjan@infradead.org>
Subject: [PATCH 5/5] futex: fix miss ordered wakeups
Date: Wed, 11 Jun 2008 13:49:21 -0700	[thread overview]
Message-ID: <20080611204917.523866467@mvista.com> (raw)
In-Reply-To: 20080611204916.271608740@mvista.com

[-- Attachment #1: blocked_on-futex.patch --]
[-- Type: text/plain, Size: 4334 bytes --]

Adds an additional function call to the sched_setscheduler to update the
waiter position of a task if it happens to be waiting on a futex. This
ensures that the kernel level waiter ordering is correctly maintained
based on the changed priority of the task.

I fixed the locking issue noticed by Thomas Gleixner.

This doesn't address userspace at all, only the kernel level wakeups and
kernel level ordering.

The additional locking added to the futex_wait function has no visible speed
impact, and only effects waiters which actual enter the kernel.

Signed-off-by: Daniel Walker <dwalker@mvista.com>

---
 include/linux/sched.h |    4 ++++
 kernel/futex.c        |   41 +++++++++++++++++++++++++++++++++++++++++
 kernel/sched.c        |    1 +
 3 files changed, 46 insertions(+)

Index: linux-2.6.25/include/linux/sched.h
===================================================================
--- linux-2.6.25.orig/include/linux/sched.h
+++ linux-2.6.25/include/linux/sched.h
@@ -1027,6 +1027,7 @@ struct sched_rt_entity {
 enum lock_waiter_type {
 	MUTEX_WAITER = 1,
 	RT_MUTEX_WAITER,
+	FUTEX_WAITER
 };
 
 struct lock_waiter_state {
@@ -1034,6 +1035,7 @@ struct lock_waiter_state {
 	union {
 		struct mutex_waiter *mutex_blocked_on;
 		struct rt_mutex_waiter *rt_blocked_on;
+		union futex_key	*futex_blocked_on;
 	};
 };
 
@@ -1675,6 +1677,8 @@ static inline int rt_mutex_getprio(struc
 # define rt_mutex_adjust_pi(p)		do { } while (0)
 #endif
 
+extern void futex_adjust_waiters(struct task_struct *p);
+
 extern void set_user_nice(struct task_struct *p, long nice);
 extern int task_prio(const struct task_struct *p);
 extern int task_nice(const struct task_struct *p);
Index: linux-2.6.25/kernel/futex.c
===================================================================
--- linux-2.6.25.orig/kernel/futex.c
+++ linux-2.6.25/kernel/futex.c
@@ -327,6 +327,38 @@ static int get_futex_value_locked(u32 *d
 	return ret ? -EFAULT : 0;
 }
 
+void futex_adjust_waiters(struct task_struct *p)
+{
+
+	if (p->blocked_on) {
+		struct futex_hash_bucket *hb;
+		struct futex_q *q, *next;
+		union futex_key key;
+
+		spin_lock_irq(&p->pi_lock);
+		if (p->blocked_on && p->blocked_on->lock_type == FUTEX_WAITER) {
+			key = *p->blocked_on->futex_blocked_on;
+			spin_unlock_irq(&p->pi_lock);
+		} else {
+			spin_unlock_irq(&p->pi_lock);
+			return;
+		}
+
+		hb = hash_futex(&key);
+		spin_lock(&hb->lock);
+		plist_for_each_entry_safe(q, next, &hb->chain, list) {
+			if (match_futex(&q->key, &key) && q->task == p) {
+				int prio = min(p->normal_prio, MAX_RT_PRIO);
+				plist_del(&q->list, &hb->chain);
+				plist_node_init(&q->list, prio);
+				plist_add(&q->list, &hb->chain);
+				break;
+			}
+		}
+		spin_unlock(&hb->lock);
+	}
+}
+
 /*
  * Fault handling.
  * if fshared is non NULL, current->mm->mmap_sem is already held
@@ -1159,6 +1191,8 @@ static int futex_wait(u32 __user *uaddr,
 	DECLARE_WAITQUEUE(wait, curr);
 	struct futex_hash_bucket *hb;
 	struct futex_q q;
+	struct lock_waiter_state blocked_on = {
+		.lock_type = FUTEX_WAITER, { .futex_blocked_on = &q.key } };
 	u32 uval;
 	int ret;
 	struct hrtimer_sleeper t;
@@ -1176,6 +1210,8 @@ static int futex_wait(u32 __user *uaddr,
 	if (unlikely(ret != 0))
 		goto out_release_sem;
 
+	set_blocked_on(current, &blocked_on);
+
 	hb = queue_lock(&q);
 
 	/*
@@ -1203,6 +1239,8 @@ static int futex_wait(u32 __user *uaddr,
 	if (unlikely(ret)) {
 		queue_unlock(&q, hb);
 
+		set_blocked_on(current, NULL);
+
 		/*
 		 * If we would have faulted, release mmap_sem, fault it in and
 		 * start all over again.
@@ -1276,6 +1314,8 @@ static int futex_wait(u32 __user *uaddr,
 	}
 	__set_current_state(TASK_RUNNING);
 
+	set_blocked_on(current, NULL);
+
 	/*
 	 * NOTE: we don't remove ourselves from the waitqueue because
 	 * we are the only user of it.
@@ -1310,6 +1350,7 @@ static int futex_wait(u32 __user *uaddr,
 
  out_unlock_release_sem:
 	queue_unlock(&q, hb);
+	set_blocked_on(current, NULL);
 
  out_release_sem:
 	futex_unlock_mm(fshared);
Index: linux-2.6.25/kernel/sched.c
===================================================================
--- linux-2.6.25.orig/kernel/sched.c
+++ linux-2.6.25/kernel/sched.c
@@ -5209,6 +5209,7 @@ recheck:
 	spin_unlock_irqrestore(&p->pi_lock, flags);
 
 	rt_mutex_adjust_pi(p);
+	futex_adjust_waiters(p);
 
 	return 0;
 }

-- 

  parent reply	other threads:[~2008-06-11 20:55 UTC|newest]

Thread overview: 23+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-06-11 20:49 [PATCH 1/5] futex: checkpatch cleanup Daniel Walker
2008-06-11 20:49 ` [PATCH 2/5] futex: update prio on requeue Daniel Walker
2008-06-12  5:22   ` Peter Zijlstra
2008-06-11 20:49 ` [PATCH 3/5] mutex debug: add generic blocked_on usage Daniel Walker
2008-06-12  5:25   ` Peter Zijlstra
2008-06-12 13:21     ` Daniel Walker
2008-06-11 20:49 ` [PATCH 4/5] rtmutex: " Daniel Walker
2008-06-11 20:49 ` Daniel Walker [this message]
2008-06-12  6:07   ` [PATCH 5/5] futex: fix miss ordered wakeups Peter Zijlstra
2008-06-12 13:22     ` Daniel Walker
2008-06-12 13:57       ` Peter Zijlstra
2008-06-12 14:04         ` Daniel Walker
2008-06-12  8:56   ` Thomas Gleixner
2008-06-12 13:30     ` Daniel Walker
2008-06-12 13:33       ` Thomas Gleixner
2008-06-12 13:44         ` Daniel Walker
2008-06-12 15:24           ` Thomas Gleixner
2008-06-12 15:56             ` Daniel Walker
2008-06-12 19:55               ` Thomas Gleixner
2008-06-12 22:09                 ` Daniel Walker
2008-06-12 22:43                   ` Thomas Gleixner
2008-06-12 23:06                     ` Daniel Walker
2008-06-12 23:30                       ` Thomas Gleixner

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20080611204917.523866467@mvista.com \
    --to=dwalker@mvista.com \
    --cc=arjan@infradead.org \
    --cc=drepper@gmail.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=tglx@linutronix.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.