public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Steven Rostedt <rostedt@goodmis.org>
To: linux-kernel@vger.kernel.org,
	linux-rt-users <linux-rt-users@vger.kernel.org>
Cc: Ingo Molnar <mingo@elte.hu>, Steven Rostedt <rostedt@goodmis.org>,
	Peter Zijlstra <peterz@infradead.org>,
	Thomas Gleixner <tglx@linutronix.de>,
	Clark Williams <clark.williams@gmail.com>,
	Arnaldo Carvalho de Melo <acme@ghostprotocols.net>,
	Jon Masters <jonathan@jonmasters.org>,
	Gregory Haskins <ghaskins@novell.com>,
	Steven Rostedt <srostedt@redhat.com>
Subject: [PATCH RT 6/6] read lock Priority Inheritance implementation
Date: Fri, 25 Apr 2008 09:09:14 -0400	[thread overview]
Message-ID: <20080425142110.380944244@goodmis.org> (raw)
In-Reply-To: 20080425130908.140707404@goodmis.org

[-- Attachment #1: multi-reader-pi.patch --]
[-- Type: text/plain, Size: 9409 bytes --]

This patch adds priority inheritance (PI) to the read/write locks.
When a task blocks on a lock and the PI chain eventually reaches a lock
that is owned by readers, every reader that has a lower priority than the
blocked task is boosted.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
---
 include/linux/init_task.h |    8 +++
 include/linux/rt_lock.h   |    4 +
 kernel/fork.c             |    1 
 kernel/rtmutex.c          |  115 ++++++++++++++++++++++++++++++++++++++++++----
 4 files changed, 118 insertions(+), 10 deletions(-)

Index: linux-2.6.24.4-rt4/include/linux/rt_lock.h
===================================================================
--- linux-2.6.24.4-rt4.orig/include/linux/rt_lock.h	2008-03-25 23:13:17.000000000 -0400
+++ linux-2.6.24.4-rt4/include/linux/rt_lock.h	2008-03-25 23:14:47.000000000 -0400
@@ -13,6 +13,7 @@
 #include <linux/rtmutex.h>
 #include <asm/atomic.h>
 #include <linux/spinlock_types.h>
+#include <linux/sched_prio.h>
 
 #ifdef CONFIG_PREEMPT_RT
 /*
@@ -66,6 +67,7 @@ struct rw_mutex {
 	atomic_t		count;	/* number of times held for read */
 	atomic_t		owners; /* number of owners as readers */
 	struct list_head	readers;
+	int prio;
 };
 
 /*
@@ -98,6 +100,7 @@ typedef struct {
 
 #define __RW_LOCK_UNLOCKED(name) (rwlock_t) \
 	{ .owners.mutex = __RT_SPIN_INITIALIZER(name.owners.mutex),	\
+	  .owners.prio = MAX_PRIO,					\
 	  RW_DEP_MAP_INIT(name) }
 #else /* !PREEMPT_RT */
 
@@ -196,6 +199,7 @@ extern int __bad_func_type(void);
 
 #define __RWSEM_INITIALIZER(name) \
 	{ .owners.mutex = __RT_MUTEX_INITIALIZER(name.owners.mutex),	\
+	  .owners.prio = MAX_PRIO,					\
 	  RW_DEP_MAP_INIT(name) }
 
 #define DECLARE_RWSEM(lockname) \
Index: linux-2.6.24.4-rt4/kernel/rtmutex.c
===================================================================
--- linux-2.6.24.4-rt4.orig/kernel/rtmutex.c	2008-03-25 23:13:17.000000000 -0400
+++ linux-2.6.24.4-rt4/kernel/rtmutex.c	2008-03-25 23:14:47.000000000 -0400
@@ -133,6 +133,8 @@ static inline void init_lists(struct rt_
 	}
 }
 
+static int rt_mutex_get_readers_prio(struct task_struct *task, int prio);
+
 /*
  * Calculate task priority from the waiter list priority
  *
@@ -143,6 +145,8 @@ int rt_mutex_getprio(struct task_struct 
 {
 	int prio = min(task->normal_prio, get_rcu_prio(task));
 
+	prio = rt_mutex_get_readers_prio(task, prio);
+
 	if (likely(!task_has_pi_waiters(task)))
 		return prio;
 
@@ -185,6 +189,11 @@ static void rt_mutex_adjust_prio(struct 
  */
 int max_lock_depth = 1024;
 
+static int rt_mutex_adjust_readers(struct rt_mutex *orig_lock,
+				   struct rt_mutex_waiter *orig_waiter,
+				   struct task_struct *top_task,
+				   struct rt_mutex *lock,
+				   int recursion_depth);
 /*
  * Adjust the priority chain. Also used for deadlock detection.
  * Decreases task's usage by one - may thus free the task.
@@ -194,7 +203,8 @@ static int rt_mutex_adjust_prio_chain(st
 				      int deadlock_detect,
 				      struct rt_mutex *orig_lock,
 				      struct rt_mutex_waiter *orig_waiter,
-				      struct task_struct *top_task)
+				      struct task_struct *top_task,
+				      int recursion_depth)
 {
 	struct rt_mutex *lock;
 	struct rt_mutex_waiter *waiter, *top_waiter = orig_waiter;
@@ -296,8 +306,13 @@ static int rt_mutex_adjust_prio_chain(st
 	/* Grab the next task */
 	task = rt_mutex_owner(lock);
 
-	/* Writers do not boost their readers. */
+	/*
+	 * Readers are special. We may need to boost more than one owner.
+	 */
 	if (task == RT_RW_READER) {
+		ret = rt_mutex_adjust_readers(orig_lock, orig_waiter,
+					      top_task, lock,
+					      recursion_depth);
 		spin_unlock_irqrestore(&lock->wait_lock, flags);
 		goto out;
 	}
@@ -479,9 +494,12 @@ static int task_blocks_on_rt_mutex(struc
 	spin_unlock(&current->pi_lock);
 
 	if (waiter == rt_mutex_top_waiter(lock)) {
-		/* readers are not handled */
-		if (owner == RT_RW_READER)
-			return 0;
+		/* readers are handled differently */
+		if (owner == RT_RW_READER) {
+			res = rt_mutex_adjust_readers(lock, waiter,
+						      current, lock, 0);
+			return res;
+		}
 
 		spin_lock(&owner->pi_lock);
 		plist_del(&top_waiter->pi_list_entry, &owner->pi_waiters);
@@ -508,7 +526,7 @@ static int task_blocks_on_rt_mutex(struc
 	spin_unlock_irqrestore(&lock->wait_lock, flags);
 
 	res = rt_mutex_adjust_prio_chain(owner, detect_deadlock, lock, waiter,
-					 current);
+					 current, 0);
 
 	spin_lock_irq(&lock->wait_lock);
 
@@ -625,7 +643,7 @@ static void remove_waiter(struct rt_mute
 
 	spin_unlock_irqrestore(&lock->wait_lock, flags);
 
-	rt_mutex_adjust_prio_chain(owner, 0, lock, NULL, current);
+	rt_mutex_adjust_prio_chain(owner, 0, lock, NULL, current, 0);
 
 	spin_lock_irq(&lock->wait_lock);
 }
@@ -652,7 +670,7 @@ void rt_mutex_adjust_pi(struct task_stru
 	get_task_struct(task);
 	spin_unlock_irqrestore(&task->pi_lock, flags);
 
-	rt_mutex_adjust_prio_chain(task, 0, NULL, NULL, task);
+	rt_mutex_adjust_prio_chain(task, 0, NULL, NULL, task, 0);
 }
 
 /*
@@ -1088,7 +1106,6 @@ static int try_to_take_rw_read(struct rw
 			if (rt_rwlock_pending_writer(rwm))
 				return 0;
 			if (rt_mutex_has_waiters(mutex)) {
-				/* readers don't do PI */
 				waiter = rt_mutex_top_waiter(mutex);
 				if (current->prio >= waiter->task->prio)
 					return 0;
@@ -1102,7 +1119,7 @@ static int try_to_take_rw_read(struct rw
 				spin_unlock(&mtxowner->pi_lock);
 			}
 		} else if (rt_mutex_has_waiters(mutex)) {
-			/* Readers don't do PI */
+			/* Readers do things differently with respect to PI */
 			waiter = rt_mutex_top_waiter(mutex);
 			spin_lock(&current->pi_lock);
 			plist_del(&waiter->pi_list_entry, &current->pi_waiters);
@@ -1608,6 +1625,7 @@ rt_read_slowunlock(struct rw_mutex *rwm,
 
 	/* If no one is blocked, then clear all ownership */
 	if (!rt_mutex_has_waiters(mutex)) {
+		rwm->prio = MAX_PRIO;
 		/*
 		 * If count is not zero, we are under the limit with
 		 * no other readers.
@@ -1838,11 +1856,88 @@ void rt_mutex_rwsem_init(struct rw_mutex
 	rwm->owner = NULL;
 	atomic_set(&rwm->count, 0);
 	atomic_set(&rwm->owners, 0);
+	rwm->prio = MAX_PRIO;
 	INIT_LIST_HEAD(&rwm->readers);
 
 	__rt_mutex_init(mutex, name);
 }
 
+static int rt_mutex_get_readers_prio(struct task_struct *task, int prio)
+{
+	struct reader_lock_struct *rls;
+	struct rw_mutex *rwm;
+	int lock_prio;
+	int i;
+
+	for (i = 0; i < task->reader_lock_count; i++) {
+		rls = &task->owned_read_locks[i];
+		rwm = rls->lock;
+		if (rwm) {
+			lock_prio = rwm->prio;
+			if (prio > lock_prio)
+				prio = lock_prio;
+		}
+	}
+
+	return prio;
+}
+
+static int rt_mutex_adjust_readers(struct rt_mutex *orig_lock,
+				   struct rt_mutex_waiter *orig_waiter,
+				   struct task_struct *top_task,
+				   struct rt_mutex *lock,
+				   int recursion_depth)
+{
+	struct reader_lock_struct *rls;
+	struct rt_mutex_waiter *waiter;
+	struct task_struct *task;
+	struct rw_mutex *rwm = container_of(lock, struct rw_mutex, mutex);
+
+	if (rt_mutex_has_waiters(lock)) {
+		waiter = rt_mutex_top_waiter(lock);
+		/*
+		 * Do we need to grab the task->pi_lock?
+		 * Really, we are only reading it. If it
+		 * changes, then that should follow this chain
+		 * too.
+		 */
+		rwm->prio = waiter->task->prio;
+	} else
+		rwm->prio = MAX_PRIO;
+
+	if (recursion_depth >= MAX_RWLOCK_DEPTH) {
+		WARN_ON(1);
+		return 1;
+	}
+
+	list_for_each_entry(rls, &rwm->readers, list) {
+		task = rls->task;
+		get_task_struct(task);
+		/*
+		 * rt_mutex_adjust_prio_chain will do
+		 * the put_task_struct
+		 */
+		rt_mutex_adjust_prio_chain(task, 0, orig_lock,
+					   orig_waiter, top_task,
+					   recursion_depth+1);
+	}
+
+	return 0;
+}
+#else
+static int rt_mutex_adjust_readers(struct rt_mutex *orig_lock,
+				   struct rt_mutex_waiter *orig_waiter,
+				   struct task_struct *top_task,
+				   struct rt_mutex *lock,
+				   int recursion_depth)
+{
+	return 0;
+}
+
+static int rt_mutex_get_readers_prio(struct task_struct *task, int prio)
+{
+	return prio;
+}
 #endif /* CONFIG_PREEMPT_RT */
 
 #ifdef CONFIG_PREEMPT_BKL
Index: linux-2.6.24.4-rt4/include/linux/init_task.h
===================================================================
--- linux-2.6.24.4-rt4.orig/include/linux/init_task.h	2008-03-25 16:41:47.000000000 -0400
+++ linux-2.6.24.4-rt4/include/linux/init_task.h	2008-03-25 23:14:47.000000000 -0400
@@ -99,6 +99,13 @@ extern struct nsproxy init_nsproxy;
 #define INIT_PREEMPT_RCU_BOOST(tsk)
 #endif /* #else #ifdef CONFIG_PREEMPT_RCU_BOOST */
 
+#ifdef CONFIG_PREEMPT_RT
+# define INIT_RW_OWNERS(tsk) .owned_read_locks = {			\
+		[0 ... (MAX_RWLOCK_DEPTH - 1) ] = { .task = &tsk } },
+#else
+# define INIT_RW_OWNERS(tsk)
+#endif
+
 extern struct group_info init_groups;
 
 #define INIT_STRUCT_PID {						\
@@ -189,6 +196,7 @@ extern struct group_info init_groups;
 	INIT_TRACE_IRQFLAGS						\
 	INIT_LOCKDEP							\
 	INIT_PREEMPT_RCU_BOOST(tsk)					\
+	INIT_RW_OWNERS(tsk)						\
 }
 
 
Index: linux-2.6.24.4-rt4/kernel/fork.c
===================================================================
--- linux-2.6.24.4-rt4.orig/kernel/fork.c	2008-03-25 23:13:17.000000000 -0400
+++ linux-2.6.24.4-rt4/kernel/fork.c	2008-03-25 23:14:47.000000000 -0400
@@ -1214,6 +1214,7 @@ static struct task_struct *copy_process(
 			INIT_LIST_HEAD(&p->owned_read_locks[i].list);
 			p->owned_read_locks[i].count = 0;
 			p->owned_read_locks[i].lock = NULL;
+			p->owned_read_locks[i].task = p;
 		}
 	}
 #endif

-- 

      parent reply	other threads:[~2008-04-25 14:22 UTC|newest]

Thread overview: 7+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-04-25 13:09 [PATCH RT 0/6] New read/write locks for PI and multiple readers Steven Rostedt
2008-04-25 13:09 ` [PATCH RT 1/6] add framework for multi readers on rwsems Steven Rostedt
2008-04-25 13:09 ` [PATCH RT 2/6] implement rwlocks management Steven Rostedt
2008-04-25 13:09 ` [PATCH RT 3/6] map tasks to reader locks held Steven Rostedt
2008-04-25 13:09 ` [PATCH RT 4/6] implement reader limit on read write locks Steven Rostedt
2008-04-25 13:09 ` [PATCH RT 5/6] map read/write locks back to their readers Steven Rostedt
2008-04-25 13:09 ` Steven Rostedt [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20080425142110.380944244@goodmis.org \
    --to=rostedt@goodmis.org \
    --cc=acme@ghostprotocols.net \
    --cc=clark.williams@gmail.com \
    --cc=ghaskins@novell.com \
    --cc=jonathan@jonmasters.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-rt-users@vger.kernel.org \
    --cc=mingo@elte.hu \
    --cc=peterz@infradead.org \
    --cc=srostedt@redhat.com \
    --cc=tglx@linutronix.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

  Be sure your reply has a Subject: header at the top and a blank line
  before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox