* [PATCH RT 1/6] add framework for multi readers on rwsems
2008-04-25 13:09 [PATCH RT 0/6] New read/write locks for PI and multiple readers Steven Rostedt
@ 2008-04-25 13:09 ` Steven Rostedt
2008-04-25 13:09 ` [PATCH RT 2/6] implement rwlocks management Steven Rostedt
` (4 subsequent siblings)
5 siblings, 0 replies; 7+ messages in thread
From: Steven Rostedt @ 2008-04-25 13:09 UTC (permalink / raw)
To: linux-kernel, linux-rt-users
Cc: Ingo Molnar, Steven Rostedt, Peter Zijlstra, Thomas Gleixner,
Clark Williams, Arnaldo Carvalho de Melo, Jon Masters,
Gregory Haskins, Steven Rostedt
[-- Attachment #1: rwsems-mulitple-readers.patch --]
[-- Type: text/plain, Size: 31664 bytes --]
Add the framework for multiple readers and implement the code for
rwsems first.
A new structure called rw_mutex is created. This is used by PREEMPT_RT
rwsems and will later be incorporated into rwlocks.
The rw_mutex lock encapsulates the rt_mutex for use with rwsems (and later
rwlocks). This patch is just the groundwork: it simply allows multiple
readers to grab the lock. PI is disabled for readers here; that is, when
a writer is blocked on an rwsem with readers, it will not boost the readers.
That work will be done later in the patch series.
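To make the intent concrete, here is a rough user-space model of the
semantics this series is after (illustration only, not part of the patch;
the toy_* names are invented, and wait queues, pending ownership, lock
stealing and PI are all left out): any number of readers may hold the lock
at once and share ownership through a count, while a writer needs the
owner field to itself.
/* Illustration only: a toy model of the rw_mutex semantics, not kernel code. */
#include <assert.h>
#include <stdio.h>

#define RT_RW_READER ((void *)0x100)    /* "owned by readers" sentinel */

struct toy_rw_mutex {
    void *owner;                        /* NULL, a writer, or RT_RW_READER */
    int count;                          /* number of times held for read */
};

static int toy_down_read(struct toy_rw_mutex *rwm)
{
    if (rwm->owner && rwm->owner != RT_RW_READER)
        return 0;                       /* a writer holds the lock */
    rwm->owner = RT_RW_READER;          /* readers share ownership */
    rwm->count++;
    return 1;
}

static void toy_up_read(struct toy_rw_mutex *rwm)
{
    assert(rwm->count > 0);
    if (--rwm->count == 0)
        rwm->owner = NULL;              /* last reader clears ownership */
}

static int toy_down_write(struct toy_rw_mutex *rwm, void *task)
{
    if (rwm->owner)
        return 0;                       /* readers or a writer hold it */
    rwm->owner = task;                  /* writer takes exclusive ownership */
    return 1;
}

int main(void)
{
    struct toy_rw_mutex rwm = { NULL, 0 };
    int me;

    printf("reader 1: %d\n", toy_down_read(&rwm));                     /* 1 */
    printf("reader 2: %d\n", toy_down_read(&rwm));                     /* 1: readers share */
    printf("writer while read-held: %d\n", toy_down_write(&rwm, &me)); /* 0 */
    toy_up_read(&rwm);
    toy_up_read(&rwm);
    printf("writer after readers left: %d\n", toy_down_write(&rwm, &me)); /* 1 */
    return 0;
}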
Signed-off-by: Steven Rostedt <srostedt@redhat.com>
---
include/linux/lockdep.h | 13
include/linux/rt_lock.h | 13
kernel/rt.c | 64 ----
kernel/rtmutex.c | 706 +++++++++++++++++++++++++++++++++++++++++++++++-
kernel/rtmutex_common.h | 57 +++
5 files changed, 795 insertions(+), 58 deletions(-)
Index: linux-2.6.24.4-rt4/include/linux/rt_lock.h
===================================================================
--- linux-2.6.24.4-rt4.orig/include/linux/rt_lock.h 2008-03-25 16:41:48.000000000 -0400
+++ linux-2.6.24.4-rt4/include/linux/rt_lock.h 2008-03-25 21:39:23.000000000 -0400
@@ -60,6 +60,12 @@ typedef raw_spinlock_t spinlock_t;
#ifdef CONFIG_PREEMPT_RT
+struct rw_mutex {
+ struct task_struct *owner;
+ struct rt_mutex mutex;
+ atomic_t count; /* number of times held for read */
+};
+
/*
* RW-semaphores are a spinlock plus a reader-depth count.
*
@@ -71,8 +77,7 @@ typedef raw_spinlock_t spinlock_t;
* fair and makes it simpler as well:
*/
struct rw_semaphore {
- struct rt_mutex lock;
- int read_depth;
+ struct rw_mutex owners;
#ifdef CONFIG_DEBUG_LOCK_ALLOC
struct lockdep_map dep_map;
#endif
@@ -189,7 +194,7 @@ extern int __bad_func_type(void);
*/
#define __RWSEM_INITIALIZER(name) \
- { .lock = __RT_MUTEX_INITIALIZER(name.lock), \
+ { .owners.mutex = __RT_MUTEX_INITIALIZER(name.owners.mutex), \
RW_DEP_MAP_INIT(name) }
#define DECLARE_RWSEM(lockname) \
@@ -222,7 +227,7 @@ extern void fastcall rt_up_read(struct r
extern void fastcall rt_up_write(struct rw_semaphore *rwsem);
extern void fastcall rt_downgrade_write(struct rw_semaphore *rwsem);
-# define rt_rwsem_is_locked(rws) (rt_mutex_is_locked(&(rws)->lock))
+# define rt_rwsem_is_locked(rws) ((rws)->owners.owner != NULL)
#define PICK_RWSEM_OP(...) PICK_FUNCTION(struct compat_rw_semaphore *, \
struct rw_semaphore *, ##__VA_ARGS__)
Index: linux-2.6.24.4-rt4/kernel/rtmutex.c
===================================================================
--- linux-2.6.24.4-rt4.orig/kernel/rtmutex.c 2008-03-25 16:41:48.000000000 -0400
+++ linux-2.6.24.4-rt4/kernel/rtmutex.c 2008-03-25 22:39:14.000000000 -0400
@@ -81,6 +81,7 @@ static void fixup_rt_mutex_waiters(struc
*/
#if defined(__HAVE_ARCH_CMPXCHG) && !defined(CONFIG_DEBUG_RT_MUTEXES)
# define rt_mutex_cmpxchg(l,c,n) (cmpxchg(&l->owner, c, n) == c)
+# define rt_rwlock_cmpxchg(rwm,c,n) (cmpxchg(&(rwm)->owner, c, n) == c)
static inline void mark_rt_mutex_waiters(struct rt_mutex *lock)
{
unsigned long owner, *p = (unsigned long *) &lock->owner;
@@ -89,13 +90,31 @@ static inline void mark_rt_mutex_waiters
owner = *p;
} while (cmpxchg(p, owner, owner | RT_MUTEX_HAS_WAITERS) != owner);
}
+#ifdef CONFIG_PREEMPT_RT
+static inline void mark_rt_rwlock_check(struct rw_mutex *rwm)
+{
+ unsigned long owner, *p = (unsigned long *) &rwm->owner;
+
+ do {
+ owner = *p;
+ } while (cmpxchg(p, owner, owner | RT_RWLOCK_CHECK) != owner);
+}
+#endif /* CONFIG_PREEMPT_RT */
#else
# define rt_mutex_cmpxchg(l,c,n) (0)
+# define rt_rwlock_cmpxchg(l,c,n) ({ (void)c; (void)n; 0; })
static inline void mark_rt_mutex_waiters(struct rt_mutex *lock)
{
lock->owner = (struct task_struct *)
((unsigned long)lock->owner | RT_MUTEX_HAS_WAITERS);
}
+#ifdef CONFIG_PREEMPT_RT
+static inline void mark_rt_rwlock_check(struct rw_mutex *rwm)
+{
+ rwm->owner = (struct task_struct *)
+ ((unsigned long)rwm->owner | RT_RWLOCK_CHECK);
+}
+#endif /* CONFIG_PREEMPT_RT */
#endif
int pi_initialized;
@@ -276,6 +295,13 @@ static int rt_mutex_adjust_prio_chain(st
/* Grab the next task */
task = rt_mutex_owner(lock);
+
+ /* Writers do not boost their readers. */
+ if (task == RT_RW_READER) {
+ spin_unlock_irqrestore(&lock->wait_lock, flags);
+ goto out;
+ }
+
get_task_struct(task);
spin_lock(&task->pi_lock);
@@ -309,7 +335,7 @@ static int rt_mutex_adjust_prio_chain(st
spin_unlock_irqrestore(&task->pi_lock, flags);
out_put_task:
put_task_struct(task);
-
+ out:
return ret;
}
@@ -329,6 +355,8 @@ static inline int try_to_steal_lock(stru
if (pendowner == current)
return 1;
+ WARN_ON(rt_mutex_owner(lock) == RT_RW_READER);
+
spin_lock(&pendowner->pi_lock);
if (current->prio >= pendowner->prio) {
spin_unlock(&pendowner->pi_lock);
@@ -451,6 +479,10 @@ static int task_blocks_on_rt_mutex(struc
spin_unlock(¤t->pi_lock);
if (waiter == rt_mutex_top_waiter(lock)) {
+ /* readers are not handled */
+ if (owner == RT_RW_READER)
+ return 0;
+
spin_lock(&owner->pi_lock);
plist_del(&top_waiter->pi_list_entry, &owner->pi_waiters);
plist_add(&waiter->pi_list_entry, &owner->pi_waiters);
@@ -463,7 +495,7 @@ static int task_blocks_on_rt_mutex(struc
else if (debug_rt_mutex_detect_deadlock(waiter, detect_deadlock))
chain_walk = 1;
- if (!chain_walk)
+ if (!chain_walk || owner == RT_RW_READER)
return 0;
/*
@@ -563,7 +595,7 @@ static void remove_waiter(struct rt_mute
current->pi_blocked_on = NULL;
spin_unlock(¤t->pi_lock);
- if (first && owner != current) {
+ if (first && owner != current && owner != RT_RW_READER) {
spin_lock(&owner->pi_lock);
@@ -679,6 +711,7 @@ rt_spin_lock_slowlock(struct rt_mutex *l
debug_rt_mutex_init_waiter(&waiter);
waiter.task = NULL;
+ waiter.write_lock = 0;
spin_lock_irqsave(&lock->wait_lock, flags);
init_lists(lock);
@@ -894,7 +927,671 @@ __rt_spin_lock_init(spinlock_t *lock, ch
}
EXPORT_SYMBOL(__rt_spin_lock_init);
-#endif
+static inline int rt_release_bkl(struct rt_mutex *lock, unsigned long flags);
+static inline void rt_reacquire_bkl(int saved_lock_depth);
+
+static inline void
+rt_rwlock_set_owner(struct rw_mutex *rwm, struct task_struct *owner,
+ unsigned long mask)
+{
+ unsigned long val = (unsigned long)owner | mask;
+
+ rwm->owner = (struct task_struct *)val;
+}
+
+/*
+ * The fast paths of the rw locks do not set up owners to
+ * the mutex. When blocking on an rwlock we must make sure
+ * there exists an owner.
+ */
+static void
+update_rw_mutex_owner(struct rw_mutex *rwm)
+{
+ struct rt_mutex *mutex = &rwm->mutex;
+ struct task_struct *mtxowner;
+
+ mtxowner = rt_mutex_owner(mutex);
+ if (mtxowner)
+ return;
+
+ mtxowner = rt_rwlock_owner(rwm);
+ WARN_ON(!mtxowner);
+ if (rt_rwlock_writer(rwm))
+ WARN_ON(mtxowner == RT_RW_READER);
+ else
+ mtxowner = RT_RW_READER;
+ rt_mutex_set_owner(mutex, mtxowner, 0);
+}
+
+static int try_to_take_rw_read(struct rw_mutex *rwm)
+{
+ struct rt_mutex *mutex = &rwm->mutex;
+ struct rt_mutex_waiter *waiter;
+ struct task_struct *mtxowner;
+
+ assert_spin_locked(&mutex->wait_lock);
+
+ /* mark the lock to force the owner to check on release */
+ mark_rt_rwlock_check(rwm);
+
+ /* is the owner a writer? */
+ if (unlikely(rt_rwlock_writer(rwm)))
+ return 0;
+
+ /* A writer is not the owner, but is a writer waiting */
+ mtxowner = rt_mutex_owner(mutex);
+
+ /* if the owner released it before we marked it then take it */
+ if (!mtxowner && !rt_rwlock_owner(rwm)) {
+ WARN_ON(atomic_read(&rwm->count));
+ rt_rwlock_set_owner(rwm, current, 0);
+ goto taken;
+ }
+
+ if (mtxowner && mtxowner != RT_RW_READER) {
+ if (!try_to_steal_lock(mutex)) {
+ /*
+ * readers don't own the mutex, and rwm shows that a
+ * writer doesn't have it either. If we enter this
+ * condition, then we must be pending.
+ */
+ WARN_ON(!rt_mutex_owner_pending(mutex));
+ /*
+ * Even though we didn't steal the lock, if the owner
+ * is a reader, and we are of higher priority than
+ * any waiting writer, we might still be able to continue.
+ */
+ if (rt_rwlock_pending_writer(rwm))
+ return 0;
+ if (rt_mutex_has_waiters(mutex)) {
+ /* readers don't do PI */
+ waiter = rt_mutex_top_waiter(mutex);
+ if (current->prio >= waiter->task->prio)
+ return 0;
+ /*
+ * The pending reader has PI waiters,
+ * but we are taking the lock.
+ * Remove the waiters from the pending owner.
+ */
+ spin_lock(&mtxowner->pi_lock);
+ plist_del(&waiter->pi_list_entry, &mtxowner->pi_waiters);
+ spin_unlock(&mtxowner->pi_lock);
+ }
+ } else if (rt_mutex_has_waiters(mutex)) {
+ /* Readers don't do PI */
+ waiter = rt_mutex_top_waiter(mutex);
+ spin_lock(¤t->pi_lock);
+ plist_del(&waiter->pi_list_entry, ¤t->pi_waiters);
+ spin_unlock(¤t->pi_lock);
+ }
+ /* Readers never own the mutex */
+ rt_mutex_set_owner(mutex, RT_RW_READER, 0);
+ }
+
+ /* RT_RW_READER forces slow paths */
+ rt_rwlock_set_owner(rwm, RT_RW_READER, 0);
+ taken:
+ rt_mutex_deadlock_account_lock(mutex, current);
+ atomic_inc(&rwm->count);
+ return 1;
+}
+
+static int
+try_to_take_rw_write(struct rw_mutex *rwm)
+{
+ struct rt_mutex *mutex = &rwm->mutex;
+ struct task_struct *own;
+
+ /* mark the lock to force the owner to check on release */
+ mark_rt_rwlock_check(rwm);
+
+ own = rt_rwlock_owner(rwm);
+
+ /* readers or writers? */
+ if ((own && !rt_rwlock_pending(rwm)))
+ return 0;
+
+ WARN_ON(atomic_read(&rwm->count));
+
+ /*
+ * RT_RW_PENDING means that the lock is free, but there are
+ * pending owners on the mutex
+ */
+ WARN_ON(own && !rt_mutex_owner_pending(mutex));
+
+ if (!try_to_take_rt_mutex(mutex))
+ return 0;
+
+ /*
+ * We stole the lock. Add both WRITER and CHECK flags
+ * since we must release the mutex.
+ */
+ rt_rwlock_set_owner(rwm, current, RT_RWLOCK_WRITER | RT_RWLOCK_CHECK);
+
+ return 1;
+}
+
+static void
+rt_read_slowlock(struct rw_mutex *rwm)
+{
+ struct rt_mutex_waiter waiter;
+ struct rt_mutex *mutex = &rwm->mutex;
+ int saved_lock_depth = -1;
+ unsigned long flags;
+
+ spin_lock_irqsave(&mutex->wait_lock, flags);
+ init_lists(mutex);
+
+ if (try_to_take_rw_read(rwm)) {
+ spin_unlock_irqrestore(&mutex->wait_lock, flags);
+ return;
+ }
+ update_rw_mutex_owner(rwm);
+
+ /* Owner is a writer (or a blocked writer). Block on the lock */
+
+ debug_rt_mutex_init_waiter(&waiter);
+ waiter.task = NULL;
+ waiter.write_lock = 0;
+
+ init_lists(mutex);
+
+ /*
+ * We drop the BKL here before we go into the wait loop to avoid a
+ * possible deadlock in the scheduler.
+ */
+ if (unlikely(current->lock_depth >= 0))
+ saved_lock_depth = rt_release_bkl(mutex, flags);
+ set_current_state(TASK_UNINTERRUPTIBLE);
+
+ for (;;) {
+ unsigned long saved_flags;
+
+ /* Try to acquire the lock: */
+ if (try_to_take_rw_read(rwm))
+ break;
+ update_rw_mutex_owner(rwm);
+
+ /*
+ * waiter.task is NULL the first time we come here and
+ * when we have been woken up by the previous owner
+ * but the lock got stolen by a higher prio task.
+ */
+ if (!waiter.task) {
+ task_blocks_on_rt_mutex(mutex, &waiter, 0, flags);
+ /* Wakeup during boost ? */
+ if (unlikely(!waiter.task))
+ continue;
+ }
+ saved_flags = current->flags & PF_NOSCHED;
+ current->flags &= ~PF_NOSCHED;
+
+ spin_unlock_irqrestore(&mutex->wait_lock, flags);
+
+ debug_rt_mutex_print_deadlock(&waiter);
+
+ if (waiter.task)
+ schedule_rt_mutex(mutex);
+
+ spin_lock_irqsave(&mutex->wait_lock, flags);
+
+ current->flags |= saved_flags;
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ }
+
+ set_current_state(TASK_RUNNING);
+
+ if (unlikely(waiter.task))
+ remove_waiter(mutex, &waiter, flags);
+
+ WARN_ON(rt_mutex_owner(mutex) &&
+ rt_mutex_owner(mutex) != current &&
+ rt_mutex_owner(mutex) != RT_RW_READER &&
+ !rt_mutex_owner_pending(mutex));
+
+ spin_unlock_irqrestore(&mutex->wait_lock, flags);
+
+ /* Must we reacquire the BKL? */
+ if (unlikely(saved_lock_depth >= 0))
+ rt_reacquire_bkl(saved_lock_depth);
+
+ debug_rt_mutex_free_waiter(&waiter);
+}
+
+static inline void
+rt_read_fastlock(struct rw_mutex *rwm,
+ void fastcall (*slowfn)(struct rw_mutex *rwm))
+{
+retry:
+ if (likely(rt_rwlock_cmpxchg(rwm, NULL, current))) {
+ rt_mutex_deadlock_account_lock(&rwm->mutex, current);
+ atomic_inc(&rwm->count);
+ /*
+ * It is possible that the owner was zeroed
+ * before we incremented count. If owner is not
+ * current, then retry again
+ */
+ if (unlikely(rwm->owner != current)) {
+ atomic_dec(&rwm->count);
+ goto retry;
+ }
+ } else
+ slowfn(rwm);
+}
+
+void fastcall rt_mutex_down_read(struct rw_mutex *rwm)
+{
+ rt_read_fastlock(rwm, rt_read_slowlock);
+}
+
+
+static inline int
+rt_read_slowtrylock(struct rw_mutex *rwm)
+{
+ struct rt_mutex *mutex = &rwm->mutex;
+ unsigned long flags;
+ int ret = 0;
+
+ spin_lock_irqsave(&mutex->wait_lock, flags);
+ init_lists(mutex);
+
+ if (try_to_take_rw_read(rwm))
+ ret = 1;
+
+ spin_unlock_irqrestore(&mutex->wait_lock, flags);
+
+ return ret;
+}
+
+static inline int
+rt_read_fasttrylock(struct rw_mutex *rwm,
+ int fastcall (*slowfn)(struct rw_mutex *rwm))
+{
+retry:
+ if (likely(rt_rwlock_cmpxchg(rwm, NULL, current))) {
+ rt_mutex_deadlock_account_lock(&rwm->mutex, current);
+ atomic_inc(&rwm->count);
+ /*
+ * It is possible that the owner was zeroed
+ * before we incremented count. If owner is not
+ * current, then retry again
+ */
+ if (unlikely(rwm->owner != current)) {
+ atomic_dec(&rwm->count);
+ goto retry;
+ }
+ return 1;
+ } else
+ return slowfn(rwm);
+}
+
+int __sched rt_mutex_down_read_trylock(struct rw_mutex *rwm)
+{
+ return rt_read_fasttrylock(rwm, rt_read_slowtrylock);
+}
+
+static void
+rt_write_slowlock(struct rw_mutex *rwm)
+{
+ struct rt_mutex *mutex = &rwm->mutex;
+ struct rt_mutex_waiter waiter;
+ int saved_lock_depth = -1;
+ unsigned long flags;
+
+ debug_rt_mutex_init_waiter(&waiter);
+ waiter.task = NULL;
+
+ /* we do PI different for writers that are blocked */
+ waiter.write_lock = 1;
+
+ spin_lock_irqsave(&mutex->wait_lock, flags);
+ init_lists(mutex);
+
+ if (try_to_take_rw_write(rwm)) {
+ spin_unlock_irqrestore(&mutex->wait_lock, flags);
+ return;
+ }
+ update_rw_mutex_owner(rwm);
+
+ /*
+ * We drop the BKL here before we go into the wait loop to avoid a
+ * possible deadlock in the scheduler.
+ */
+ if (unlikely(current->lock_depth >= 0))
+ saved_lock_depth = rt_release_bkl(mutex, flags);
+ set_current_state(TASK_UNINTERRUPTIBLE);
+
+ for (;;) {
+ unsigned long saved_flags;
+
+ /* Try to acquire the lock: */
+ if (try_to_take_rw_write(rwm))
+ break;
+ update_rw_mutex_owner(rwm);
+
+ /*
+ * waiter.task is NULL the first time we come here and
+ * when we have been woken up by the previous owner
+ * but the lock got stolen by a higher prio task.
+ */
+ if (!waiter.task) {
+ task_blocks_on_rt_mutex(mutex, &waiter, 0, flags);
+ /* Wakeup during boost ? */
+ if (unlikely(!waiter.task))
+ continue;
+ }
+ saved_flags = current->flags & PF_NOSCHED;
+ current->flags &= ~PF_NOSCHED;
+
+ spin_unlock_irqrestore(&mutex->wait_lock, flags);
+
+ debug_rt_mutex_print_deadlock(&waiter);
+
+ if (waiter.task)
+ schedule_rt_mutex(mutex);
+
+ spin_lock_irqsave(&mutex->wait_lock, flags);
+
+ current->flags |= saved_flags;
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ }
+
+ set_current_state(TASK_RUNNING);
+
+ if (unlikely(waiter.task))
+ remove_waiter(mutex, &waiter, flags);
+
+ /* check on unlock if we have any waiters. */
+ if (rt_mutex_has_waiters(mutex))
+ mark_rt_rwlock_check(rwm);
+
+ spin_unlock_irqrestore(&mutex->wait_lock, flags);
+
+ /* Must we reacquire the BKL? */
+ if (unlikely(saved_lock_depth >= 0))
+ rt_reacquire_bkl(saved_lock_depth);
+
+ WARN_ON(atomic_read(&rwm->count));
+
+ debug_rt_mutex_free_waiter(&waiter);
+
+}
+
+static inline void
+rt_write_fastlock(struct rw_mutex *rwm,
+ void fastcall (*slowfn)(struct rw_mutex *rwm))
+{
+ unsigned long val = (unsigned long)current | RT_RWLOCK_WRITER;
+
+ if (likely(rt_rwlock_cmpxchg(rwm, NULL, val))) {
+ rt_mutex_deadlock_account_lock(&rwm->mutex, current);
+ WARN_ON(atomic_read(&rwm->count));
+ } else
+ slowfn(rwm);
+}
+
+void fastcall rt_mutex_down_write(struct rw_mutex *rwm)
+{
+ rt_write_fastlock(rwm, rt_write_slowlock);
+}
+
+static int
+rt_write_slowtrylock(struct rw_mutex *rwm)
+{
+ struct rt_mutex *mutex = &rwm->mutex;
+ unsigned long flags;
+ int ret = 0;
+
+ spin_lock_irqsave(&mutex->wait_lock, flags);
+ init_lists(mutex);
+
+ if (try_to_take_rw_write(rwm))
+ ret = 1;
+
+ spin_unlock_irqrestore(&mutex->wait_lock, flags);
+
+ return ret;
+}
+
+static inline int
+rt_write_fasttrylock(struct rw_mutex *rwm,
+ int fastcall (*slowfn)(struct rw_mutex *rwm))
+{
+ unsigned long val = (unsigned long)current | RT_RWLOCK_WRITER;
+
+ if (likely(rt_rwlock_cmpxchg(rwm, NULL, val))) {
+ rt_mutex_deadlock_account_lock(&rwm->mutex, current);
+ WARN_ON(atomic_read(&rwm->count));
+ return 1;
+ } else
+ return slowfn(rwm);
+}
+
+int fastcall rt_mutex_down_write_trylock(struct rw_mutex *rwm)
+{
+ return rt_write_fasttrylock(rwm, rt_write_slowtrylock);
+}
+
+static void fastcall noinline __sched
+rt_read_slowunlock(struct rw_mutex *rwm)
+{
+ struct rt_mutex *mutex = &rwm->mutex;
+ unsigned long flags;
+ struct rt_mutex_waiter *waiter;
+
+ spin_lock_irqsave(&mutex->wait_lock, flags);
+
+ rt_mutex_deadlock_account_unlock(current);
+
+ /*
+ * To prevent multiple readers from zeroing out the owner
+ * when the count goes to zero and then having another task
+ * grab the lock, we mark the lock. This makes all tasks
+ * go to the slow path. Then we can check the owner without
+ * worrying that it changed.
+ */
+ mark_rt_rwlock_check(rwm);
+
+ /*
+ * If there are more readers, let the last one do any wakeups.
+ * Also check to make sure the owner wasn't cleared when two
+ * readers released the lock at the same time, and the count
+ * went to zero before grabbing the wait_lock.
+ */
+ if (atomic_read(&rwm->count) ||
+ (rt_rwlock_owner(rwm) != current &&
+ rt_rwlock_owner(rwm) != RT_RW_READER)) {
+ spin_unlock_irqrestore(&mutex->wait_lock, flags);
+ return;
+ }
+
+ /* If no one is blocked, then clear all ownership */
+ if (!rt_mutex_has_waiters(mutex)) {
+ /* We could still have a pending reader waiting */
+ if (rt_mutex_owner_pending(mutex)) {
+ /* set the rwm back to pending */
+ rwm->owner = RT_RW_PENDING_READ;
+ } else {
+ rwm->owner = NULL;
+ mutex->owner = NULL;
+ }
+ goto out;
+ }
+
+ /* We are the last reader with pending waiters. */
+ waiter = rt_mutex_top_waiter(mutex);
+ if (waiter->write_lock)
+ rwm->owner = RT_RW_PENDING_WRITE;
+ else
+ rwm->owner = RT_RW_PENDING_READ;
+
+ /*
+ * It is possible to have a reader waiting, and we still only
+ * wake one up in that case. The way a reader can end up waiting
+ * is that a writer was woken up, but a higher prio reader came
+ * along and stole the lock from it. The writer is then no longer
+ * waiting on the lock and needs to retake it. We simply wake up
+ * the reader and let it have the lock. If the writer comes by,
+ * it will steal the lock from the reader. This is the only time
+ * we can have a reader pending on a lock.
+ */
+ wakeup_next_waiter(mutex, 0);
+
+ out:
+ spin_unlock_irqrestore(&mutex->wait_lock, flags);
+
+ /* Undo pi boosting when necessary */
+ rt_mutex_adjust_prio(current);
+}
+
+static inline void
+rt_read_fastunlock(struct rw_mutex *rwm,
+ void fastcall (*slowfn)(struct rw_mutex *rwm))
+{
+ WARN_ON(!atomic_read(&rwm->count));
+ WARN_ON(!rwm->owner);
+ atomic_dec(&rwm->count);
+ if (likely(rt_rwlock_cmpxchg(rwm, current, NULL)))
+ rt_mutex_deadlock_account_unlock(current);
+ else
+ slowfn(rwm);
+}
+
+void fastcall rt_mutex_up_read(struct rw_mutex *rwm)
+{
+ rt_read_fastunlock(rwm, rt_read_slowunlock);
+}
+
+static void fastcall noinline __sched
+rt_write_slowunlock(struct rw_mutex *rwm)
+{
+ struct rt_mutex *mutex = &rwm->mutex;
+ struct rt_mutex_waiter *waiter;
+ struct task_struct *pendowner;
+ unsigned long flags;
+
+ spin_lock_irqsave(&mutex->wait_lock, flags);
+
+ rt_mutex_deadlock_account_unlock(current);
+
+ if (!rt_mutex_has_waiters(mutex)) {
+ rwm->owner = NULL;
+ mutex->owner = NULL;
+ spin_unlock_irqrestore(&mutex->wait_lock, flags);
+ return;
+ }
+
+ debug_rt_mutex_unlock(mutex);
+
+ /*
+ * This is where it gets a bit tricky.
+ * We can have both readers and writers waiting below us.
+ * They are ordered by priority. For each reader we wake
+ * up, we check to see if there's another reader waiting.
+ * If that is the case, we continue to wake up the readers
+ * until we hit a writer. Once we hit a writer, then we
+ * stop (and don't wake it up).
+ *
+ * If the next waiter is a writer, then we just wake up
+ * the writer and we are done.
+ */
+
+ waiter = rt_mutex_top_waiter(mutex);
+ pendowner = waiter->task;
+ wakeup_next_waiter(mutex, 0);
+
+ /* another writer is next? */
+ if (waiter->write_lock) {
+ rwm->owner = RT_RW_PENDING_WRITE;
+ goto out;
+ }
+
+ rwm->owner = RT_RW_PENDING_READ;
+
+ if (!rt_mutex_has_waiters(mutex))
+ goto out;
+
+ spin_lock(&pendowner->pi_lock);
+ /*
+ * Wake up all readers.
+ * This gets a bit more complex. The underlying mutex can only
+ * have a single owner. We give it to the first (highest prio)
+ * reader, and then wake up the rest of the readers until
+ * we have woken them all or come to a writer. The woken
+ * up readers that don't own the lock will try to take it
+ * when they schedule. Doing this lets a high prio writer
+ * come along and steal the lock.
+ */
+ waiter = rt_mutex_top_waiter(mutex);
+ while (waiter && !waiter->write_lock) {
+ struct task_struct *reader = waiter->task;
+
+ plist_del(&waiter->list_entry, &mutex->wait_list);
+
+ /* nop if not on a list */
+ plist_del(&waiter->pi_list_entry, &pendowner->pi_waiters);
+
+ waiter->task = NULL;
+ reader->pi_blocked_on = NULL;
+
+ wake_up_process(reader);
+
+ if (rt_mutex_has_waiters(mutex))
+ waiter = rt_mutex_top_waiter(mutex);
+ else
+ waiter = NULL;
+ }
+
+ /* If a writer is still pending, then update its plist. */
+ if (rt_mutex_has_waiters(mutex)) {
+ struct rt_mutex_waiter *next;
+
+ next = rt_mutex_top_waiter(mutex);
+ /* delete in case we didn't go through the loop */
+ plist_del(&next->pi_list_entry, &pendowner->pi_waiters);
+ /* add back in as top waiter */
+ plist_add(&next->pi_list_entry, &pendowner->pi_waiters);
+ }
+ spin_unlock(&pendowner->pi_lock);
+
+ out:
+
+ spin_unlock_irqrestore(&mutex->wait_lock, flags);
+
+ /* Undo pi boosting when necessary */
+ rt_mutex_adjust_prio(current);
+}
+
+static inline void
+rt_write_fastunlock(struct rw_mutex *rwm,
+ void fastcall (*slowfn)(struct rw_mutex *rwm))
+{
+ unsigned long val = (unsigned long)current | RT_RWLOCK_WRITER;
+
+ WARN_ON(rt_rwlock_owner(rwm) != current);
+ if (likely(rt_rwlock_cmpxchg(rwm, (struct task_struct *)val, NULL)))
+ rt_mutex_deadlock_account_unlock(current);
+ else
+ slowfn(rwm);
+}
+
+void fastcall rt_mutex_up_write(struct rw_mutex *rwm)
+{
+ rt_write_fastunlock(rwm, rt_write_slowunlock);
+}
+
+void rt_mutex_rwsem_init(struct rw_mutex *rwm, const char *name)
+{
+ struct rt_mutex *mutex = &rwm->mutex;
+
+ rwm->owner = NULL;
+ atomic_set(&rwm->count, 0);
+
+ __rt_mutex_init(mutex, name);
+}
+
+#endif /* CONFIG_PREEMPT_RT */
#ifdef CONFIG_PREEMPT_BKL
@@ -942,6 +1639,7 @@ rt_mutex_slowlock(struct rt_mutex *lock,
debug_rt_mutex_init_waiter(&waiter);
waiter.task = NULL;
+ waiter.write_lock = 0;
spin_lock_irqsave(&lock->wait_lock, flags);
init_lists(lock);
Index: linux-2.6.24.4-rt4/kernel/rtmutex_common.h
===================================================================
--- linux-2.6.24.4-rt4.orig/kernel/rtmutex_common.h 2008-03-25 16:41:48.000000000 -0400
+++ linux-2.6.24.4-rt4/kernel/rtmutex_common.h 2008-03-25 21:45:43.000000000 -0400
@@ -13,6 +13,7 @@
#define __KERNEL_RTMUTEX_COMMON_H
#include <linux/rtmutex.h>
+#include <linux/rt_lock.h>
/*
* The rtmutex in kernel tester is independent of rtmutex debugging. We
@@ -43,12 +44,14 @@ extern void schedule_rt_mutex_test(struc
* @list_entry: pi node to enqueue into the mutex waiters list
* @pi_list_entry: pi node to enqueue into the mutex owner waiters list
* @task: task reference to the blocked task
+ * @write_lock: true if blocked as writer
*/
struct rt_mutex_waiter {
struct plist_node list_entry;
struct plist_node pi_list_entry;
struct task_struct *task;
struct rt_mutex *lock;
+ int write_lock;
#ifdef CONFIG_DEBUG_RT_MUTEXES
unsigned long ip;
pid_t deadlock_task_pid;
@@ -112,6 +115,60 @@ static inline unsigned long rt_mutex_own
return (unsigned long)lock->owner & RT_MUTEX_OWNER_PENDING;
}
+#ifdef CONFIG_PREEMPT_RT
+/*
+ * rw_mutex->owner state tracking
+ */
+#define RT_RWLOCK_CHECK 1UL
+#define RT_RWLOCK_WRITER 2UL
+#define RT_RWLOCK_MASKALL 3UL
+
+/* used as reader owner of the mutex */
+#define RT_RW_READER (struct task_struct *)0x100
+
+/* used when a writer releases the lock with waiters */
+/* pending owner is a reader */
+#define RT_RW_PENDING_READ (struct task_struct *)0x200
+/* pending owner is a writer */
+#define RT_RW_PENDING_WRITE (struct task_struct *)0x400
+/* Either of the above is true */
+#define RT_RW_PENDING_MASK (0x600 | RT_RWLOCK_MASKALL)
+
+/* Return true if lock is not owned but has pending owners */
+static inline int rt_rwlock_pending(struct rw_mutex *rwm)
+{
+ unsigned long owner = (unsigned long)rwm->owner;
+ return (owner & RT_RW_PENDING_MASK) == owner;
+}
+
+static inline int rt_rwlock_pending_writer(struct rw_mutex *rwm)
+{
+ unsigned long owner = (unsigned long)rwm->owner;
+ return rt_rwlock_pending(rwm) &&
+ (owner & (unsigned long)RT_RW_PENDING_WRITE);
+}
+
+static inline struct task_struct *rt_rwlock_owner(struct rw_mutex *rwm)
+{
+ return (struct task_struct *)
+ ((unsigned long)rwm->owner & ~RT_RWLOCK_MASKALL);
+}
+
+static inline unsigned long rt_rwlock_writer(struct rw_mutex *rwm)
+{
+ return (unsigned long)rwm->owner & RT_RWLOCK_WRITER;
+}
+
+extern void rt_mutex_up_write(struct rw_mutex *rwm);
+extern void rt_mutex_up_read(struct rw_mutex *rwm);
+extern int rt_mutex_down_write_trylock(struct rw_mutex *rwm);
+extern void rt_mutex_down_write(struct rw_mutex *rwm);
+extern int rt_mutex_down_read_trylock(struct rw_mutex *rwm);
+extern void rt_mutex_down_read(struct rw_mutex *rwm);
+extern void rt_mutex_rwsem_init(struct rw_mutex *rwm, const char *name);
+
+#endif /* CONFIG_PREEMPT_RT */
+
/*
* PI-futex support (proxy locking functions, etc.):
*/
Index: linux-2.6.24.4-rt4/kernel/rt.c
===================================================================
--- linux-2.6.24.4-rt4.orig/kernel/rt.c 2008-03-25 16:41:48.000000000 -0400
+++ linux-2.6.24.4-rt4/kernel/rt.c 2008-03-25 21:38:23.000000000 -0400
@@ -301,26 +301,14 @@ EXPORT_SYMBOL(__rt_rwlock_init);
void fastcall rt_up_write(struct rw_semaphore *rwsem)
{
rwsem_release(&rwsem->dep_map, 1, _RET_IP_);
- rt_mutex_unlock(&rwsem->lock);
+ rt_mutex_up_write(&rwsem->owners);
}
EXPORT_SYMBOL(rt_up_write);
void fastcall rt_up_read(struct rw_semaphore *rwsem)
{
- unsigned long flags;
-
rwsem_release(&rwsem->dep_map, 1, _RET_IP_);
- /*
- * Read locks within the self-held write lock succeed.
- */
- spin_lock_irqsave(&rwsem->lock.wait_lock, flags);
- if (rt_mutex_real_owner(&rwsem->lock) == current && rwsem->read_depth) {
- spin_unlock_irqrestore(&rwsem->lock.wait_lock, flags);
- rwsem->read_depth--;
- return;
- }
- spin_unlock_irqrestore(&rwsem->lock.wait_lock, flags);
- rt_mutex_unlock(&rwsem->lock);
+ rt_mutex_up_read(&rwsem->owners);
}
EXPORT_SYMBOL(rt_up_read);
@@ -336,7 +324,7 @@ EXPORT_SYMBOL(rt_downgrade_write);
int fastcall rt_down_write_trylock(struct rw_semaphore *rwsem)
{
- int ret = rt_mutex_trylock(&rwsem->lock);
+ int ret = rt_mutex_down_write_trylock(&rwsem->owners);
if (ret)
rwsem_acquire(&rwsem->dep_map, 0, 1, _RET_IP_);
@@ -344,38 +332,29 @@ int fastcall rt_down_write_trylock(struc
}
EXPORT_SYMBOL(rt_down_write_trylock);
+static void __rt_down_write(struct rw_semaphore *rwsem, int subclass)
+{
+ rwsem_acquire(&rwsem->dep_map, subclass, 0, _RET_IP_);
+ LOCK_CONTENDED_RT_RW(rwsem, rt_mutex_down_write_trylock, rt_mutex_down_write);
+}
+
void fastcall rt_down_write(struct rw_semaphore *rwsem)
{
- rwsem_acquire(&rwsem->dep_map, 0, 0, _RET_IP_);
- LOCK_CONTENDED_RT(rwsem, rt_mutex_trylock, rt_mutex_lock);
+ __rt_down_write(rwsem, 0);
}
EXPORT_SYMBOL(rt_down_write);
void fastcall rt_down_write_nested(struct rw_semaphore *rwsem, int subclass)
{
- rwsem_acquire(&rwsem->dep_map, subclass, 0, _RET_IP_);
- LOCK_CONTENDED_RT(rwsem, rt_mutex_trylock, rt_mutex_lock);
+ __rt_down_write(rwsem, subclass);
}
EXPORT_SYMBOL(rt_down_write_nested);
int fastcall rt_down_read_trylock(struct rw_semaphore *rwsem)
{
- unsigned long flags;
int ret;
- /*
- * Read locks within the self-held write lock succeed.
- */
- spin_lock_irqsave(&rwsem->lock.wait_lock, flags);
- if (rt_mutex_real_owner(&rwsem->lock) == current) {
- spin_unlock_irqrestore(&rwsem->lock.wait_lock, flags);
- rwsem_acquire_read(&rwsem->dep_map, 0, 1, _RET_IP_);
- rwsem->read_depth++;
- return 1;
- }
- spin_unlock_irqrestore(&rwsem->lock.wait_lock, flags);
-
- ret = rt_mutex_trylock(&rwsem->lock);
+ ret = rt_mutex_down_read_trylock(&rwsem->owners);
if (ret)
rwsem_acquire(&rwsem->dep_map, 0, 1, _RET_IP_);
return ret;
@@ -384,22 +363,8 @@ EXPORT_SYMBOL(rt_down_read_trylock);
static void __rt_down_read(struct rw_semaphore *rwsem, int subclass)
{
- unsigned long flags;
-
rwsem_acquire_read(&rwsem->dep_map, subclass, 0, _RET_IP_);
-
- /*
- * Read locks within the write lock succeed.
- */
- spin_lock_irqsave(&rwsem->lock.wait_lock, flags);
-
- if (rt_mutex_real_owner(&rwsem->lock) == current) {
- spin_unlock_irqrestore(&rwsem->lock.wait_lock, flags);
- rwsem->read_depth++;
- return;
- }
- spin_unlock_irqrestore(&rwsem->lock.wait_lock, flags);
- LOCK_CONTENDED_RT(rwsem, rt_mutex_trylock, rt_mutex_lock);
+ LOCK_CONTENDED_RT_RW(rwsem, rt_mutex_down_read_trylock, rt_mutex_down_read);
}
void fastcall rt_down_read(struct rw_semaphore *rwsem)
@@ -424,8 +389,7 @@ void fastcall __rt_rwsem_init(struct rw_
debug_check_no_locks_freed((void *)rwsem, sizeof(*rwsem));
lockdep_init_map(&rwsem->dep_map, name, key, 0);
#endif
- __rt_mutex_init(&rwsem->lock, name);
- rwsem->read_depth = 0;
+ rt_mutex_rwsem_init(&rwsem->owners, name);
}
EXPORT_SYMBOL(__rt_rwsem_init);
Index: linux-2.6.24.4-rt4/include/linux/lockdep.h
===================================================================
--- linux-2.6.24.4-rt4.orig/include/linux/lockdep.h 2008-03-25 16:41:48.000000000 -0400
+++ linux-2.6.24.4-rt4/include/linux/lockdep.h 2008-03-25 21:38:23.000000000 -0400
@@ -383,6 +383,16 @@ do { \
ret; \
})
+#define LOCK_CONTENDED_RT_RW(_lock, f_try, f_lock) \
+do { \
+ if (!f_try(&(_lock)->owners)) { \
+ lock_contended(&(_lock)->dep_map, _RET_IP_); \
+ f_lock(&(_lock)->owners); \
+ } \
+ lock_acquired(&(_lock)->dep_map); \
+} while (0)
+
+
#else /* CONFIG_LOCK_STAT */
#define lock_contended(lockdep_map, ip) do {} while (0)
@@ -397,6 +407,9 @@ do { \
#define LOCK_CONTENDED_RT_RET(_lock, f_try, f_lock) \
f_lock(&(_lock)->lock)
+#define LOCK_CONTENDED_RT_RW(_lock, f_try, f_lock) \
+ f_lock(&(_lock)->owners)
+
#endif /* CONFIG_LOCK_STAT */
#if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_GENERIC_HARDIRQS)
--
* [PATCH RT 2/6] implement rwlocks management
2008-04-25 13:09 [PATCH RT 0/6] New read/write locks for PI and multiple readers Steven Rostedt
2008-04-25 13:09 ` [PATCH RT 1/6] add framework for multi readers on rwsems Steven Rostedt
@ 2008-04-25 13:09 ` Steven Rostedt
2008-04-25 13:09 ` [PATCH RT 3/6] map tasks to reader locks held Steven Rostedt
` (3 subsequent siblings)
5 siblings, 0 replies; 7+ messages in thread
From: Steven Rostedt @ 2008-04-25 13:09 UTC (permalink / raw)
To: linux-kernel, linux-rt-users
Cc: Ingo Molnar, Steven Rostedt, Peter Zijlstra, Thomas Gleixner,
Clark Williams, Arnaldo Carvalho de Melo, Jon Masters,
Gregory Haskins, Steven Rostedt
[-- Attachment #1: rwlocks-multiple-readers.patch --]
[-- Type: text/plain, Size: 16677 bytes --]
This patch adds the management for rwlocks to have multiple readers.
Like the rwsems, it does not do PI boosting on readers when a writer
is blocked.
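To show the shape of the change (illustration only, not part of the patch):
the read/write slow paths gain an "mtx" flag so that one slow path can serve
both the sleeping rwsem variant (mtx=1), which drops the BKL and sleeps
TASK_UNINTERRUPTIBLE, and the spinlock-style rwlock variant (mtx=0), which
must preserve the BKL depth and the caller's saved task state. The toy_*
names and the reduced state handling below are invented for the sketch; the
real code does the state dance with xchg() on current->state, as the diff
shows.
/* Illustration only: a toy model of the mtx flag, not kernel code. */
#include <assert.h>
#include <stdio.h>

#define TASK_RUNNING         0
#define TASK_UNINTERRUPTIBLE 2

struct toy_task {
    int state;
    int lock_depth;                /* BKL recursion depth, -1 if not held */
};

/* One slow path for both lock types, selected by mtx. */
static void toy_read_slowlock(struct toy_task *t, int mtx)
{
    int saved_state = -1, saved_lock_depth = -1;

    if (mtx) {
        /* rwsem: release the BKL and sleep uninterruptibly */
        saved_lock_depth = t->lock_depth;
        t->lock_depth = -1;
        t->state = TASK_UNINTERRUPTIBLE;
    } else {
        /* rwlock: spinlock semantics, remember the task state */
        saved_state = t->state;
        t->state = TASK_UNINTERRUPTIBLE;
    }

    /* ... the wait/retry loop of the real slow path goes here ... */

    if (mtx) {
        t->state = TASK_RUNNING;
        t->lock_depth = saved_lock_depth;   /* reacquire the BKL */
    } else {
        t->state = saved_state;             /* restore the saved state */
    }
}

int main(void)
{
    struct toy_task t = { .state = TASK_RUNNING, .lock_depth = 1 };

    toy_read_slowlock(&t, 0);      /* rwlock entry: rt_rwlock_read_lock() */
    assert(t.state == TASK_RUNNING && t.lock_depth == 1);

    toy_read_slowlock(&t, 1);      /* rwsem entry: rt_mutex_down_read() */
    assert(t.state == TASK_RUNNING && t.lock_depth == 1);

    printf("one slow path serves rwsems and rwlocks\n");
    return 0;
}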
Signed-off-by: Steven Rostedt <srostedt@redhat.com>
---
include/linux/rt_lock.h | 5 -
include/linux/spinlock.h | 2
kernel/rt.c | 56 ++--------------
kernel/rtmutex.c | 158 ++++++++++++++++++++++++++++++++++-------------
kernel/rtmutex_common.h | 4 +
5 files changed, 129 insertions(+), 96 deletions(-)
Index: linux-2.6.24.4-rt4/include/linux/rt_lock.h
===================================================================
--- linux-2.6.24.4-rt4.orig/include/linux/rt_lock.h 2008-03-25 21:39:23.000000000 -0400
+++ linux-2.6.24.4-rt4/include/linux/rt_lock.h 2008-03-25 22:54:24.000000000 -0400
@@ -87,8 +87,7 @@ struct rw_semaphore {
* rwlocks - an RW semaphore plus lock-break field:
*/
typedef struct {
- struct rt_mutex lock;
- int read_depth;
+ struct rw_mutex owners;
unsigned int break_lock;
#ifdef CONFIG_DEBUG_LOCK_ALLOC
struct lockdep_map dep_map;
@@ -96,7 +95,7 @@ typedef struct {
} rwlock_t;
#define __RW_LOCK_UNLOCKED(name) (rwlock_t) \
- { .lock = __RT_SPIN_INITIALIZER(name), \
+ { .owners.mutex = __RT_SPIN_INITIALIZER(name.owners.mutex), \
RW_DEP_MAP_INIT(name) }
#else /* !PREEMPT_RT */
Index: linux-2.6.24.4-rt4/include/linux/spinlock.h
===================================================================
--- linux-2.6.24.4-rt4.orig/include/linux/spinlock.h 2008-03-25 16:41:48.000000000 -0400
+++ linux-2.6.24.4-rt4/include/linux/spinlock.h 2008-03-25 22:54:24.000000000 -0400
@@ -266,7 +266,7 @@ do { \
#ifdef CONFIG_PREEMPT_RT
# define rt_read_can_lock(rwl) (!rt_mutex_is_locked(&(rwl)->lock))
-# define rt_write_can_lock(rwl) (!rt_mutex_is_locked(&(rwl)->lock))
+# define rt_write_can_lock(rwl) ((rwl)->owners.owner == NULL)
#else
extern int rt_rwlock_can_lock_never_call_on_non_rt(rwlock_t *rwlock);
# define rt_read_can_lock(rwl) rt_rwlock_can_lock_never_call_on_non_rt(rwl)
Index: linux-2.6.24.4-rt4/kernel/rt.c
===================================================================
--- linux-2.6.24.4-rt4.orig/kernel/rt.c 2008-03-25 21:38:23.000000000 -0400
+++ linux-2.6.24.4-rt4/kernel/rt.c 2008-03-25 22:54:24.000000000 -0400
@@ -165,7 +165,7 @@ EXPORT_SYMBOL(_mutex_unlock);
*/
int __lockfunc rt_write_trylock(rwlock_t *rwlock)
{
- int ret = rt_mutex_trylock(&rwlock->lock);
+ int ret = rt_mutex_down_write_trylock(&rwlock->owners);
if (ret)
rwlock_acquire(&rwlock->dep_map, 0, 1, _RET_IP_);
@@ -183,23 +183,9 @@ EXPORT_SYMBOL(rt_write_trylock_irqsave);
int __lockfunc rt_read_trylock(rwlock_t *rwlock)
{
- struct rt_mutex *lock = &rwlock->lock;
- unsigned long flags;
int ret;
- /*
- * Read locks within the self-held write lock succeed.
- */
- spin_lock_irqsave(&lock->wait_lock, flags);
- if (rt_mutex_real_owner(lock) == current) {
- spin_unlock_irqrestore(&lock->wait_lock, flags);
- rwlock->read_depth++;
- rwlock_acquire_read(&rwlock->dep_map, 0, 1, _RET_IP_);
- return 1;
- }
- spin_unlock_irqrestore(&lock->wait_lock, flags);
-
- ret = rt_mutex_trylock(lock);
+ ret = rt_mutex_down_read_trylock(&rwlock->owners);
if (ret)
rwlock_acquire_read(&rwlock->dep_map, 0, 1, _RET_IP_);
@@ -210,27 +196,14 @@ EXPORT_SYMBOL(rt_read_trylock);
void __lockfunc rt_write_lock(rwlock_t *rwlock)
{
rwlock_acquire(&rwlock->dep_map, 0, 0, _RET_IP_);
- LOCK_CONTENDED_RT(rwlock, rt_mutex_trylock, __rt_spin_lock);
+ LOCK_CONTENDED_RT_RW(rwlock, rt_mutex_down_write_trylock, rt_rwlock_write_lock);
}
EXPORT_SYMBOL(rt_write_lock);
void __lockfunc rt_read_lock(rwlock_t *rwlock)
{
- unsigned long flags;
- struct rt_mutex *lock = &rwlock->lock;
-
rwlock_acquire_read(&rwlock->dep_map, 0, 0, _RET_IP_);
- /*
- * Read locks within the write lock succeed.
- */
- spin_lock_irqsave(&lock->wait_lock, flags);
- if (rt_mutex_real_owner(lock) == current) {
- spin_unlock_irqrestore(&lock->wait_lock, flags);
- rwlock->read_depth++;
- return;
- }
- spin_unlock_irqrestore(&lock->wait_lock, flags);
- LOCK_CONTENDED_RT(rwlock, rt_mutex_trylock, __rt_spin_lock);
+ LOCK_CONTENDED_RT_RW(rwlock, rt_mutex_down_read_trylock, rt_rwlock_read_lock);
}
EXPORT_SYMBOL(rt_read_lock);
@@ -239,28 +212,14 @@ void __lockfunc rt_write_unlock(rwlock_t
{
/* NOTE: we always pass in '1' for nested, for simplicity */
rwlock_release(&rwlock->dep_map, 1, _RET_IP_);
- __rt_spin_unlock(&rwlock->lock);
+ rt_rwlock_write_unlock(&rwlock->owners);
}
EXPORT_SYMBOL(rt_write_unlock);
void __lockfunc rt_read_unlock(rwlock_t *rwlock)
{
- struct rt_mutex *lock = &rwlock->lock;
- unsigned long flags;
-
rwlock_release(&rwlock->dep_map, 1, _RET_IP_);
- // TRACE_WARN_ON(lock->save_state != 1);
- /*
- * Read locks within the self-held write lock succeed.
- */
- spin_lock_irqsave(&lock->wait_lock, flags);
- if (rt_mutex_real_owner(lock) == current && rwlock->read_depth) {
- spin_unlock_irqrestore(&lock->wait_lock, flags);
- rwlock->read_depth--;
- return;
- }
- spin_unlock_irqrestore(&lock->wait_lock, flags);
- __rt_spin_unlock(&rwlock->lock);
+ rt_rwlock_read_unlock(&rwlock->owners);
}
EXPORT_SYMBOL(rt_read_unlock);
@@ -289,8 +248,7 @@ void __rt_rwlock_init(rwlock_t *rwlock,
debug_check_no_locks_freed((void *)rwlock, sizeof(*rwlock));
lockdep_init_map(&rwlock->dep_map, name, key, 0);
#endif
- __rt_mutex_init(&rwlock->lock, name);
- rwlock->read_depth = 0;
+ rt_mutex_rwsem_init(&rwlock->owners, name);
}
EXPORT_SYMBOL(__rt_rwlock_init);
Index: linux-2.6.24.4-rt4/kernel/rtmutex.c
===================================================================
--- linux-2.6.24.4-rt4.orig/kernel/rtmutex.c 2008-03-25 22:39:14.000000000 -0400
+++ linux-2.6.24.4-rt4/kernel/rtmutex.c 2008-03-25 22:54:24.000000000 -0400
@@ -1072,12 +1072,12 @@ try_to_take_rw_write(struct rw_mutex *rw
}
static void
-rt_read_slowlock(struct rw_mutex *rwm)
+rt_read_slowlock(struct rw_mutex *rwm, int mtx)
{
struct rt_mutex_waiter waiter;
struct rt_mutex *mutex = &rwm->mutex;
int saved_lock_depth = -1;
- unsigned long flags;
+ unsigned long saved_state = -1, state, flags;
spin_lock_irqsave(&mutex->wait_lock, flags);
init_lists(mutex);
@@ -1096,13 +1096,19 @@ rt_read_slowlock(struct rw_mutex *rwm)
init_lists(mutex);
- /*
- * We drop the BKL here before we go into the wait loop to avoid a
- * possible deadlock in the scheduler.
- */
- if (unlikely(current->lock_depth >= 0))
- saved_lock_depth = rt_release_bkl(mutex, flags);
- set_current_state(TASK_UNINTERRUPTIBLE);
+ if (mtx) {
+ /*
+ * We drop the BKL here before we go into the wait loop to avoid a
+ * possible deadlock in the scheduler.
+ */
+ if (unlikely(current->lock_depth >= 0))
+ saved_lock_depth = rt_release_bkl(mutex, flags);
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ } else {
+ /* Spin lock must preserve BKL */
+ saved_state = xchg(¤t->state, TASK_UNINTERRUPTIBLE);
+ saved_lock_depth = current->lock_depth;
+ }
for (;;) {
unsigned long saved_flags;
@@ -1125,21 +1131,36 @@ rt_read_slowlock(struct rw_mutex *rwm)
}
saved_flags = current->flags & PF_NOSCHED;
current->flags &= ~PF_NOSCHED;
+ if (!mtx)
+ current->lock_depth = -1;
spin_unlock_irqrestore(&mutex->wait_lock, flags);
debug_rt_mutex_print_deadlock(&waiter);
- if (waiter.task)
+ if (!mtx || waiter.task)
schedule_rt_mutex(mutex);
spin_lock_irqsave(&mutex->wait_lock, flags);
current->flags |= saved_flags;
- set_current_state(TASK_UNINTERRUPTIBLE);
+ if (mtx)
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ else {
+ current->lock_depth = saved_lock_depth;
+ state = xchg(¤t->state, TASK_UNINTERRUPTIBLE);
+ if (unlikely(state == TASK_RUNNING))
+ saved_state = TASK_RUNNING;
+ }
}
- set_current_state(TASK_RUNNING);
+ if (mtx)
+ set_current_state(TASK_RUNNING);
+ else {
+ state = xchg(¤t->state, saved_state);
+ if (unlikely(state == TASK_RUNNING))
+ current->state = TASK_RUNNING;
+ }
if (unlikely(waiter.task))
remove_waiter(mutex, &waiter, flags);
@@ -1152,7 +1173,7 @@ rt_read_slowlock(struct rw_mutex *rwm)
spin_unlock_irqrestore(&mutex->wait_lock, flags);
/* Must we reacquire the BKL? */
- if (unlikely(saved_lock_depth >= 0))
+ if (mtx && unlikely(saved_lock_depth >= 0))
rt_reacquire_bkl(saved_lock_depth);
debug_rt_mutex_free_waiter(&waiter);
@@ -1160,7 +1181,8 @@ rt_read_slowlock(struct rw_mutex *rwm)
static inline void
rt_read_fastlock(struct rw_mutex *rwm,
- void fastcall (*slowfn)(struct rw_mutex *rwm))
+ void fastcall (*slowfn)(struct rw_mutex *rwm, int mtx),
+ int mtx)
{
retry:
if (likely(rt_rwlock_cmpxchg(rwm, NULL, current))) {
@@ -1176,12 +1198,17 @@ retry:
goto retry;
}
} else
- slowfn(rwm);
+ slowfn(rwm, mtx);
}
void fastcall rt_mutex_down_read(struct rw_mutex *rwm)
{
- rt_read_fastlock(rwm, rt_read_slowlock);
+ rt_read_fastlock(rwm, rt_read_slowlock, 1);
+}
+
+void fastcall rt_rwlock_read_lock(struct rw_mutex *rwm)
+{
+ rt_read_fastlock(rwm, rt_read_slowlock, 0);
}
@@ -1231,12 +1258,12 @@ int __sched rt_mutex_down_read_trylock(s
}
static void
-rt_write_slowlock(struct rw_mutex *rwm)
+rt_write_slowlock(struct rw_mutex *rwm, int mtx)
{
struct rt_mutex *mutex = &rwm->mutex;
struct rt_mutex_waiter waiter;
int saved_lock_depth = -1;
- unsigned long flags;
+ unsigned long flags, saved_state = -1, state;
debug_rt_mutex_init_waiter(&waiter);
waiter.task = NULL;
@@ -1253,13 +1280,19 @@ rt_write_slowlock(struct rw_mutex *rwm)
}
update_rw_mutex_owner(rwm);
- /*
- * We drop the BKL here before we go into the wait loop to avoid a
- * possible deadlock in the scheduler.
- */
- if (unlikely(current->lock_depth >= 0))
- saved_lock_depth = rt_release_bkl(mutex, flags);
- set_current_state(TASK_UNINTERRUPTIBLE);
+ if (mtx) {
+ /*
+ * We drop the BKL here before we go into the wait loop to avoid a
+ * possible deadlock in the scheduler.
+ */
+ if (unlikely(current->lock_depth >= 0))
+ saved_lock_depth = rt_release_bkl(mutex, flags);
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ } else {
+ /* Spin locks must preserve the BKL */
+ saved_lock_depth = current->lock_depth;
+ saved_state = xchg(¤t->state, TASK_UNINTERRUPTIBLE);
+ }
for (;;) {
unsigned long saved_flags;
@@ -1282,21 +1315,36 @@ rt_write_slowlock(struct rw_mutex *rwm)
}
saved_flags = current->flags & PF_NOSCHED;
current->flags &= ~PF_NOSCHED;
+ if (!mtx)
+ current->lock_depth = -1;
spin_unlock_irqrestore(&mutex->wait_lock, flags);
debug_rt_mutex_print_deadlock(&waiter);
- if (waiter.task)
+ if (!mtx || waiter.task)
schedule_rt_mutex(mutex);
spin_lock_irqsave(&mutex->wait_lock, flags);
current->flags |= saved_flags;
- set_current_state(TASK_UNINTERRUPTIBLE);
+ if (mtx)
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ else {
+ current->lock_depth = saved_lock_depth;
+ state = xchg(¤t->state, TASK_UNINTERRUPTIBLE);
+ if (unlikely(state == TASK_RUNNING))
+ saved_state = TASK_RUNNING;
+ }
}
- set_current_state(TASK_RUNNING);
+ if (mtx)
+ set_current_state(TASK_RUNNING);
+ else {
+ state = xchg(¤t->state, saved_state);
+ if (unlikely(state == TASK_RUNNING))
+ current->state = TASK_RUNNING;
+ }
if (unlikely(waiter.task))
remove_waiter(mutex, &waiter, flags);
@@ -1308,7 +1356,7 @@ rt_write_slowlock(struct rw_mutex *rwm)
spin_unlock_irqrestore(&mutex->wait_lock, flags);
/* Must we reacquire the BKL? */
- if (unlikely(saved_lock_depth >= 0))
+ if (mtx && unlikely(saved_lock_depth >= 0))
rt_reacquire_bkl(saved_lock_depth);
WARN_ON(atomic_read(&rwm->count));
@@ -1319,7 +1367,8 @@ rt_write_slowlock(struct rw_mutex *rwm)
static inline void
rt_write_fastlock(struct rw_mutex *rwm,
- void fastcall (*slowfn)(struct rw_mutex *rwm))
+ void fastcall (*slowfn)(struct rw_mutex *rwm, int mtx),
+ int mtx)
{
unsigned long val = (unsigned long)current | RT_RWLOCK_WRITER;
@@ -1327,12 +1376,17 @@ rt_write_fastlock(struct rw_mutex *rwm,
rt_mutex_deadlock_account_lock(&rwm->mutex, current);
WARN_ON(atomic_read(&rwm->count));
} else
- slowfn(rwm);
+ slowfn(rwm, mtx);
}
void fastcall rt_mutex_down_write(struct rw_mutex *rwm)
{
- rt_write_fastlock(rwm, rt_write_slowlock);
+ rt_write_fastlock(rwm, rt_write_slowlock, 1);
+}
+
+void fastcall rt_rwlock_write_lock(struct rw_mutex *rwm)
+{
+ rt_write_fastlock(rwm, rt_write_slowlock, 0);
}
static int
@@ -1373,10 +1427,11 @@ int fastcall rt_mutex_down_write_trylock
}
static void fastcall noinline __sched
-rt_read_slowunlock(struct rw_mutex *rwm)
+rt_read_slowunlock(struct rw_mutex *rwm, int mtx)
{
struct rt_mutex *mutex = &rwm->mutex;
unsigned long flags;
+ int savestate = !mtx;
struct rt_mutex_waiter *waiter;
spin_lock_irqsave(&mutex->wait_lock, flags);
@@ -1436,7 +1491,7 @@ rt_read_slowunlock(struct rw_mutex *rwm)
* will steal the lock from the reader. This is the
* only time we can have a reader pending on a lock.
*/
- wakeup_next_waiter(mutex, 0);
+ wakeup_next_waiter(mutex, savestate);
out:
spin_unlock_irqrestore(&mutex->wait_lock, flags);
@@ -1447,7 +1502,8 @@ rt_read_slowunlock(struct rw_mutex *rwm)
static inline void
rt_read_fastunlock(struct rw_mutex *rwm,
- void fastcall (*slowfn)(struct rw_mutex *rwm))
+ void fastcall (*slowfn)(struct rw_mutex *rwm, int mtx),
+ int mtx)
{
WARN_ON(!atomic_read(&rwm->count));
WARN_ON(!rwm->owner);
@@ -1455,20 +1511,26 @@ rt_read_fastunlock(struct rw_mutex *rwm,
if (likely(rt_rwlock_cmpxchg(rwm, current, NULL)))
rt_mutex_deadlock_account_unlock(current);
else
- slowfn(rwm);
+ slowfn(rwm, mtx);
}
void fastcall rt_mutex_up_read(struct rw_mutex *rwm)
{
- rt_read_fastunlock(rwm, rt_read_slowunlock);
+ rt_read_fastunlock(rwm, rt_read_slowunlock, 1);
+}
+
+void fastcall rt_rwlock_read_unlock(struct rw_mutex *rwm)
+{
+ rt_read_fastunlock(rwm, rt_read_slowunlock, 0);
}
static void fastcall noinline __sched
-rt_write_slowunlock(struct rw_mutex *rwm)
+rt_write_slowunlock(struct rw_mutex *rwm, int mtx)
{
struct rt_mutex *mutex = &rwm->mutex;
struct rt_mutex_waiter *waiter;
struct task_struct *pendowner;
+ int savestate = !mtx;
unsigned long flags;
spin_lock_irqsave(&mutex->wait_lock, flags);
@@ -1499,7 +1561,7 @@ rt_write_slowunlock(struct rw_mutex *rwm
waiter = rt_mutex_top_waiter(mutex);
pendowner = waiter->task;
- wakeup_next_waiter(mutex, 0);
+ wakeup_next_waiter(mutex, savestate);
/* another writer is next? */
if (waiter->write_lock) {
@@ -1535,7 +1597,10 @@ rt_write_slowunlock(struct rw_mutex *rwm
waiter->task = NULL;
reader->pi_blocked_on = NULL;
- wake_up_process(reader);
+ if (savestate)
+ wake_up_process_mutex(reader);
+ else
+ wake_up_process(reader);
if (rt_mutex_has_waiters(mutex))
waiter = rt_mutex_top_waiter(mutex);
@@ -1565,7 +1630,9 @@ rt_write_slowunlock(struct rw_mutex *rwm
static inline void
rt_write_fastunlock(struct rw_mutex *rwm,
- void fastcall (*slowfn)(struct rw_mutex *rwm))
+ void fastcall (*slowfn)(struct rw_mutex *rwm,
+ int mtx),
+ int mtx)
{
unsigned long val = (unsigned long)current | RT_RWLOCK_WRITER;
@@ -1573,12 +1640,17 @@ rt_write_fastunlock(struct rw_mutex *rwm
if (likely(rt_rwlock_cmpxchg(rwm, (struct task_struct *)val, NULL)))
rt_mutex_deadlock_account_unlock(current);
else
- slowfn(rwm);
+ slowfn(rwm, mtx);
}
void fastcall rt_mutex_up_write(struct rw_mutex *rwm)
{
- rt_write_fastunlock(rwm, rt_write_slowunlock);
+ rt_write_fastunlock(rwm, rt_write_slowunlock, 1);
+}
+
+void fastcall rt_rwlock_write_unlock(struct rw_mutex *rwm)
+{
+ rt_write_fastunlock(rwm, rt_write_slowunlock, 0);
}
void rt_mutex_rwsem_init(struct rw_mutex *rwm, const char *name)
Index: linux-2.6.24.4-rt4/kernel/rtmutex_common.h
===================================================================
--- linux-2.6.24.4-rt4.orig/kernel/rtmutex_common.h 2008-03-25 21:45:43.000000000 -0400
+++ linux-2.6.24.4-rt4/kernel/rtmutex_common.h 2008-03-25 22:54:24.000000000 -0400
@@ -166,6 +166,10 @@ extern void rt_mutex_down_write(struct r
extern int rt_mutex_down_read_trylock(struct rw_mutex *rwm);
extern void rt_mutex_down_read(struct rw_mutex *rwm);
extern void rt_mutex_rwsem_init(struct rw_mutex *rwm, const char *name);
+extern void rt_rwlock_write_lock(struct rw_mutex *rwm);
+extern void rt_rwlock_read_lock(struct rw_mutex *rwm);
+extern void rt_rwlock_write_unlock(struct rw_mutex *rwm);
+extern void rt_rwlock_read_unlock(struct rw_mutex *rwm);
#endif /* CONFIG_PREEMPT_RT */
--
* [PATCH RT 3/6] map tasks to reader locks held
2008-04-25 13:09 [PATCH RT 0/6] New read/write locks for PI and multiple readers Steven Rostedt
2008-04-25 13:09 ` [PATCH RT 1/6] add framework for multi readers on rwsems Steven Rostedt
2008-04-25 13:09 ` [PATCH RT 2/6] implement rwlocks management Steven Rostedt
@ 2008-04-25 13:09 ` Steven Rostedt
2008-04-25 13:09 ` [PATCH RT 4/6] implement reader limit on read write locks Steven Rostedt
` (2 subsequent siblings)
5 siblings, 0 replies; 7+ messages in thread
From: Steven Rostedt @ 2008-04-25 13:09 UTC (permalink / raw)
To: linux-kernel, linux-rt-users
Cc: Ingo Molnar, Steven Rostedt, Peter Zijlstra, Thomas Gleixner,
Clark Williams, Arnaldo Carvalho de Melo, Jon Masters,
Gregory Haskins, Steven Rostedt
[-- Attachment #1: multi-reader-account.patch --]
[-- Type: text/plain, Size: 6444 bytes --]
This patch keeps track of all reader locks held by a task.
The max depth is currently set to 5. A task may own the same lock
multiple times for read without affecting this limit. Holding more than
5 different locks for read at the same time is bad programming practice
anyway, so this should not be a problem; the 5-lock limit should be
more than enough.
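As an aside (illustration only, not part of the patch), the bookkeeping can
be modeled in a few lines of user-space C. MAX_RWLOCK_DEPTH and the
owned_read_locks layout mirror the patch, while "struct task" and
track_read_lock() are invented stand-ins for task_struct and the inlined
accounting, as the sketch below shows.
/* Illustration only: per-task reader bookkeeping model, not kernel code. */
#include <stdio.h>

#define MAX_RWLOCK_DEPTH 5

struct reader_lock_struct {
    void *lock;                 /* which rw_mutex this slot refers to */
    int count;                  /* recursive read depth on that lock */
};

struct task {
    int reader_lock_count;      /* slots in use */
    struct reader_lock_struct owned_read_locks[MAX_RWLOCK_DEPTH];
};

/* Record a read acquisition; returns 0 when the depth limit would be hit. */
static int track_read_lock(struct task *t, void *rwm)
{
    int i;

    /* Re-acquiring a lock we already hold only bumps that slot's count. */
    for (i = t->reader_lock_count - 1; i >= 0; i--) {
        if (t->owned_read_locks[i].lock == rwm) {
            t->owned_read_locks[i].count++;
            return 1;
        }
    }
    if (t->reader_lock_count >= MAX_RWLOCK_DEPTH)
        return 0;               /* the kernel WARN_ON_ONCE()s here */
    t->owned_read_locks[t->reader_lock_count].lock = rwm;
    t->owned_read_locks[t->reader_lock_count].count = 1;
    t->reader_lock_count++;
    return 1;
}

int main(void)
{
    struct task t = { 0 };
    int a, b;                   /* stand-ins for two distinct rw_mutexes */

    track_read_lock(&t, &a);
    track_read_lock(&t, &a);    /* same lock again: same slot, count 2 */
    track_read_lock(&t, &b);    /* a different lock: a new slot */

    printf("slots used: %d of %d, depth on first lock: %d\n",
           t.reader_lock_count, MAX_RWLOCK_DEPTH,
           t.owned_read_locks[0].count);    /* 2 of 5, depth 2 */
    return 0;
}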
Signed-off-by: Steven Rostedt <srostedt@redhat.com>
---
include/linux/sched.h | 14 ++++++++++
kernel/fork.c | 4 +++
kernel/rtmutex.c | 66 ++++++++++++++++++++++++++++++++++++++++++++++----
3 files changed, 80 insertions(+), 4 deletions(-)
Index: linux-2.6.24.4-rt4/include/linux/sched.h
===================================================================
--- linux-2.6.24.4-rt4.orig/include/linux/sched.h 2008-03-25 16:41:48.000000000 -0400
+++ linux-2.6.24.4-rt4/include/linux/sched.h 2008-03-25 22:55:46.000000000 -0400
@@ -1005,6 +1005,14 @@ struct sched_entity {
#endif
};
+#ifdef CONFIG_PREEMPT_RT
+struct rw_mutex;
+struct reader_lock_struct {
+ struct rw_mutex *lock;
+ int count;
+};
+
+#endif
struct task_struct {
volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */
void *stack;
@@ -1226,6 +1234,12 @@ struct task_struct {
#endif
#define MAX_PREEMPT_TRACE 25
+#define MAX_RWLOCK_DEPTH 5
+
+#ifdef CONFIG_PREEMPT_RT
+ int reader_lock_count;
+ struct reader_lock_struct owned_read_locks[MAX_RWLOCK_DEPTH];
+#endif
#ifdef CONFIG_PREEMPT_TRACE
unsigned long preempt_trace_eip[MAX_PREEMPT_TRACE];
Index: linux-2.6.24.4-rt4/kernel/rtmutex.c
===================================================================
--- linux-2.6.24.4-rt4.orig/kernel/rtmutex.c 2008-03-25 22:54:24.000000000 -0400
+++ linux-2.6.24.4-rt4/kernel/rtmutex.c 2008-03-25 22:55:46.000000000 -0400
@@ -968,6 +968,8 @@ static int try_to_take_rw_read(struct rw
struct rt_mutex *mutex = &rwm->mutex;
struct rt_mutex_waiter *waiter;
struct task_struct *mtxowner;
+ int reader_count, i;
+ int incr = 1;
assert_spin_locked(&mutex->wait_lock);
@@ -978,6 +980,16 @@ static int try_to_take_rw_read(struct rw
if (unlikely(rt_rwlock_writer(rwm)))
return 0;
+ /* check to see if we don't already own this lock */
+ for (i = current->reader_lock_count - 1; i >= 0; i--) {
+ if (current->owned_read_locks[i].lock == rwm) {
+ rt_rwlock_set_owner(rwm, RT_RW_READER, 0);
+ current->owned_read_locks[i].count++;
+ incr = 0;
+ goto taken;
+ }
+ }
+
/* A writer is not the owner, but is a writer waiting */
mtxowner = rt_mutex_owner(mutex);
@@ -1031,6 +1043,14 @@ static int try_to_take_rw_read(struct rw
/* RT_RW_READER forces slow paths */
rt_rwlock_set_owner(rwm, RT_RW_READER, 0);
taken:
+ if (incr) {
+ reader_count = current->reader_lock_count++;
+ if (likely(reader_count < MAX_RWLOCK_DEPTH)) {
+ current->owned_read_locks[reader_count].lock = rwm;
+ current->owned_read_locks[reader_count].count = 1;
+ } else
+ WARN_ON_ONCE(1);
+ }
rt_mutex_deadlock_account_lock(mutex, current);
atomic_inc(&rwm->count);
return 1;
@@ -1184,10 +1204,13 @@ rt_read_fastlock(struct rw_mutex *rwm,
void fastcall (*slowfn)(struct rw_mutex *rwm, int mtx),
int mtx)
{
-retry:
+ retry:
if (likely(rt_rwlock_cmpxchg(rwm, NULL, current))) {
+ int reader_count;
+
rt_mutex_deadlock_account_lock(&rwm->mutex, current);
atomic_inc(&rwm->count);
+ smp_mb();
/*
* It is possible that the owner was zeroed
* before we incremented count. If owner is not
@@ -1197,6 +1220,13 @@ retry:
atomic_dec(&rwm->count);
goto retry;
}
+
+ reader_count = current->reader_lock_count++;
+ if (likely(reader_count < MAX_RWLOCK_DEPTH)) {
+ current->owned_read_locks[reader_count].lock = rwm;
+ current->owned_read_locks[reader_count].count = 1;
+ } else
+ WARN_ON_ONCE(1);
} else
slowfn(rwm, mtx);
}
@@ -1236,6 +1266,8 @@ rt_read_fasttrylock(struct rw_mutex *rwm
{
retry:
if (likely(rt_rwlock_cmpxchg(rwm, NULL, current))) {
+ int reader_count;
+
rt_mutex_deadlock_account_lock(&rwm->mutex, current);
atomic_inc(&rwm->count);
/*
@@ -1247,6 +1279,13 @@ retry:
atomic_dec(&rwm->count);
goto retry;
}
+
+ reader_count = current->reader_lock_count++;
+ if (likely(reader_count < MAX_RWLOCK_DEPTH)) {
+ current->owned_read_locks[reader_count].lock = rwm;
+ current->owned_read_locks[reader_count].count = 1;
+ } else
+ WARN_ON_ONCE(1);
return 1;
} else
return slowfn(rwm);
@@ -1430,9 +1469,10 @@ static void fastcall noinline __sched
rt_read_slowunlock(struct rw_mutex *rwm, int mtx)
{
struct rt_mutex *mutex = &rwm->mutex;
+ struct rt_mutex_waiter *waiter;
unsigned long flags;
int savestate = !mtx;
- struct rt_mutex_waiter *waiter;
+ int i;
spin_lock_irqsave(&mutex->wait_lock, flags);
@@ -1447,6 +1487,18 @@ rt_read_slowunlock(struct rw_mutex *rwm,
*/
mark_rt_rwlock_check(rwm);
+ for (i = current->reader_lock_count - 1; i >= 0; i--) {
+ if (current->owned_read_locks[i].lock == rwm) {
+ current->owned_read_locks[i].count--;
+ if (!current->owned_read_locks[i].count) {
+ current->reader_lock_count--;
+ WARN_ON_ONCE(i != current->reader_lock_count);
+ }
+ break;
+ }
+ }
+ WARN_ON_ONCE(i < 0);
+
/*
* If there are more readers, let the last one do any wakeups.
* Also check to make sure the owner wasn't cleared when two
@@ -1508,9 +1560,15 @@ rt_read_fastunlock(struct rw_mutex *rwm,
WARN_ON(!atomic_read(&rwm->count));
WARN_ON(!rwm->owner);
atomic_dec(&rwm->count);
- if (likely(rt_rwlock_cmpxchg(rwm, current, NULL)))
+ if (likely(rt_rwlock_cmpxchg(rwm, current, NULL))) {
+ int reader_count = --current->reader_lock_count;
rt_mutex_deadlock_account_unlock(current);
- else
+ if (unlikely(reader_count < 0)) {
+ reader_count = 0;
+ WARN_ON_ONCE(1);
+ }
+ WARN_ON_ONCE(current->owned_read_locks[reader_count].lock != rwm);
+ } else
slowfn(rwm, mtx);
}
Index: linux-2.6.24.4-rt4/kernel/fork.c
===================================================================
--- linux-2.6.24.4-rt4.orig/kernel/fork.c 2008-03-25 16:41:48.000000000 -0400
+++ linux-2.6.24.4-rt4/kernel/fork.c 2008-03-25 22:55:46.000000000 -0400
@@ -1206,6 +1206,10 @@ static struct task_struct *copy_process(
p->lock_count = 0;
#endif
+#ifdef CONFIG_PREEMPT_RT
+ p->reader_lock_count = 0;
+#endif
+
if (pid != &init_struct_pid) {
retval = -ENOMEM;
pid = alloc_pid(task_active_pid_ns(p));
--
* [PATCH RT 4/6] implement reader limit on read write locks
2008-04-25 13:09 [PATCH RT 0/6] New read/write locks for PI and multiple readers Steven Rostedt
` (2 preceding siblings ...)
2008-04-25 13:09 ` [PATCH RT 3/6] map tasks to reader locks held Steven Rostedt
@ 2008-04-25 13:09 ` Steven Rostedt
2008-04-25 13:09 ` [PATCH RT 5/6] map read/write locks back to their readers Steven Rostedt
2008-04-25 13:09 ` [PATCH RT 6/6] read lock Priority Inheritance implementation Steven Rostedt
5 siblings, 0 replies; 7+ messages in thread
From: Steven Rostedt @ 2008-04-25 13:09 UTC (permalink / raw)
To: linux-kernel, linux-rt-users
Cc: Ingo Molnar, Steven Rostedt, Peter Zijlstra, Thomas Gleixner,
Clark Williams, Arnaldo Carvalho de Melo, Jon Masters,
Gregory Haskins, Steven Rostedt
[-- Attachment #1: multi-reader-limit.patch --]
[-- Type: text/plain, Size: 8427 bytes --]
This patch allows limiting the number of readers a lock may have.
The limit defaults to "no limit". The read/write locks now keep track
not only of the number of times a lock is held for read, but also of
the number of tasks that hold it for read. For example, if two tasks
hold the same read/write lock, and one of them holds it twice, the
count for the read/write lock would be 3 and the owner count 2.
The reader limit is controlled by
/proc/sys/kernel/rwlock_reader_limit
If this is set to zero or negative, then there is no limit.
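To make the count/owners split and the limit check concrete, here is a toy
model (illustration only, not part of the patch; rt_rwlock_limit is a plain
variable standing in for the sysctl, and reader_allowed() is an invented
name mirroring the test added to try_to_take_rw_read()):
/* Illustration only: count/owners and the reader limit, not kernel code. */
#include <stdbool.h>
#include <stdio.h>

static int rt_rwlock_limit;     /* zero or negative means "no limit" */

struct toy_rw_mutex {
    int count;                  /* total read acquisitions, recursion included */
    int owners;                 /* distinct tasks holding the lock for read */
};

/* Only a task that would become a *new* owner is checked against the limit. */
static bool reader_allowed(const struct toy_rw_mutex *rwm, bool new_owner)
{
    if (!new_owner)
        return true;            /* recursive reads never hit the limit */
    return rt_rwlock_limit <= 0 || rwm->owners < rt_rwlock_limit;
}

int main(void)
{
    /* two tasks hold the lock, one of them twice: count 3, owners 2 */
    struct toy_rw_mutex rwm = { .count = 3, .owners = 2 };

    rt_rwlock_limit = 0;        /* the default: unlimited */
    printf("new reader, no limit:      %d\n", reader_allowed(&rwm, true));  /* 1 */

    rt_rwlock_limit = 2;        /* as if set via the sysctl */
    printf("new reader, limit reached: %d\n", reader_allowed(&rwm, true));  /* 0 */
    printf("existing owner recursing:  %d\n", reader_allowed(&rwm, false)); /* 1 */
    return 0;
}
On a real system the limit would be changed by writing to
/proc/sys/kernel/rwlock_reader_limit; only tasks that would become new
owners are checked, so recursive reads by an existing owner always succeed.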
Signed-off-by: Steven Rostedt <srostedt@redhat.com>
---
include/linux/rt_lock.h | 1
kernel/rtmutex.c | 89 +++++++++++++++++++++++++++++++++++-------------
kernel/sysctl.c | 14 +++++++
3 files changed, 80 insertions(+), 24 deletions(-)
Index: linux-2.6.24.4-rt4/include/linux/rt_lock.h
===================================================================
--- linux-2.6.24.4-rt4.orig/include/linux/rt_lock.h 2008-03-25 22:54:24.000000000 -0400
+++ linux-2.6.24.4-rt4/include/linux/rt_lock.h 2008-03-25 23:00:46.000000000 -0400
@@ -64,6 +64,7 @@ struct rw_mutex {
struct task_struct *owner;
struct rt_mutex mutex;
atomic_t count; /* number of times held for read */
+ atomic_t owners; /* number of owners as readers */
};
/*
Index: linux-2.6.24.4-rt4/kernel/rtmutex.c
===================================================================
--- linux-2.6.24.4-rt4.orig/kernel/rtmutex.c 2008-03-25 22:55:46.000000000 -0400
+++ linux-2.6.24.4-rt4/kernel/rtmutex.c 2008-03-25 23:06:32.000000000 -0400
@@ -927,6 +927,8 @@ __rt_spin_lock_init(spinlock_t *lock, ch
}
EXPORT_SYMBOL(__rt_spin_lock_init);
+int rt_rwlock_limit;
+
static inline int rt_release_bkl(struct rt_mutex *lock, unsigned long flags);
static inline void rt_reacquire_bkl(int saved_lock_depth);
@@ -1000,6 +1002,10 @@ static int try_to_take_rw_read(struct rw
goto taken;
}
+ /* Check for rwlock limits */
+ if (rt_rwlock_limit && atomic_read(&rwm->owners) >= rt_rwlock_limit)
+ return 0;
+
if (mtxowner && mtxowner != RT_RW_READER) {
if (!try_to_steal_lock(mutex)) {
/*
@@ -1044,6 +1050,7 @@ static int try_to_take_rw_read(struct rw
rt_rwlock_set_owner(rwm, RT_RW_READER, 0);
taken:
if (incr) {
+ atomic_inc(&rwm->owners);
reader_count = current->reader_lock_count++;
if (likely(reader_count < MAX_RWLOCK_DEPTH)) {
current->owned_read_locks[reader_count].lock = rwm;
@@ -1221,6 +1228,7 @@ rt_read_fastlock(struct rw_mutex *rwm,
goto retry;
}
+ atomic_inc(&rwm->owners);
reader_count = current->reader_lock_count++;
if (likely(reader_count < MAX_RWLOCK_DEPTH)) {
current->owned_read_locks[reader_count].lock = rwm;
@@ -1280,6 +1288,7 @@ retry:
goto retry;
}
+ atomic_inc(&rwm->owners);
reader_count = current->reader_lock_count++;
if (likely(reader_count < MAX_RWLOCK_DEPTH)) {
current->owned_read_locks[reader_count].lock = rwm;
@@ -1471,6 +1480,7 @@ rt_read_slowunlock(struct rw_mutex *rwm,
struct rt_mutex *mutex = &rwm->mutex;
struct rt_mutex_waiter *waiter;
unsigned long flags;
+ unsigned int reader_count;
int savestate = !mtx;
int i;
@@ -1493,6 +1503,7 @@ rt_read_slowunlock(struct rw_mutex *rwm,
if (!current->owned_read_locks[i].count) {
current->reader_lock_count--;
WARN_ON_ONCE(i != current->reader_lock_count);
+ atomic_dec(&rwm->owners);
}
break;
}
@@ -1500,20 +1511,34 @@ rt_read_slowunlock(struct rw_mutex *rwm,
WARN_ON_ONCE(i < 0);
/*
- * If there are more readers, let the last one do any wakeups.
- * Also check to make sure the owner wasn't cleared when two
- * readers released the lock at the same time, and the count
- * went to zero before grabbing the wait_lock.
+ * If the last two (or more) readers unlocked at the same
+ * time, the owner could be cleared since the count went to
+ * zero. If this has happened, the rwm owner will not
+ * be set to current or readers. This means that another reader
+ * already reset the lock, so there is nothing left to do.
*/
- if (atomic_read(&rwm->count) ||
- (rt_rwlock_owner(rwm) != current &&
- rt_rwlock_owner(rwm) != RT_RW_READER)) {
- spin_unlock_irqrestore(&mutex->wait_lock, flags);
- return;
- }
+ if ((rt_rwlock_owner(rwm) != current &&
+ rt_rwlock_owner(rwm) != RT_RW_READER))
+ goto out;
+
+ /*
+ * If there are more readers and we are under the limit
+ * let the last reader do the wakeups.
+ */
+ reader_count = atomic_read(&rwm->count);
+ if (reader_count &&
+ (!rt_rwlock_limit || atomic_read(&rwm->owners) >= rt_rwlock_limit))
+ goto out;
/* If no one is blocked, then clear all ownership */
if (!rt_mutex_has_waiters(mutex)) {
+ /*
+ * If count is not zero, we are under the limit with
+ * no other readers.
+ */
+ if (reader_count)
+ goto out;
+
/* We could still have a pending reader waiting */
if (rt_mutex_owner_pending(mutex)) {
/* set the rwm back to pending */
@@ -1525,24 +1550,32 @@ rt_read_slowunlock(struct rw_mutex *rwm,
goto out;
}
- /* We are the last reader with pending waiters. */
+ /*
+ * If the next waiter is a reader, this can be because of
+ * two things. One is that we hit the reader limit, or
+ * Two, there is a pending writer.
+ * We still only wake up one reader at a time (even if
+ * we could wake up more). This is because we don't
+ * have any idea if a writer is pending.
+ */
waiter = rt_mutex_top_waiter(mutex);
- if (waiter->write_lock)
+ if (waiter->write_lock) {
+ /* only wake up if there are no readers */
+ if (reader_count)
+ goto out;
rwm->owner = RT_RW_PENDING_WRITE;
- else
+ } else {
+ /*
+ * It is also possible that the reader limit decreased.
+ * If the limit did decrease, we may not be able to
+ * wake up the reader if we are currently above the limit.
+ */
+ if (rt_rwlock_limit &&
+ unlikely(atomic_read(&rwm->owners) >= rt_rwlock_limit))
+ goto out;
rwm->owner = RT_RW_PENDING_READ;
+ }
- /*
- * It is possible to have a reader waiting. We still only
- * wake one up in that case. A way we can have a reader waiting
- * is because a writer woke up, a higher prio reader came
- * and stole the lock from the writer. But the writer now
- * is no longer waiting on the lock and needs to retake
- * the lock. We simply wake up the reader and let the
- * reader have the lock. If the writer comes by, it
- * will steal the lock from the reader. This is the
- * only time we can have a reader pending on a lock.
- */
wakeup_next_waiter(mutex, savestate);
out:
@@ -1558,15 +1591,22 @@ rt_read_fastunlock(struct rw_mutex *rwm,
int mtx)
{
WARN_ON(!atomic_read(&rwm->count));
+ WARN_ON(!atomic_read(&rwm->owners));
WARN_ON(!rwm->owner);
atomic_dec(&rwm->count);
if (likely(rt_rwlock_cmpxchg(rwm, current, NULL))) {
int reader_count = --current->reader_lock_count;
+ int owners;
rt_mutex_deadlock_account_unlock(current);
if (unlikely(reader_count < 0)) {
reader_count = 0;
WARN_ON_ONCE(1);
}
+ owners = atomic_dec_return(&rwm->owners);
+ if (unlikely(owners < 0)) {
+ atomic_set(&rwm->owners, 0);
+ WARN_ON_ONCE(1);
+ }
WARN_ON_ONCE(current->owned_read_locks[reader_count].lock != rwm);
} else
slowfn(rwm, mtx);
@@ -1717,6 +1757,7 @@ void rt_mutex_rwsem_init(struct rw_mutex
rwm->owner = NULL;
atomic_set(&rwm->count, 0);
+ atomic_set(&rwm->owners, 0);
__rt_mutex_init(mutex, name);
}
Index: linux-2.6.24.4-rt4/kernel/sysctl.c
===================================================================
--- linux-2.6.24.4-rt4.orig/kernel/sysctl.c 2008-03-25 16:41:48.000000000 -0400
+++ linux-2.6.24.4-rt4/kernel/sysctl.c 2008-03-25 23:00:12.000000000 -0400
@@ -150,6 +150,10 @@ static int parse_table(int __user *, int
void __user *, size_t, struct ctl_table *);
#endif
+#ifdef CONFIG_PREEMPT_RT
+extern int rt_rwlock_limit;
+#endif
+
#ifdef CONFIG_PROC_SYSCTL
static int proc_do_cad_pid(struct ctl_table *table, int write, struct file *filp,
@@ -399,6 +403,16 @@ static struct ctl_table kern_table[] = {
.proc_handler = &proc_dointvec,
},
#endif
+#ifdef CONFIG_PREEMPT_RT
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "rwlock_reader_limit",
+ .data = &rt_rwlock_limit,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
+#endif
{
.ctl_name = KERN_PANIC,
.procname = "panic",
--
* [PATCH RT 5/6] map read/write locks back to their readers
2008-04-25 13:09 [PATCH RT 0/6] New read/write locks for PI and multiple readers Steven Rostedt
` (3 preceding siblings ...)
2008-04-25 13:09 ` [PATCH RT 4/6] implement reader limit on read write locks Steven Rostedt
@ 2008-04-25 13:09 ` Steven Rostedt
2008-04-25 13:09 ` [PATCH RT 6/6] read lock Priority Inheritance implementation Steven Rostedt
5 siblings, 0 replies; 7+ messages in thread
From: Steven Rostedt @ 2008-04-25 13:09 UTC (permalink / raw)
To: linux-kernel, linux-rt-users
Cc: Ingo Molnar, Steven Rostedt, Peter Zijlstra, Thomas Gleixner,
Clark Williams, Arnaldo Carvalho de Melo, Jon Masters,
Gregory Haskins, Steven Rostedt
[-- Attachment #1: multi-reader-lock-account.patch --]
[-- Type: text/plain, Size: 14041 bytes --]
This patch adds a mapping from the read/write lock back to the owners
that are readers. This is a linked list of the tasks that own the lock
for read. The list is protected by the read/write lock's mutex wait_lock.
To avoid grabbing this spinlock on the fast path, the list is not updated
while there is only one reader; that reader is pointed to by the owner
field of the rw_mutex. When a second reader grabs the read lock, it adds
the first owner to the list under the wait_lock.
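For illustration, a minimal userspace sketch of the back-mapping described
above (not the kernel code; the types and names are made up, and a plain
singly linked list stands in for the kernel's list_head handling under
mutex->wait_lock):

#include <stdio.h>

#define MODEL_MAX_RWLOCK_DEPTH 5	/* stands in for MAX_RWLOCK_DEPTH */

struct rw_mutex_model;

/* One per-task slot per read lock held (cf. reader_lock_struct). */
struct reader_entry {
	struct rw_mutex_model *lock;	/* which rwlock this slot tracks */
	struct reader_entry *next;	/* link in the lock's reader list */
	const char *task;		/* back-pointer to the owning task */
	int count;			/* recursive read depth */
};

struct rw_mutex_model {
	struct reader_entry *readers;	/* tasks holding the lock for read */
};

/* In the kernel this would be a list_add() done under mutex->wait_lock. */
static void add_reader(struct rw_mutex_model *lock, struct reader_entry *e,
		       const char *task)
{
	e->lock = lock;
	e->task = task;
	e->count = 1;
	e->next = lock->readers;
	lock->readers = e;
}

int main(void)
{
	struct rw_mutex_model lock = { NULL };
	struct reader_entry slots_a[MODEL_MAX_RWLOCK_DEPTH] = { { NULL } };
	struct reader_entry slots_b[MODEL_MAX_RWLOCK_DEPTH] = { { NULL } };
	struct reader_entry *e;

	add_reader(&lock, &slots_a[0], "task A");
	add_reader(&lock, &slots_b[0], "task B");

	/* Walk back from the lock to the tasks that hold it for read. */
	for (e = lock.readers; e; e = e->next)
		printf("%s holds the lock for read (count=%d)\n",
		       e->task, e->count);
	return 0;
}

In the actual patch each slot is a reader_lock_struct linked into
rwm->readers with list_add() under mutex->wait_lock.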
Signed-off-by: Steven Rostedt <srostedt@redhat.com>
---
include/linux/rt_lock.h | 3
include/linux/sched.h | 2
kernel/fork.c | 8 ++
kernel/rtmutex.c | 187 ++++++++++++++++++++++++++++++++++--------------
4 files changed, 146 insertions(+), 54 deletions(-)
Index: linux-2.6.24.4-rt4/include/linux/rt_lock.h
===================================================================
--- linux-2.6.24.4-rt4.orig/include/linux/rt_lock.h 2008-03-25 23:12:15.000000000 -0400
+++ linux-2.6.24.4-rt4/include/linux/rt_lock.h 2008-03-25 23:13:17.000000000 -0400
@@ -65,6 +65,7 @@ struct rw_mutex {
struct rt_mutex mutex;
atomic_t count; /* number of times held for read */
atomic_t owners; /* number of owners as readers */
+ struct list_head readers;
};
/*
@@ -194,7 +195,7 @@ extern int __bad_func_type(void);
*/
#define __RWSEM_INITIALIZER(name) \
- { .owners.mutex = __RT_MUTEX_INITIALIZER(name.owners.mutex), \
+ { .owners.mutex = __RT_MUTEX_INITIALIZER(name.owners.mutex), \
RW_DEP_MAP_INIT(name) }
#define DECLARE_RWSEM(lockname) \
Index: linux-2.6.24.4-rt4/include/linux/sched.h
===================================================================
--- linux-2.6.24.4-rt4.orig/include/linux/sched.h 2008-03-25 23:12:15.000000000 -0400
+++ linux-2.6.24.4-rt4/include/linux/sched.h 2008-03-25 23:13:17.000000000 -0400
@@ -1009,6 +1009,8 @@ struct sched_entity {
struct rw_mutex;
struct reader_lock_struct {
struct rw_mutex *lock;
+ struct list_head list;
+ struct task_struct *task;
int count;
};
Index: linux-2.6.24.4-rt4/kernel/fork.c
===================================================================
--- linux-2.6.24.4-rt4.orig/kernel/fork.c 2008-03-25 23:12:15.000000000 -0400
+++ linux-2.6.24.4-rt4/kernel/fork.c 2008-03-25 23:13:17.000000000 -0400
@@ -1208,6 +1208,14 @@ static struct task_struct *copy_process(
#ifdef CONFIG_PREEMPT_RT
p->reader_lock_count = 0;
+ {
+ int i;
+ for (i = 0; i < MAX_RWLOCK_DEPTH; i++) {
+ INIT_LIST_HEAD(&p->owned_read_locks[i].list);
+ p->owned_read_locks[i].count = 0;
+ p->owned_read_locks[i].lock = NULL;
+ }
+ }
#endif
if (pid != &init_struct_pid) {
Index: linux-2.6.24.4-rt4/kernel/rtmutex.c
===================================================================
--- linux-2.6.24.4-rt4.orig/kernel/rtmutex.c 2008-03-25 23:12:15.000000000 -0400
+++ linux-2.6.24.4-rt4/kernel/rtmutex.c 2008-03-25 23:13:17.000000000 -0400
@@ -941,6 +941,14 @@ rt_rwlock_set_owner(struct rw_mutex *rwm
rwm->owner = (struct task_struct *)val;
}
+static inline void init_rw_lists(struct rw_mutex *rwm)
+{
+ if (unlikely(!rwm->readers.prev)) {
+ init_lists(&rwm->mutex);
+ INIT_LIST_HEAD(&rwm->readers);
+ }
+}
+
/*
* The fast paths of the rw locks do not set up owners to
* the mutex. When blocking on an rwlock we must make sure
@@ -965,11 +973,59 @@ update_rw_mutex_owner(struct rw_mutex *r
rt_mutex_set_owner(mutex, mtxowner, 0);
}
+/*
+ * The fast path does not add itself to the reader list to keep
+ * from needing to grab the spinlock. We need to add the owner
+ * itself. This may seem racy, but in practice, it is fine.
+ * The link list is protected by mutex->wait_lock. But to find
+ * the lock on the owner we need to read the owners reader counter.
+ * That counter is modified only by the owner. We are OK with that
+ * because to remove the lock that we are looking for, the owner
+ * must first grab the mutex->wait_lock. The lock will not disappear
+ * from the owner now, and we don't care if we see other locks
+ * held or not held.
+ */
+
+static inline void
+rt_rwlock_update_owner(struct rw_mutex *rwm, unsigned owners)
+{
+ struct reader_lock_struct *rls;
+ struct task_struct *own;
+ int i;
+
+ if (!owners || rt_rwlock_pending(rwm))
+ return;
+
+ own = rt_rwlock_owner(rwm);
+ if (own == RT_RW_READER)
+ return;
+
+ for (i = own->reader_lock_count - 1; i >= 0; i--) {
+ if (own->owned_read_locks[i].lock == rwm)
+ break;
+ }
+ /* It is possible the owner didn't add it yet */
+ if (i < 0)
+ return;
+
+ rls = &own->owned_read_locks[i];
+ /* It is also possible that the owner added it already */
+ if (rls->list.prev && !list_empty(&rls->list))
+ return;
+
+ list_add(&rls->list, &rwm->readers);
+
+ /* change to reader, so no one else updates too */
+ rt_rwlock_set_owner(rwm, RT_RW_READER, RT_RWLOCK_CHECK);
+}
+
static int try_to_take_rw_read(struct rw_mutex *rwm)
{
struct rt_mutex *mutex = &rwm->mutex;
struct rt_mutex_waiter *waiter;
+ struct reader_lock_struct *rls;
struct task_struct *mtxowner;
+ int owners;
int reader_count, i;
int incr = 1;
@@ -985,8 +1041,15 @@ static int try_to_take_rw_read(struct rw
/* check to see if we don't already own this lock */
for (i = current->reader_lock_count - 1; i >= 0; i--) {
if (current->owned_read_locks[i].lock == rwm) {
+ rls = &current->owned_read_locks[i];
+ /*
+ * If this was taken via the fast path, then
+ * it hasn't been added to the link list yet.
+ */
+ if (!rls->list.prev || list_empty(&rls->list))
+ list_add(&rls->list, &rwm->readers);
rt_rwlock_set_owner(rwm, RT_RW_READER, 0);
- current->owned_read_locks[i].count++;
+ rls->count++;
incr = 0;
goto taken;
}
@@ -997,13 +1060,16 @@ static int try_to_take_rw_read(struct rw
/* if the owner released it before we marked it then take it */
if (!mtxowner && !rt_rwlock_owner(rwm)) {
- WARN_ON(atomic_read(&rwm->count));
- rt_rwlock_set_owner(rwm, current, 0);
+ /* Still unlock with the slow path (for PI handling) */
+ rt_rwlock_set_owner(rwm, RT_RW_READER, 0);
goto taken;
}
+ owners = atomic_read(&rwm->owners);
+ rt_rwlock_update_owner(rwm, owners);
+
/* Check for rwlock limits */
- if (rt_rwlock_limit && atomic_read(&rwm->owners) >= rt_rwlock_limit)
+ if (rt_rwlock_limit && owners >= rt_rwlock_limit)
return 0;
if (mtxowner && mtxowner != RT_RW_READER) {
@@ -1053,8 +1119,11 @@ static int try_to_take_rw_read(struct rw
atomic_inc(&rwm->owners);
reader_count = current->reader_lock_count++;
if (likely(reader_count < MAX_RWLOCK_DEPTH)) {
- current->owned_read_locks[reader_count].lock = rwm;
- current->owned_read_locks[reader_count].count = 1;
+ rls = &current->owned_read_locks[reader_count];
+ rls->lock = rwm;
+ rls->count = 1;
+ WARN_ON(rls->list.prev && !list_empty(&rls->list));
+ list_add(&rls->list, &rwm->readers);
} else
WARN_ON_ONCE(1);
}
@@ -1074,12 +1143,13 @@ try_to_take_rw_write(struct rw_mutex *rw
own = rt_rwlock_owner(rwm);
+ /* owners must be zero for writer */
+ rt_rwlock_update_owner(rwm, atomic_read(&rwm->owners));
+
/* readers or writers? */
if ((own && !rt_rwlock_pending(rwm)))
return 0;
- WARN_ON(atomic_read(&rwm->count));
-
/*
* RT_RW_PENDING means that the lock is free, but there are
* pending owners on the mutex
@@ -1107,7 +1177,7 @@ rt_read_slowlock(struct rw_mutex *rwm, i
unsigned long saved_state = -1, state, flags;
spin_lock_irqsave(&mutex->wait_lock, flags);
- init_lists(mutex);
+ init_rw_lists(rwm);
if (try_to_take_rw_read(rwm)) {
spin_unlock_irqrestore(&mutex->wait_lock, flags);
@@ -1121,8 +1191,6 @@ rt_read_slowlock(struct rw_mutex *rwm, i
waiter.task = NULL;
waiter.write_lock = 0;
- init_lists(mutex);
-
if (mtx) {
/*
* We drop the BKL here before we go into the wait loop to avoid a
@@ -1206,10 +1274,8 @@ rt_read_slowlock(struct rw_mutex *rwm, i
debug_rt_mutex_free_waiter(&waiter);
}
-static inline void
-rt_read_fastlock(struct rw_mutex *rwm,
- void fastcall (*slowfn)(struct rw_mutex *rwm, int mtx),
- int mtx)
+static inline int
+__rt_read_fasttrylock(struct rw_mutex *rwm)
{
retry:
if (likely(rt_rwlock_cmpxchg(rwm, NULL, current))) {
@@ -1229,13 +1295,41 @@ rt_read_fastlock(struct rw_mutex *rwm,
}
atomic_inc(&rwm->owners);
- reader_count = current->reader_lock_count++;
+ reader_count = current->reader_lock_count;
if (likely(reader_count < MAX_RWLOCK_DEPTH)) {
current->owned_read_locks[reader_count].lock = rwm;
current->owned_read_locks[reader_count].count = 1;
} else
WARN_ON_ONCE(1);
- } else
+ /*
+ * If this task is no longer the sole owner of the lock
+ * or someone is blocking, then we need to add the task
+ * to the lock.
+ */
+ smp_mb();
+ current->reader_lock_count++;
+ if (unlikely(rwm->owner != current)) {
+ struct rt_mutex *mutex = &rwm->mutex;
+ struct reader_lock_struct *rls;
+ unsigned long flags;
+
+ spin_lock_irqsave(&mutex->wait_lock, flags);
+ rls = &current->owned_read_locks[reader_count];
+ if (!rls->list.prev || list_empty(&rls->list))
+ list_add(&rls->list, &rwm->readers);
+ spin_unlock_irqrestore(&mutex->wait_lock, flags);
+ }
+ return 1;
+ }
+ return 0;
+}
+
+static inline void
+rt_read_fastlock(struct rw_mutex *rwm,
+ void fastcall (*slowfn)(struct rw_mutex *rwm, int mtx),
+ int mtx)
+{
+ if (unlikely(!__rt_read_fasttrylock(rwm)))
slowfn(rwm, mtx);
}
@@ -1258,7 +1352,7 @@ rt_read_slowtrylock(struct rw_mutex *rwm
int ret = 0;
spin_lock_irqsave(&mutex->wait_lock, flags);
- init_lists(mutex);
+ init_rw_lists(rwm);
if (try_to_take_rw_read(rwm))
ret = 1;
@@ -1272,31 +1366,9 @@ static inline int
rt_read_fasttrylock(struct rw_mutex *rwm,
int fastcall (*slowfn)(struct rw_mutex *rwm))
{
-retry:
- if (likely(rt_rwlock_cmpxchg(rwm, NULL, current))) {
- int reader_count;
-
- rt_mutex_deadlock_account_lock(&rwm->mutex, current);
- atomic_inc(&rwm->count);
- /*
- * It is possible that the owner was zeroed
- * before we incremented count. If owner is not
- * current, then retry again
- */
- if (unlikely(rwm->owner != current)) {
- atomic_dec(&rwm->count);
- goto retry;
- }
-
- atomic_inc(&rwm->owners);
- reader_count = current->reader_lock_count++;
- if (likely(reader_count < MAX_RWLOCK_DEPTH)) {
- current->owned_read_locks[reader_count].lock = rwm;
- current->owned_read_locks[reader_count].count = 1;
- } else
- WARN_ON_ONCE(1);
+ if (likely(__rt_read_fasttrylock(rwm)))
return 1;
- } else
+ else
return slowfn(rwm);
}
@@ -1320,7 +1392,7 @@ rt_write_slowlock(struct rw_mutex *rwm,
waiter.write_lock = 1;
spin_lock_irqsave(&mutex->wait_lock, flags);
- init_lists(mutex);
+ init_rw_lists(rwm);
if (try_to_take_rw_write(rwm)) {
spin_unlock_irqrestore(&mutex->wait_lock, flags);
@@ -1407,8 +1479,6 @@ rt_write_slowlock(struct rw_mutex *rwm,
if (mtx && unlikely(saved_lock_depth >= 0))
rt_reacquire_bkl(saved_lock_depth);
- WARN_ON(atomic_read(&rwm->count));
-
debug_rt_mutex_free_waiter(&waiter);
}
@@ -1420,10 +1490,9 @@ rt_write_fastlock(struct rw_mutex *rwm,
{
unsigned long val = (unsigned long)current | RT_RWLOCK_WRITER;
- if (likely(rt_rwlock_cmpxchg(rwm, NULL, val))) {
+ if (likely(rt_rwlock_cmpxchg(rwm, NULL, val)))
rt_mutex_deadlock_account_lock(&rwm->mutex, current);
- WARN_ON(atomic_read(&rwm->count));
- } else
+ else
slowfn(rwm, mtx);
}
@@ -1445,7 +1514,7 @@ rt_write_slowtrylock(struct rw_mutex *rw
int ret = 0;
spin_lock_irqsave(&mutex->wait_lock, flags);
- init_lists(mutex);
+ init_rw_lists(rwm);
if (try_to_take_rw_write(rwm))
ret = 1;
@@ -1463,7 +1532,6 @@ rt_write_fasttrylock(struct rw_mutex *rw
if (likely(rt_rwlock_cmpxchg(rwm, NULL, val))) {
rt_mutex_deadlock_account_lock(&rwm->mutex, current);
- WARN_ON(atomic_read(&rwm->count));
return 1;
} else
return slowfn(rwm);
@@ -1479,6 +1547,7 @@ rt_read_slowunlock(struct rw_mutex *rwm,
{
struct rt_mutex *mutex = &rwm->mutex;
struct rt_mutex_waiter *waiter;
+ struct reader_lock_struct *rls;
unsigned long flags;
unsigned int reader_count;
int savestate = !mtx;
@@ -1504,6 +1573,10 @@ rt_read_slowunlock(struct rw_mutex *rwm,
current->reader_lock_count--;
WARN_ON_ONCE(i != current->reader_lock_count);
atomic_dec(&rwm->owners);
+ rls = &current->owned_read_locks[i];
+ WARN_ON(!rls->list.prev || list_empty(&rls->list));
+ list_del_init(&rls->list);
+ rls->lock = NULL;
}
break;
}
@@ -1517,9 +1590,12 @@ rt_read_slowunlock(struct rw_mutex *rwm,
* be set to current or readers. This means that another reader
* already reset the lock, so there is nothing left to do.
*/
- if ((rt_rwlock_owner(rwm) != current &&
- rt_rwlock_owner(rwm) != RT_RW_READER))
+ if (unlikely(rt_rwlock_owner(rwm) != current &&
+ rt_rwlock_owner(rwm) != RT_RW_READER)) {
+ /* Update the owner if necessary */
+ rt_rwlock_update_owner(rwm, atomic_read(&rwm->owners));
goto out;
+ }
/*
* If there are more readers and we are under the limit
@@ -1595,6 +1671,7 @@ rt_read_fastunlock(struct rw_mutex *rwm,
WARN_ON(!rwm->owner);
atomic_dec(&rwm->count);
if (likely(rt_rwlock_cmpxchg(rwm, current, NULL))) {
+ struct reader_lock_struct *rls;
int reader_count = --current->reader_lock_count;
int owners;
rt_mutex_deadlock_account_unlock(current);
@@ -1607,7 +1684,10 @@ rt_read_fastunlock(struct rw_mutex *rwm,
atomic_set(&rwm->owners, 0);
WARN_ON_ONCE(1);
}
- WARN_ON_ONCE(current->owned_read_locks[reader_count].lock != rwm);
+ rls = &current->owned_read_locks[reader_count];
+ WARN_ON_ONCE(rls->lock != rwm);
+ WARN_ON(rls->list.prev && !list_empty(&rls->list));
+ rls->lock = NULL;
} else
slowfn(rwm, mtx);
}
@@ -1758,6 +1838,7 @@ void rt_mutex_rwsem_init(struct rw_mutex
rwm->owner = NULL;
atomic_set(&rwm->count, 0);
atomic_set(&rwm->owners, 0);
+ INIT_LIST_HEAD(&rwm->readers);
__rt_mutex_init(mutex, name);
}
--
* [PATCH RT 6/6] read lock Priority Inheritance implementation
2008-04-25 13:09 [PATCH RT 0/6] New read/write locks for PI and multiple readers Steven Rostedt
` (4 preceding siblings ...)
2008-04-25 13:09 ` [PATCH RT 5/6] map read/write locks back to their readers Steven Rostedt
@ 2008-04-25 13:09 ` Steven Rostedt
5 siblings, 0 replies; 7+ messages in thread
From: Steven Rostedt @ 2008-04-25 13:09 UTC (permalink / raw)
To: linux-kernel, linux-rt-users
Cc: Ingo Molnar, Steven Rostedt, Peter Zijlstra, Thomas Gleixner,
Clark Williams, Arnaldo Carvalho de Melo, Jon Masters,
Gregory Haskins, Steven Rostedt
[-- Attachment #1: multi-reader-pi.patch --]
[-- Type: text/plain, Size: 9409 bytes --]
This patch adds priority inheritance (PI) to the read/write locks.
When a task blocks on a lock that, somewhere along the PI chain, is
owned by readers, every reader of that lock with a lower priority than
the blocked task is boosted.
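For illustration, a minimal userspace sketch of the boosting rule (not the
kernel code; the types and names are invented, and lower numeric values
mean higher priority, as in the kernel):

#include <stdio.h>

struct task_model {
	const char *name;
	int prio;		/* lower value == higher priority */
};

/*
 * Boost every reader whose priority is lower (numerically higher)
 * than the priority of the top waiter blocked on the lock.
 */
static void boost_readers(struct task_model *readers, int nr_readers,
			  int waiter_prio)
{
	int i;

	for (i = 0; i < nr_readers; i++)
		if (readers[i].prio > waiter_prio)
			readers[i].prio = waiter_prio;
}

int main(void)
{
	struct task_model readers[] = {
		{ "reader A", 120 },	/* normal task, gets boosted */
		{ "reader B", 30 },	/* already above the waiter, untouched */
	};
	int waiter_prio = 50;		/* e.g. an RT writer blocked on the lock */
	int i;

	boost_readers(readers, 2, waiter_prio);
	for (i = 0; i < 2; i++)
		printf("%s now runs at prio %d\n",
		       readers[i].name, readers[i].prio);
	return 0;
}

In the actual patch the walk is over rwm->readers and the boost is
propagated through rt_mutex_adjust_prio_chain() for each reader task
rather than written directly.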
Signed-off-by: Steven Rostedt <srostedt@redhat.com>
---
include/linux/init_task.h | 8 +++
include/linux/rt_lock.h | 4 +
kernel/fork.c | 1
kernel/rtmutex.c | 115 ++++++++++++++++++++++++++++++++++++++++++----
4 files changed, 118 insertions(+), 10 deletions(-)
Index: linux-2.6.24.4-rt4/include/linux/rt_lock.h
===================================================================
--- linux-2.6.24.4-rt4.orig/include/linux/rt_lock.h 2008-03-25 23:13:17.000000000 -0400
+++ linux-2.6.24.4-rt4/include/linux/rt_lock.h 2008-03-25 23:14:47.000000000 -0400
@@ -13,6 +13,7 @@
#include <linux/rtmutex.h>
#include <asm/atomic.h>
#include <linux/spinlock_types.h>
+#include <linux/sched_prio.h>
#ifdef CONFIG_PREEMPT_RT
/*
@@ -66,6 +67,7 @@ struct rw_mutex {
atomic_t count; /* number of times held for read */
atomic_t owners; /* number of owners as readers */
struct list_head readers;
+ int prio;
};
/*
@@ -98,6 +100,7 @@ typedef struct {
#define __RW_LOCK_UNLOCKED(name) (rwlock_t) \
{ .owners.mutex = __RT_SPIN_INITIALIZER(name.owners.mutex), \
+ .owners.prio = MAX_PRIO, \
RW_DEP_MAP_INIT(name) }
#else /* !PREEMPT_RT */
@@ -196,6 +199,7 @@ extern int __bad_func_type(void);
#define __RWSEM_INITIALIZER(name) \
{ .owners.mutex = __RT_MUTEX_INITIALIZER(name.owners.mutex), \
+ .owners.prio = MAX_PRIO, \
RW_DEP_MAP_INIT(name) }
#define DECLARE_RWSEM(lockname) \
Index: linux-2.6.24.4-rt4/kernel/rtmutex.c
===================================================================
--- linux-2.6.24.4-rt4.orig/kernel/rtmutex.c 2008-03-25 23:13:17.000000000 -0400
+++ linux-2.6.24.4-rt4/kernel/rtmutex.c 2008-03-25 23:14:47.000000000 -0400
@@ -133,6 +133,8 @@ static inline void init_lists(struct rt_
}
}
+static int rt_mutex_get_readers_prio(struct task_struct *task, int prio);
+
/*
* Calculate task priority from the waiter list priority
*
@@ -143,6 +145,8 @@ int rt_mutex_getprio(struct task_struct
{
int prio = min(task->normal_prio, get_rcu_prio(task));
+ prio = rt_mutex_get_readers_prio(task, prio);
+
if (likely(!task_has_pi_waiters(task)))
return prio;
@@ -185,6 +189,11 @@ static void rt_mutex_adjust_prio(struct
*/
int max_lock_depth = 1024;
+static int rt_mutex_adjust_readers(struct rt_mutex *orig_lock,
+ struct rt_mutex_waiter *orig_waiter,
+ struct task_struct *top_task,
+ struct rt_mutex *lock,
+ int recursion_depth);
/*
* Adjust the priority chain. Also used for deadlock detection.
* Decreases task's usage by one - may thus free the task.
@@ -194,7 +203,8 @@ static int rt_mutex_adjust_prio_chain(st
int deadlock_detect,
struct rt_mutex *orig_lock,
struct rt_mutex_waiter *orig_waiter,
- struct task_struct *top_task)
+ struct task_struct *top_task,
+ int recursion_depth)
{
struct rt_mutex *lock;
struct rt_mutex_waiter *waiter, *top_waiter = orig_waiter;
@@ -296,8 +306,13 @@ static int rt_mutex_adjust_prio_chain(st
/* Grab the next task */
task = rt_mutex_owner(lock);
- /* Writers do not boost their readers. */
+ /*
+ * Readers are special. We may need to boost more than one owner.
+ */
if (task == RT_RW_READER) {
+ ret = rt_mutex_adjust_readers(orig_lock, orig_waiter,
+ top_task, lock,
+ recursion_depth);
spin_unlock_irqrestore(&lock->wait_lock, flags);
goto out;
}
@@ -479,9 +494,12 @@ static int task_blocks_on_rt_mutex(struc
spin_unlock(&current->pi_lock);
if (waiter == rt_mutex_top_waiter(lock)) {
- /* readers are not handled */
- if (owner == RT_RW_READER)
- return 0;
+ /* readers are handled differently */
+ if (owner == RT_RW_READER) {
+ res = rt_mutex_adjust_readers(lock, waiter,
+ current, lock, 0);
+ return res;
+ }
spin_lock(&owner->pi_lock);
plist_del(&top_waiter->pi_list_entry, &owner->pi_waiters);
@@ -508,7 +526,7 @@ static int task_blocks_on_rt_mutex(struc
spin_unlock_irqrestore(&lock->wait_lock, flags);
res = rt_mutex_adjust_prio_chain(owner, detect_deadlock, lock, waiter,
- current);
+ current, 0);
spin_lock_irq(&lock->wait_lock);
@@ -625,7 +643,7 @@ static void remove_waiter(struct rt_mute
spin_unlock_irqrestore(&lock->wait_lock, flags);
- rt_mutex_adjust_prio_chain(owner, 0, lock, NULL, current);
+ rt_mutex_adjust_prio_chain(owner, 0, lock, NULL, current, 0);
spin_lock_irq(&lock->wait_lock);
}
@@ -652,7 +670,7 @@ void rt_mutex_adjust_pi(struct task_stru
get_task_struct(task);
spin_unlock_irqrestore(&task->pi_lock, flags);
- rt_mutex_adjust_prio_chain(task, 0, NULL, NULL, task);
+ rt_mutex_adjust_prio_chain(task, 0, NULL, NULL, task, 0);
}
/*
@@ -1088,7 +1106,6 @@ static int try_to_take_rw_read(struct rw
if (rt_rwlock_pending_writer(rwm))
return 0;
if (rt_mutex_has_waiters(mutex)) {
- /* readers don't do PI */
waiter = rt_mutex_top_waiter(mutex);
if (current->prio >= waiter->task->prio)
return 0;
@@ -1102,7 +1119,7 @@ static int try_to_take_rw_read(struct rw
spin_unlock(&mtxowner->pi_lock);
}
} else if (rt_mutex_has_waiters(mutex)) {
- /* Readers don't do PI */
+ /* Readers do things differently with respect to PI */
waiter = rt_mutex_top_waiter(mutex);
spin_lock(&current->pi_lock);
plist_del(&waiter->pi_list_entry, &current->pi_waiters);
@@ -1608,6 +1625,7 @@ rt_read_slowunlock(struct rw_mutex *rwm,
/* If no one is blocked, then clear all ownership */
if (!rt_mutex_has_waiters(mutex)) {
+ rwm->prio = MAX_PRIO;
/*
* If count is not zero, we are under the limit with
* no other readers.
@@ -1838,11 +1856,88 @@ void rt_mutex_rwsem_init(struct rw_mutex
rwm->owner = NULL;
atomic_set(&rwm->count, 0);
atomic_set(&rwm->owners, 0);
+ rwm->prio = MAX_PRIO;
INIT_LIST_HEAD(&rwm->readers);
__rt_mutex_init(mutex, name);
}
+static int rt_mutex_get_readers_prio(struct task_struct *task, int prio)
+{
+ struct reader_lock_struct *rls;
+ struct rw_mutex *rwm;
+ int lock_prio;
+ int i;
+
+ for (i = 0; i < task->reader_lock_count; i++) {
+ rls = &task->owned_read_locks[i];
+ rwm = rls->lock;
+ if (rwm) {
+ lock_prio = rwm->prio;
+ if (prio > lock_prio)
+ prio = lock_prio;
+ }
+ }
+
+ return prio;
+}
+
+static int rt_mutex_adjust_readers(struct rt_mutex *orig_lock,
+ struct rt_mutex_waiter *orig_waiter,
+ struct task_struct *top_task,
+ struct rt_mutex *lock,
+ int recursion_depth)
+{
+ struct reader_lock_struct *rls;
+ struct rt_mutex_waiter *waiter;
+ struct task_struct *task;
+ struct rw_mutex *rwm = container_of(lock, struct rw_mutex, mutex);
+
+ if (rt_mutex_has_waiters(lock)) {
+ waiter = rt_mutex_top_waiter(lock);
+ /*
+ * Do we need to grab the task->pi_lock?
+ * Really, we are only reading it. If it
+ * changes, then that should follow this chain
+ * too.
+ */
+ rwm->prio = waiter->task->prio;
+ } else
+ rwm->prio = MAX_PRIO;
+
+ if (recursion_depth >= MAX_RWLOCK_DEPTH) {
+ WARN_ON(1);
+ return 1;
+ }
+
+ list_for_each_entry(rls, &rwm->readers, list) {
+ task = rls->task;
+ get_task_struct(task);
+ /*
+ * rt_mutex_adjust_prio_chain will do
+ * the put_task_struct
+ */
+ rt_mutex_adjust_prio_chain(task, 0, orig_lock,
+ orig_waiter, top_task,
+ recursion_depth+1);
+ }
+
+ return 0;
+}
+#else
+static int rt_mutex_adjust_readers(struct rt_mutex *orig_lock,
+ struct rt_mutex_waiter *orig_waiter,
+ struct task_struct *top_task,
+ struct rt_mutex *lock,
+ int recursion_depth)
+{
+ return 0;
+}
+
+static int rt_mutex_get_readers_prio(struct task_struct *task, int prio)
+{
+ return prio;
+}
#endif /* CONFIG_PREEMPT_RT */
#ifdef CONFIG_PREEMPT_BKL
Index: linux-2.6.24.4-rt4/include/linux/init_task.h
===================================================================
--- linux-2.6.24.4-rt4.orig/include/linux/init_task.h 2008-03-25 16:41:47.000000000 -0400
+++ linux-2.6.24.4-rt4/include/linux/init_task.h 2008-03-25 23:14:47.000000000 -0400
@@ -99,6 +99,13 @@ extern struct nsproxy init_nsproxy;
#define INIT_PREEMPT_RCU_BOOST(tsk)
#endif /* #else #ifdef CONFIG_PREEMPT_RCU_BOOST */
+#ifdef CONFIG_PREEMPT_RT
+# define INIT_RW_OWNERS(tsk) .owned_read_locks = { \
+ [0 ... (MAX_RWLOCK_DEPTH - 1) ] = { .task = &tsk } },
+#else
+# define INIT_RW_OWNERS(tsk)
+#endif
+
extern struct group_info init_groups;
#define INIT_STRUCT_PID { \
@@ -189,6 +196,7 @@ extern struct group_info init_groups;
INIT_TRACE_IRQFLAGS \
INIT_LOCKDEP \
INIT_PREEMPT_RCU_BOOST(tsk) \
+ INIT_RW_OWNERS(tsk) \
}
Index: linux-2.6.24.4-rt4/kernel/fork.c
===================================================================
--- linux-2.6.24.4-rt4.orig/kernel/fork.c 2008-03-25 23:13:17.000000000 -0400
+++ linux-2.6.24.4-rt4/kernel/fork.c 2008-03-25 23:14:47.000000000 -0400
@@ -1214,6 +1214,7 @@ static struct task_struct *copy_process(
INIT_LIST_HEAD(&p->owned_read_locks[i].list);
p->owned_read_locks[i].count = 0;
p->owned_read_locks[i].lock = NULL;
+ p->owned_read_locks[i].task = p;
}
}
#endif
--