From: "Li Yu" <raise.sail@gmail.com>
To: "LKML" <linux-kernel@vger.kernel.org>
Subject: [PATCH] sched: A simple priority ceiling framework and an implementation for mutex.
Date: Mon, 11 Dec 2006 18:48:47 +0800 [thread overview]
Message-ID: <200612111848436406815@gmail.com> (raw)
Some tips of the design:
1. The prio_floor linked-list save all mutex is holded by each task in time order. It is a stack data srtucture.
2. The prio_rmin member of the mutex_waiter is used for tracing current minimum-value(highest) priority of this mutex.
3. It seem the task_struct->nr_null_floors have a bit of redundancy, it's used to guarantee everything's OK when we failed to allocate prio_floor structure.
4. At last, the magic value PRIO_UNDERGROUND (0x19491001), it is the Chinese National Day ;)
The most largest question is how meansure the performance of this patch ? I have no any good idea for this.
Good luck.
Signed-off-by: Li Yu <raise.sail@gmail.com>
diff -Naurp linux-2.6.19/include/linux/prio_ceiling.h linux-2.6.19.ceiling/include/linux/prio_ceiling.h
--- linux-2.6.19/include/linux/prio_ceiling.h 1970-01-01 08:00:00.000000000 +0800
+++ linux-2.6.19.ceiling/include/linux/prio_ceiling.h 2006-12-11 14:39:09.000000000 +0800
@@ -0,0 +1,17 @@
+#ifndef _LINUX_PRIO_CEILING_H
+#define _LINUX_PRIO_CEILING_H
+
+#ifdef __KERNEL__
+
+struct prio_floor {
+ struct prio_floor *next; /* linked in priority stair */
+ void *id; /* which mutex own this floor ? */
+ int prio; /* save the priority when acquire */
+};
+
+#define FLOOR_ID_NONE ((void*)0)
+#define FLOOR_ID_NEXT ((void*)1)
+
+#endif /* __KERNEL__ */
+
+#endif
diff -Naurp linux-2.6.19/include/linux/sched.h linux-2.6.19.ceiling/include/linux/sched.h
--- linux-2.6.19/include/linux/sched.h 2006-11-30 05:57:37.000000000 +0800
+++ linux-2.6.19.ceiling/include/linux/sched.h 2006-12-11 14:39:44.000000000 +0800
@@ -2,7 +2,6 @@
#define _LINUX_SCHED_H
#include <linux/auxvec.h> /* For AT_VECTOR_SIZE */
-
/*
* cloning flags:
*/
@@ -69,6 +68,7 @@ struct sched_param {
#include <linux/fs_struct.h>
#include <linux/compiler.h>
#include <linux/completion.h>
+#include <linux/prio_ceiling.h>
#include <linux/pid.h>
#include <linux/percpu.h>
#include <linux/topology.h>
@@ -501,6 +501,7 @@ struct signal_struct {
#define MAX_RT_PRIO MAX_USER_RT_PRIO
#define MAX_PRIO (MAX_RT_PRIO + 40)
+#define PRIO_UNDERGROUND (0x19491001)
#define rt_prio(prio) unlikely((prio) < MAX_RT_PRIO)
#define rt_task(p) rt_prio((p)->prio)
@@ -790,6 +791,12 @@ struct task_struct {
#endif
int load_weight; /* for niceness load balancing purposes */
int prio, static_prio, normal_prio;
+
+ int prio_ceiling; /* for priority ceiling */
+ struct prio_floor *prio_stair;
+ void *fly_floor;
+ unsigned long nr_null_floors;
+
struct list_head run_list;
struct prio_array *array;
@@ -1076,6 +1083,82 @@ static inline int is_init(struct task_st
extern struct pid *cad_pid;
+#define task_top_prio_floor(task) ((task)->prio_stair)
+
+static inline void task_push_prio_floor(struct prio_floor *floor)
+{
+ struct task_struct *tsk = current;
+
+ if (unlikely(!floor)) { /* so we are safety when out of memory. */
+ ++tsk->nr_null_floors;
+ return;
+ }
+
+ /* keep the top of priority stack have min priority in value. */
+ if (tsk->prio_stair && tsk->prio_stair->prio < floor->prio)
+ floor->prio = tsk->prio_stair->prio;
+ if (floor->prio < tsk->prio)
+ tsk->prio_ceiling = floor->prio;
+
+ if (tsk->prio_stair && tsk->prio_stair == floor) {
+ floor->next = tsk->prio_stair;
+ tsk->prio_stair = floor->next;
+ }
+ return;
+}
+
+static inline void task_pop_prio_floor(void)
+{
+ struct task_struct *tsk = current;
+ struct prio_floor *top;
+
+ if (unlikely(tsk->nr_null_floors)) {
+ --tsk->nr_null_floors;
+ return;
+ }
+
+ if ((top=tsk->prio_stair))
+ tsk->prio_stair = tsk->prio_stair->next;
+ tsk->prio_ceiling = (tsk->prio_stair ? tsk->prio_stair->prio : PRIO_UNDERGROUND);
+ return;
+}
+
+static inline void task_climb_down(struct task_struct *task)
+{
+ struct prio_floor *top = task->prio_stair;
+
+ if (!top) {
+ /* task hold not any mutex, to restore our normal priority game */
+ task->prio_ceiling = PRIO_UNDERGROUND;
+ task->fly_floor = FLOOR_ID_NONE;
+ return;
+ }
+ if (task->fly_floor && FLOOR_ID_NEXT != task->fly_floor ) {
+ if (task->fly_floor == top->id)
+ task->fly_floor = FLOOR_ID_NEXT;
+ } else
+ task->prio_ceiling = top->prio;
+}
+
+static inline void __task_climb_up(struct task_struct *task, struct prio_floor *floor)
+{
+ if (floor->prio >= task->prio_ceiling)
+ return;
+ task->prio_ceiling = floor->prio;
+ task->fly_floor = floor->id;
+}
+
+#define task_climb_up_wait __task_climb_up
+
+static inline void task_climb_up_wake(struct task_struct *task, void *id, int prio)
+{
+ struct prio_floor floor;
+
+ floor.id = id;
+ floor.prio = prio;
+ __task_climb_up(task, &floor);
+}
+
extern void free_task(struct task_struct *tsk);
#define get_task_struct(tsk) do { atomic_inc(&(tsk)->usage); } while(0)
--- linux-2.6.19/kernel/sched.c 2006-11-30 05:57:37.000000000 +0800
+++ linux-2.6.19.ceiling/kernel/sched.c 2006-12-11 14:42:39.000000000 +0800
@@ -727,6 +727,9 @@ static inline int __normal_prio(struct t
bonus = CURRENT_BONUS(p) - MAX_BONUS / 2;
prio = p->static_prio - bonus;
+
+ if (!rt_prio(p->prio_ceiling) && p->prio_ceiling < prio)
+ prio = p->prio_ceiling;
if (prio < MAX_RT_PRIO)
prio = MAX_RT_PRIO;
if (prio > MAX_PRIO-1)
diff -Naurp linux-2.6.19/include/linux/mutex.h linux-2.6.19.ceiling/include/linux/mutex.h
--- linux-2.6.19/include/linux/mutex.h 2006-11-30 05:57:37.000000000 +0800
+++ linux-2.6.19.ceiling/include/linux/mutex.h 2006-12-11 14:35:26.000000000 +0800
@@ -14,6 +14,7 @@
#include <linux/spinlock_types.h>
#include <linux/linkage.h>
#include <linux/lockdep.h>
+#include <linux/prio_ceiling.h>
#include <asm/atomic.h>
@@ -49,6 +50,8 @@ struct mutex {
atomic_t count;
spinlock_t wait_lock;
struct list_head wait_list;
+ struct task_struct *holder;
+ struct prio_floor floor;
#ifdef CONFIG_DEBUG_MUTEXES
struct thread_info *owner;
const char *name;
@@ -66,6 +69,7 @@ struct mutex {
struct mutex_waiter {
struct list_head list;
struct task_struct *task;
+ int prio_rmin;
#ifdef CONFIG_DEBUG_MUTEXES
struct mutex *lock;
void *magic;
--- linux-2.6.19/kernel/mutex.c 2006-11-30 05:57:37.000000000 +0800
+++ linux-2.6.19.ceiling/kernel/mutex.c 2006-12-11 14:39:57.000000000 +0800
@@ -89,6 +89,7 @@ void inline fastcall __sched mutex_lock(
* 'unlocked' into 'locked' state.
*/
__mutex_fastpath_lock(&lock->count, __mutex_lock_slowpath);
+ task_push_prio_floor(&lock->floor);
}
EXPORT_SYMBOL(mutex_lock);
@@ -113,6 +114,7 @@ void fastcall __sched mutex_unlock(struc
* The unlocking fastpath is the 0->1 transition from 'locked'
* into 'unlocked' state:
*/
+ task_pop_prio_floor();
__mutex_fastpath_unlock(&lock->count, __mutex_unlock_slowpath);
}
@@ -135,6 +137,19 @@ __mutex_lock_common(struct mutex *lock,
mutex_acquire(&lock->dep_map, subclass, 0, _RET_IP_);
debug_mutex_add_waiter(lock, &waiter, task->thread_info);
+ if (!list_empty(&lock->wait_list)) {
+ struct mutex_waiter *last =
+ list_entry(lock->wait_list.prev,
+ struct mutex_waiter, list);
+ if (task->prio < last->prio_rmin)
+ waiter.prio_rmin = task->prio;
+ else
+ waiter.prio_rmin = last->prio_rmin;
+ } else
+ waiter.prio_rmin = task->prio;
+ lock->floor.prio = waiter.prio_rmin;
+ lock->floor.id = lock;
+
/* add waiting tasks to the end of the waitqueue (FIFO): */
list_add_tail(&waiter.list, &lock->wait_list);
waiter.task = task;
@@ -167,6 +182,8 @@ __mutex_lock_common(struct mutex *lock,
return -EINTR;
}
__set_task_state(task, state);
+
+ task_climb_up_wait(lock->holder, &lock->floor);
/* didnt get the lock, go to sleep: */
spin_unlock_mutex(&lock->wait_lock, flags);
@@ -177,6 +194,8 @@ __mutex_lock_common(struct mutex *lock,
/* got the lock - rejoice! */
mutex_remove_waiter(lock, &waiter, task->thread_info);
debug_mutex_set_owner(lock, task->thread_info);
+
+ lock->holder = task;
/* set it to 0 if there are no waiters left: */
if (likely(list_empty(&lock->wait_list)))
@@ -229,14 +248,22 @@ __mutex_unlock_common_slowpath(atomic_t
if (__mutex_slowpath_needs_to_unlock())
atomic_set(&lock->count, 1);
+ lock->holder = NULL;
+ task_climb_down(current);
+
if (!list_empty(&lock->wait_list)) {
/* get the first entry from the wait-list: */
struct mutex_waiter *waiter =
list_entry(lock->wait_list.next,
struct mutex_waiter, list);
+ struct mutex_waiter *tailor =
+ list_entry(lock->wait_list.prev,
+ struct mutex_waiter, list);
debug_mutex_wake_waiter(lock, waiter);
+ task_climb_up_wake(waiter->task, lock, tailor->prio_rmin);
+
wake_up_process(waiter->task);
}
@@ -331,8 +358,12 @@ static inline int __mutex_trylock_slowpa
*/
int fastcall __sched mutex_trylock(struct mutex *lock)
{
- return __mutex_fastpath_trylock(&lock->count,
- __mutex_trylock_slowpath);
+ if (__mutex_fastpath_trylock(&lock->count, __mutex_trylock_slowpath)) {
+ task_push_prio_floor(&lock->floor);
+ return 1;
+ } else
+ return 0;
+
}
EXPORT_SYMBOL(mutex_trylock);
reply other threads:[~2006-12-11 10:48 UTC|newest]
Thread overview: [no followups] expand[flat|nested] mbox.gz Atom feed
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=200612111848436406815@gmail.com \
--to=raise.sail@gmail.com \
--cc=linux-kernel@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.