From: npiggin@suse.de
To: Andrew Morton <akpm@linux-foundation.org>
Cc: Manfred Spraul <manfred@colorfullife.com>,
Nadia Derbey <Nadia.Derbey@bull.net>,
Pierre Peiffer <peifferp@gmail.com>,
linux-kernel@vger.kernel.org
Subject: [patch 4/4] ipc: sem optimise simple operations
Date: Tue, 11 Aug 2009 21:09:06 +1000 [thread overview]
Message-ID: <20090811111607.310739140@suse.de> (raw)
In-Reply-To: 20090811110902.255877673@suse.de
[-- Attachment #1: ipc-sem-simple-lists.patch --]
[-- Type: text/plain, Size: 9587 bytes --]
When sysv semaphores are used as a simple semaphore/mutex (single semop
operation on a single sem in the sem-array), the current implementation
is suboptimal because it has to handle the complex cases of multiple
operations atomically on multiple semaphores.
To optimise the simple case, add operations lists to struct sem, where
simple operations are added. When there are complex operations pending,
we ignore the simple lists and continue to use the old algorithm, however
when there are no complex operations, subsequent simple operations need
only look at the simple lists which can be done in a much simper way
(no need to search the list multiple times).
Timed with a simple test program to fork a number of threads which each
lock/unlock a sysv semaphore a number of times (with 1us busyloop in
each critical section and 10us busyloop outside). Run on 2s8c Opteron.
procs time per loop vanilla patched
1 11986ns 11837ns
32 10650ns 7746ns
128 17103ns 8472ns
1024 385885ns 9699ns
We hae a report of an SAP application with high idle time caused by very
high wait-time on a sysv semaphore, on which it is performing these "simple"
operations. Semaphore wait times are somewhat improved with this patch, and
sysv semaphore kernel functions are reduced by 40% in the profile (with this
and the previous patches combined). It is quite significant considering it
is measured on quite a small system (4s16c) and only with 64 processes.
Signed-off-by: Nick Piggin <npiggin@suse.de>
---
include/linux/sem.h | 6 ++
ipc/sem.c | 130 +++++++++++++++++++++++++++++++++++++++++++++++-----
2 files changed, 123 insertions(+), 13 deletions(-)
Index: linux-2.6/ipc/sem.c
===================================================================
--- linux-2.6.orig/ipc/sem.c
+++ linux-2.6/ipc/sem.c
@@ -240,6 +240,7 @@ static int newary(struct ipc_namespace *
key_t key = params->key;
int nsems = params->u.nsems;
int semflg = params->flg;
+ int i;
if (!nsems)
return -EINVAL;
@@ -272,6 +273,15 @@ static int newary(struct ipc_namespace *
ns->used_sems += nsems;
sma->sem_base = (struct sem *) &sma[1];
+
+ for (i = 0; i < nsems; i++) {
+ struct sem *s = &sma->sem_base[i];
+
+ INIT_LIST_HEAD(&s->negv_pending);
+ INIT_LIST_HEAD(&s->zero_pending);
+ }
+
+ sma->complex_count = 0;
INIT_LIST_HEAD(&sma->sem_pending);
INIT_LIST_HEAD(&sma->list_id);
sma->sem_nsems = nsems;
@@ -335,7 +345,7 @@ SYSCALL_DEFINE3(semget, key_t, key, int,
* all at once. Return 0 if yes, 1 if need to sleep, else return error code.
*/
-static int try_atomic_semop (struct sem_array * sma, struct sembuf * sops,
+static int try_atomic_semop(struct sem_array * sma, struct sembuf * sops,
int nsops, struct sem_undo *un, int pid)
{
int result, sem_op;
@@ -421,7 +431,7 @@ static void wake_up_sem_queue(struct sem
/* Go through the pending queue for the indicated semaphore
* looking for tasks that can be completed.
*/
-static void update_queue (struct sem_array * sma)
+static void update_queue(struct sem_array *sma)
{
struct sem_queue *q, *tq;
@@ -438,23 +448,94 @@ again:
continue;
list_del(&q->list);
+ if (q->nsops == 1)
+ list_del(&q->simple_list);
+ else
+ sma->complex_count--;
+
+ alter = q->alter;
+ wake_up_sem_queue(q, error);
/*
* The next operation that must be checked depends on the type
* of the completed operation:
* - if the operation modified the array, then restart from the
* head of the queue and check for threads that might be
- * waiting for semaphore values to become 0.
+ * waiting for this particular semaphore value.
* - if the operation didn't modify the array, then just
* continue.
*/
- alter = q->alter;
- wake_up_sem_queue(q, error);
- if (alter)
+ if (alter && !error)
goto again;
}
}
+static void update_zero_queue(struct sem_array *sma, struct sem *sem)
+{
+ struct sem_queue *q, *tq;
+
+ list_for_each_entry_safe(q, tq, &sem->zero_pending, simple_list) {
+ int error;
+
+ BUG_ON(q->nsops != 1);
+ BUG_ON(q->alter);
+ error = try_atomic_semop(sma, q->sops, q->nsops,
+ q->undo, q->pid);
+ BUG_ON(error);
+
+ list_del(&q->list);
+ list_del(&q->simple_list);
+ wake_up_sem_queue(q, error);
+ }
+}
+
+static void update_negv_queue(struct sem_array *sma, struct sem *sem)
+{
+ struct sem_queue *q, *tq;
+
+ list_for_each_entry_safe(q, tq, &sem->negv_pending, simple_list) {
+ int error;
+
+ BUG_ON(q->nsops != 1);
+ BUG_ON(!q->alter);
+ error = try_atomic_semop(sma, q->sops, q->nsops,
+ q->undo, q->pid);
+
+ /* Does q->sleeper still need to sleep? */
+ if (error > 0)
+ continue;
+
+ /* Continue scanning as in update_queue */
+ list_del(&q->list);
+ list_del(&q->simple_list);
+ wake_up_sem_queue(q, error);
+
+ /*
+ * negv queue should only decrease queue value, so there is
+ * no point in retrying failed ops on the negv queue after
+ * altering the sem value here. If the semval is not greater
+ * than zero, then there will be nothing else to do either.
+ */
+ if (sem->semval <= 0)
+ break;
+ }
+}
+
+static void update_queue_simple(struct sem_array *sma, ushort semnum)
+{
+ if (unlikely(sma->complex_count)) {
+ update_queue(sma);
+ } else {
+ struct sem *sem;
+
+ sem = &sma->sem_base[semnum];
+ if (sem->semval > 0)
+ update_negv_queue(sma, sem);
+ if (sem->semval == 0)
+ update_zero_queue(sma, sem);
+ }
+}
+
/* The following counts are associated to each semaphore:
* semncnt number of tasks waiting on semval being nonzero
* semzcnt number of tasks waiting on semval being zero
@@ -532,6 +613,9 @@ static void freeary(struct ipc_namespace
/* Wake up all pending processes and let them fail with EIDRM. */
list_for_each_entry_safe(q, tq, &sma->sem_pending, list) {
list_del(&q->list);
+ list_del(&q->simple_list);
+ if (q->nsops > 1)
+ sma->complex_count--;
wake_up_sem_queue(q, -EIDRM);
}
@@ -796,7 +880,7 @@ static int semctl_main(struct ipc_namesp
curr->sempid = task_tgid_vnr(current);
sma->sem_ctime = get_seconds();
/* maybe some queued-up processes were waiting for this */
- update_queue(sma);
+ update_queue_simple(sma, semnum);
err = 0;
goto out_unlock;
}
@@ -1169,10 +1253,14 @@ SYSCALL_DEFINE4(semtimedop, int, semid,
if (error)
goto out_unlock_free;
- error = try_atomic_semop (sma, sops, nsops, un, task_tgid_vnr(current));
+ error = try_atomic_semop(sma, sops, nsops, un, task_tgid_vnr(current));
if (error <= 0) {
- if (alter && error == 0)
- update_queue (sma);
+ if (alter && !error) {
+ if (nsops == 1)
+ update_queue_simple(sma, sops->sem_num);
+ else
+ update_queue(sma);
+ }
goto out_unlock_free;
}
@@ -1189,6 +1277,17 @@ SYSCALL_DEFINE4(semtimedop, int, semid,
list_add_tail(&queue.list, &sma->sem_pending);
else
list_add(&queue.list, &sma->sem_pending);
+ if (nsops == 1) {
+ struct sem *curr;
+ curr = &sma->sem_base[sops->sem_num];
+ if (alter)
+ list_add_tail(&queue.simple_list, &curr->negv_pending);
+ else
+ list_add_tail(&queue.simple_list, &curr->zero_pending);
+ } else {
+ INIT_LIST_HEAD(&queue.simple_list);
+ sma->complex_count++;
+ }
queue.status = -EINTR;
queue.sleeper = current;
@@ -1232,6 +1331,10 @@ SYSCALL_DEFINE4(semtimedop, int, semid,
if (timeout && jiffies_left == 0)
error = -EAGAIN;
list_del(&queue.list);
+ if (nsops == 1)
+ list_del(&queue.simple_list);
+ else
+ sma->complex_count--;
out_unlock_free:
sem_unlock(sma);
@@ -1356,11 +1459,14 @@ void exit_sem(struct task_struct *tsk)
if (semaphore->semval > SEMVMX)
semaphore->semval = SEMVMX;
semaphore->sempid = task_tgid_vnr(current);
+ if (likely(!sma->complex_count))
+ update_queue_simple(sma, i);
}
}
sma->sem_otime = get_seconds();
/* maybe some queued-up processes were waiting for this */
- update_queue(sma);
+ if (unlikely(sma->complex_count))
+ update_queue(sma);
sem_unlock(sma);
call_rcu(&un->rcu, free_un);
@@ -1374,7 +1480,7 @@ static int sysvipc_sem_proc_show(struct
struct sem_array *sma = it;
return seq_printf(s,
- "%10d %10d %4o %10lu %5u %5u %5u %5u %10lu %10lu\n",
+ "%10d %10d %4o %10u %5u %5u %5u %5u %10lu %10lu\n",
sma->sem_perm.key,
sma->sem_perm.id,
sma->sem_perm.mode,
Index: linux-2.6/include/linux/sem.h
===================================================================
--- linux-2.6.orig/include/linux/sem.h
+++ linux-2.6/include/linux/sem.h
@@ -86,6 +86,8 @@ struct task_struct;
struct sem {
int semval; /* current value */
int sempid; /* pid of last operation */
+ struct list_head negv_pending;
+ struct list_head zero_pending;
};
/* One sem_array data structure for each set of semaphores in the system. */
@@ -96,11 +98,13 @@ struct sem_array {
struct sem *sem_base; /* ptr to first semaphore in array */
struct list_head sem_pending; /* pending operations to be processed */
struct list_head list_id; /* undo requests on this array */
- unsigned long sem_nsems; /* no. of semaphores in array */
+ int sem_nsems; /* no. of semaphores in array */
+ int complex_count; /* pending complex operations */
};
/* One queue for each sleeping process in the system. */
struct sem_queue {
+ struct list_head simple_list; /* queue of pending operations */
struct list_head list; /* queue of pending operations */
struct task_struct *sleeper; /* this process */
struct sem_undo *undo; /* undo structure */
next prev parent reply other threads:[~2009-08-11 11:54 UTC|newest]
Thread overview: 18+ messages / expand[flat|nested] mbox.gz Atom feed top
2009-08-11 11:09 [patch 0/4] ipc sem improvements npiggin
2009-08-11 11:09 ` [patch 1/4] ipc: sem optimise undo list search npiggin
2009-08-16 13:17 ` Manfred Spraul
2009-08-11 11:09 ` [patch 2/4] ipc: sem use list operations npiggin
2009-08-16 13:18 ` Manfred Spraul
2009-08-11 11:09 ` [patch 3/4] ipc: sem preempt improve npiggin
2009-08-16 13:20 ` Manfred Spraul
2009-08-11 11:09 ` npiggin [this message]
2009-08-11 18:19 ` [patch 4/4] ipc: sem optimise simple operations Manfred Spraul
2009-08-11 18:23 ` Zach Brown
2009-08-12 4:07 ` Nick Piggin
2009-08-12 18:35 ` Manfred Spraul
2009-08-14 8:58 ` Nick Piggin
2009-08-14 17:53 ` Manfred Spraul
2009-08-11 20:07 ` Cyrill Gorcunov
2009-08-12 4:48 ` Nick Piggin
2009-08-12 5:43 ` Cyrill Gorcunov
2009-08-14 18:48 ` Manfred Spraul
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20090811111607.310739140@suse.de \
--to=npiggin@suse.de \
--cc=Nadia.Derbey@bull.net \
--cc=akpm@linux-foundation.org \
--cc=linux-kernel@vger.kernel.org \
--cc=manfred@colorfullife.com \
--cc=peifferp@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox