From: Shailabh Nagar <nagar@watson.ibm.com>
To: Andrew Morton <akpm@osdl.org>
Cc: Jay Lan <jlan@sgi.com>, Balbir Singh <balbir@in.ibm.com>,
Chris Sturtivant <csturtiv@sgi.com>,
linux-kernel <linux-kernel@vger.kernel.org>
Subject: [Patch] Revised locking for taskstats interface
Date: Fri, 23 Jun 2006 13:52:04 -0400 [thread overview]
Message-ID: <449C2A44.9000206@watson.ibm.com> (raw)
Convert locking used within taskstats interface and delay accounting
code to be more fine-grained.
Dynamic allocation of the delays data structure led to exit race
conditions that were being fixed through use of a global mutex at the
taskstats interface level. The same mutex was also being used for
protecting per-task delay structure allocation. Together, these were
causing higher contention and unnecessary serialization.
This patch switches to a per-task locking for protecting tsk->delays
and eliminates global locking within the taskstats interface.
Results collected by Jay Lan (jlan@sgi.com) from running a test
with rapid forks/exits shows substantial improvement in system time.
For a benchmark that only forks+exits 1000 threads and runs for
5000 iterations, the reduction seen are as follows:
base +patch %improvement
user 0.06 0.07 -16%
system 1.34 0.86 35%
elapsed 622 470 24%
Signed-off-by: Shailabh Nagar <nagar@watson.ibm.com>
---
include/linux/sched.h | 1 +
kernel/delayacct.c | 18 ++++++++++++++++--
kernel/taskstats.c | 7 -------
3 files changed, 17 insertions(+), 9 deletions(-)
Index: linux-2.6.17/include/linux/sched.h
===================================================================
--- linux-2.6.17.orig/include/linux/sched.h 2006-06-22 02:56:44.000000000 -0400
+++ linux-2.6.17/include/linux/sched.h 2006-06-22 15:18:09.000000000 -0400
@@ -933,6 +933,7 @@ struct task_struct {
*/
struct pipe_inode_info *splice_pipe;
#ifdef CONFIG_TASK_DELAY_ACCT
+ spinlock_t delays_lock;
struct task_delay_info *delays;
#endif
};
Index: linux-2.6.17/kernel/delayacct.c
===================================================================
--- linux-2.6.17.orig/kernel/delayacct.c 2006-06-22 02:56:44.000000000 -0400
+++ linux-2.6.17/kernel/delayacct.c 2006-06-22 15:18:09.000000000 -0400
@@ -41,6 +41,10 @@ void delayacct_init(void)
void __delayacct_tsk_init(struct task_struct *tsk)
{
+ spin_lock_init(&tsk->delays_lock);
+ /* No need to acquire tsk->delays_lock for allocation here unless
+ __delayacct_tsk_init called after tsk is attached to tasklist
+ */
tsk->delays = kmem_cache_zalloc(delayacct_cache, SLAB_KERNEL);
if (tsk->delays)
spin_lock_init(&tsk->delays->lock);
@@ -49,9 +53,9 @@ void __delayacct_tsk_init(struct task_st
void __delayacct_tsk_exit(struct task_struct *tsk)
{
struct task_delay_info *delays = tsk->delays;
- mutex_lock(&taskstats_exit_mutex);
+ spin_lock(&tsk->delays_lock);
tsk->delays = NULL;
- mutex_unlock(&taskstats_exit_mutex);
+ spin_unlock(&tsk->delays_lock);
kmem_cache_free(delayacct_cache, delays);
}
@@ -114,6 +118,14 @@ int __delayacct_add_tsk(struct taskstats
struct timespec ts;
unsigned long t1,t2,t3;
+ spin_lock(&tsk->delays_lock);
+
+ /* Though tsk->delays accessed later, early exit avoids
+ * unnecessary returning of other data
+ */
+ if (!tsk->delays)
+ goto done;
+
tmp = (s64)d->cpu_run_real_total;
cputime_to_timespec(tsk->utime + tsk->stime, &ts);
tmp += timespec_to_ns(&ts);
@@ -148,6 +160,8 @@ int __delayacct_add_tsk(struct taskstats
d->swapin_count += tsk->delays->swapin_count;
spin_unlock(&tsk->delays->lock);
+done:
+ spin_unlock(&tsk->delays_lock);
return 0;
}
Index: linux-2.6.17/kernel/taskstats.c
===================================================================
--- linux-2.6.17.orig/kernel/taskstats.c 2006-06-22 02:56:44.000000000 -0400
+++ linux-2.6.17/kernel/taskstats.c 2006-06-22 15:27:09.000000000 -0400
@@ -25,7 +25,6 @@
static DEFINE_PER_CPU(__u32, taskstats_seqnum) = { 0 };
static int family_registered = 0;
kmem_cache_t *taskstats_cache;
-DEFINE_MUTEX(taskstats_exit_mutex);
static struct genl_family family = {
.id = GENL_ID_GENERATE,
@@ -193,7 +192,6 @@ static int taskstats_send_stats(struct s
if (rc < 0)
return rc;
- mutex_lock(&taskstats_exit_mutex);
if (info->attrs[TASKSTATS_CMD_ATTR_PID]) {
u32 pid = nla_get_u32(info->attrs[TASKSTATS_CMD_ATTR_PID]);
rc = fill_pid(pid, NULL, &stats);
@@ -219,7 +217,6 @@ static int taskstats_send_stats(struct s
goto err;
}
- mutex_unlock(&taskstats_exit_mutex);
nla_nest_end(rep_skb, na);
return send_reply(rep_skb, info->snd_pid, TASKSTATS_MSG_UNICAST);
@@ -228,7 +225,6 @@ nla_put_failure:
return genlmsg_cancel(rep_skb, reply);
err:
nlmsg_free(rep_skb);
- mutex_unlock(&taskstats_exit_mutex);
return rc;
}
@@ -246,8 +242,6 @@ void taskstats_exit_send(struct task_str
if (!family_registered || !tidstats)
return;
- mutex_lock(&taskstats_exit_mutex);
-
is_thread_group = !thread_group_empty(tsk);
rc = 0;
@@ -305,7 +299,6 @@ nla_put_failure:
err_skb:
nlmsg_free(rep_skb);
ret:
- mutex_unlock(&taskstats_exit_mutex);
return;
}
next reply other threads:[~2006-06-23 17:52 UTC|newest]
Thread overview: 5+ messages / expand[flat|nested] mbox.gz Atom feed top
2006-06-23 17:52 Shailabh Nagar [this message]
2006-06-23 20:27 ` [Patch] Revised locking for taskstats interface Jay Lan
2006-06-24 6:32 ` Andrew Morton
2006-06-24 7:36 ` Shailabh Nagar
2006-06-24 8:15 ` Andrew Morton
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=449C2A44.9000206@watson.ibm.com \
--to=nagar@watson.ibm.com \
--cc=akpm@osdl.org \
--cc=balbir@in.ibm.com \
--cc=csturtiv@sgi.com \
--cc=jlan@sgi.com \
--cc=linux-kernel@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox