From: Shailabh Nagar <nagar@watson.ibm.com>
To: linux-kernel <linux-kernel@vger.kernel.org>
Cc: LSE <lse-tech@lists.sourceforge.net>, Jay Lan <jlan@engr.sgi.com>
Subject: [Patch 2/8] Sync block I/O and swapin delay collection
Date: Fri, 21 Apr 2006 22:29:36 -0400 [thread overview]
Message-ID: <44499510.9040501@watson.ibm.com> (raw)
In-Reply-To: <444991EF.3080708@watson.ibm.com>
Changelog
Fixes comments by akpm
- avoid creating new per-process flag PF_SWAPIN
delayacct-blkio-swapin.patch
Collect per-task block I/O delay statistics.
Unlike earlier iterations of the delay accounting
patches, now delays are only collected for the actual
I/O waits rather than trying to cover the delays seen in
I/O submission paths.
Account separately for block I/O delays
incurred as a result of swapin page faults whose
frequency can be affected by the task/process' rss limit.
Hence swapin delays can act as feedback for rss limit changes
independent of I/O priority changes.
Signed-off-by: Shailabh Nagar <nagar@watson.ibm.com>
include/linux/delayacct.h | 25 +++++++++++++++++++++++++
include/linux/sched.h | 6 ++++++
kernel/delayacct.c | 19 +++++++++++++++++++
kernel/sched.c | 5 +++++
mm/memory.c | 4 ++++
5 files changed, 59 insertions(+)
Index: linux-2.6.17-rc1/include/linux/delayacct.h
===================================================================
--- linux-2.6.17-rc1.orig/include/linux/delayacct.h 2006-04-21 22:27:18.000000000 -0400
+++ linux-2.6.17-rc1/include/linux/delayacct.h 2006-04-21 22:27:19.000000000 -0400
@@ -19,6 +19,13 @@
#include <linux/sched.h>
+/*
+ * Per-task flags relevant to delay accounting
+ * maintained privately to avoid exhausting similar flags in sched.h:PF_*
+ * Used to set current->delays->flags
+ */
+#define DELAYACCT_PF_SWAPIN 0x00000001 /* I am doing a swapin */
+
#ifdef CONFIG_TASK_DELAY_ACCT
extern int delayacct_on; /* Delay accounting turned on/off */
@@ -26,6 +33,8 @@ extern kmem_cache_t *delayacct_cache;
extern void delayacct_init(void);
extern void __delayacct_tsk_init(struct task_struct *);
extern void __delayacct_tsk_exit(struct task_struct *);
+extern void __delayacct_blkio_start(void);
+extern void __delayacct_blkio_end(void);
static inline void delayacct_set_flag(int flag)
{
@@ -53,6 +62,18 @@ static inline void delayacct_tsk_exit(st
__delayacct_tsk_exit(tsk);
}
+static inline void delayacct_blkio_start(void)
+{
+ if (current->delays)
+ __delayacct_blkio_start();
+}
+
+static inline void delayacct_blkio_end(void)
+{
+ if (current->delays)
+ __delayacct_blkio_end();
+}
+
#else
static inline void delayacct_set_flag(int flag)
{}
@@ -64,6 +85,10 @@ static inline void delayacct_tsk_init(st
{}
static inline void delayacct_tsk_exit(struct task_struct *tsk)
{}
+static inline void delayacct_blkio_start(void)
+{}
+static inline void delayacct_blkio_end(void)
+{}
#endif /* CONFIG_TASK_DELAY_ACCT */
#endif
Index: linux-2.6.17-rc1/kernel/delayacct.c
===================================================================
--- linux-2.6.17-rc1.orig/kernel/delayacct.c 2006-04-21 22:27:18.000000000 -0400
+++ linux-2.6.17-rc1/kernel/delayacct.c 2006-04-21 22:27:19.000000000 -0400
@@ -85,3 +85,22 @@ static inline void delayacct_end(struct
spin_unlock(&current->delays->lock);
}
+void __delayacct_blkio_start(void)
+{
+ delayacct_start(&current->delays->blkio_start);
+}
+
+void __delayacct_blkio_end(void)
+{
+ if (current->delays->flags & DELAYACCT_PF_SWAPIN)
+ /* Swapin block I/O */
+ delayacct_end(&current->delays->blkio_start,
+ &current->delays->blkio_end,
+ &current->delays->swapin_delay,
+ &current->delays->swapin_count);
+ else /* Other block I/O */
+ delayacct_end(&current->delays->blkio_start,
+ &current->delays->blkio_end,
+ &current->delays->blkio_delay,
+ &current->delays->blkio_count);
+}
Index: linux-2.6.17-rc1/include/linux/sched.h
===================================================================
--- linux-2.6.17-rc1.orig/include/linux/sched.h 2006-04-21 22:27:18.000000000 -0400
+++ linux-2.6.17-rc1/include/linux/sched.h 2006-04-21 22:27:19.000000000 -0400
@@ -550,6 +550,12 @@ struct task_delay_info {
* Atomicity of updates to XXX_delay, XXX_count protected by
* single lock above (split into XXX_lock if contention is an issue).
*/
+
+ struct timespec blkio_start, blkio_end; /* Shared by blkio, swapin */
+ u64 blkio_delay; /* wait for sync block io completion */
+ u64 swapin_delay; /* wait for swapin block io completion */
+ u32 blkio_count;
+ u32 swapin_count;
};
#endif
Index: linux-2.6.17-rc1/kernel/sched.c
===================================================================
--- linux-2.6.17-rc1.orig/kernel/sched.c 2006-04-21 22:27:18.000000000 -0400
+++ linux-2.6.17-rc1/kernel/sched.c 2006-04-21 22:27:19.000000000 -0400
@@ -50,6 +50,7 @@
#include <linux/times.h>
#include <linux/acct.h>
#include <linux/kprobes.h>
+#include <linux/delayacct.h>
#include <asm/tlb.h>
#include <asm/unistd.h>
@@ -4144,9 +4145,11 @@ void __sched io_schedule(void)
{
struct runqueue *rq = &per_cpu(runqueues, raw_smp_processor_id());
+ delayacct_blkio_start();
atomic_inc(&rq->nr_iowait);
schedule();
atomic_dec(&rq->nr_iowait);
+ delayacct_blkio_end();
}
EXPORT_SYMBOL(io_schedule);
@@ -4156,9 +4159,11 @@ long __sched io_schedule_timeout(long ti
struct runqueue *rq = &per_cpu(runqueues, raw_smp_processor_id());
long ret;
+ delayacct_blkio_start();
atomic_inc(&rq->nr_iowait);
ret = schedule_timeout(timeout);
atomic_dec(&rq->nr_iowait);
+ delayacct_blkio_end();
return ret;
}
Index: linux-2.6.17-rc1/mm/memory.c
===================================================================
--- linux-2.6.17-rc1.orig/mm/memory.c 2006-04-21 22:27:18.000000000 -0400
+++ linux-2.6.17-rc1/mm/memory.c 2006-04-21 22:27:19.000000000 -0400
@@ -48,6 +48,7 @@
#include <linux/rmap.h>
#include <linux/module.h>
#include <linux/init.h>
+#include <linux/delayacct.h>
#include <asm/pgalloc.h>
#include <asm/uaccess.h>
@@ -1880,6 +1881,7 @@ static int do_swap_page(struct mm_struct
entry = pte_to_swp_entry(orig_pte);
again:
+ delayacct_set_flag(DELAYACCT_PF_SWAPIN);
page = lookup_swap_cache(entry);
if (!page) {
swapin_readahead(entry, address, vma);
@@ -1892,6 +1894,7 @@ again:
page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
if (likely(pte_same(*page_table, orig_pte)))
ret = VM_FAULT_OOM;
+ delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
goto unlock;
}
@@ -1903,6 +1906,7 @@ again:
mark_page_accessed(page);
lock_page(page);
+ delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
if (!PageSwapCache(page)) {
/* Page migration has occured */
unlock_page(page);
next prev parent reply other threads:[~2006-04-22 21:18 UTC|newest]
Thread overview: 33+ messages / expand[flat|nested] mbox.gz Atom feed top
2006-04-22 2:16 [Patch 0/8] per-task delay accounting Shailabh Nagar
2006-04-22 2:23 ` [Patch 1/8] Setup Shailabh Nagar
2006-04-24 2:02 ` Randy.Dunlap
2006-04-24 17:26 ` Shailabh Nagar
2006-04-22 2:29 ` Shailabh Nagar [this message]
2006-04-22 2:33 ` [Patch 3/8] cpu delay collection via schedstats Shailabh Nagar
2006-04-22 2:35 ` [Patch 4/8] Utilities for genetlink usage Shailabh Nagar
2006-04-22 2:35 ` Shailabh Nagar
2006-04-22 2:37 ` [Patch 5/8] taskstats interface Shailabh Nagar
2006-04-27 1:12 ` Jay Lan
2006-04-27 4:00 ` Shailabh Nagar
2006-04-27 6:42 ` [Lse-tech] " Balbir Singh
2006-04-27 17:52 ` Jay Lan
2006-04-27 18:27 ` Balbir Singh
2006-04-27 19:34 ` Jay Lan
2006-04-28 2:59 ` Balbir Singh
2006-04-28 18:20 ` Jay Lan
2006-04-28 18:35 ` Balbir Singh
2006-04-22 2:39 ` [Patch 6/8] delay accounting usage of " Shailabh Nagar
2006-04-22 2:40 ` [Patch 7/8] documentation Shailabh Nagar
2006-04-22 2:42 ` [Patch 8/8] /proc export of aggregated block I/O delays Shailabh Nagar
2006-04-22 7:46 ` [Lse-tech] " Andi Kleen
2006-04-25 15:07 ` [Patch 0/8] per-task delay accounting Shailabh Nagar
-- strict thread matches above, loose matches on Subject: below --
2006-05-02 6:14 [Patch 2/8] Sync block I/O and swapin delay collection Balbir Singh
2006-05-08 21:19 ` Andrew Morton
2006-05-09 3:53 ` Balbir Singh
2006-05-09 4:23 ` Nick Piggin
2006-05-09 5:45 ` Balbir Singh
2006-05-09 5:57 ` Nick Piggin
2006-05-09 8:06 ` Balbir Singh
2006-05-09 8:20 ` Nick Piggin
2006-05-09 17:27 ` Balbir Singh
2006-05-10 0:15 ` Nick Piggin
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=44499510.9040501@watson.ibm.com \
--to=nagar@watson.ibm.com \
--cc=jlan@engr.sgi.com \
--cc=linux-kernel@vger.kernel.org \
--cc=lse-tech@lists.sourceforge.net \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.