public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [Patch 2/8] Sync block I/O and swapin delay collection
@ 2006-05-02  6:14 Balbir Singh
  2006-05-08 21:19 ` Andrew Morton
  0 siblings, 1 reply; 12+ messages in thread
From: Balbir Singh @ 2006-05-02  6:14 UTC (permalink / raw)
  To: linux-kernel; +Cc: lse-tech, jlan


Changelog

Fixes comments by akpm
- avoid creating new per-process flag PF_SWAPIN

Other changes
- do not mix spaces and tabs

delayacct-blkio-swapin.patch

Collect per-task block I/O delay statistics.

Unlike earlier iterations of the delay accounting
patches, now delays are only collected for the actual
I/O waits rather than trying to cover the delays seen in
I/O submission paths.

Account separately for block I/O delays
incurred as a result of swapin page faults whose
frequency can be affected by the task/process' rss limit.
Hence swapin delays can act as feedback for rss limit changes
independent of I/O priority changes.

Signed-off-by: Shailabh Nagar <nagar@watson.ibm.com>
Signed-off-by: Balbir Singh <balbir@in.ibm.com>
---

 include/linux/delayacct.h |   25 +++++++++++++++++++++++++
 include/linux/sched.h     |    6 ++++++
 kernel/delayacct.c        |   19 +++++++++++++++++++
 kernel/sched.c            |    5 +++++
 mm/memory.c               |    4 ++++
 5 files changed, 59 insertions(+)

diff -puN include/linux/delayacct.h~delayacct-blkio-swapin include/linux/delayacct.h
--- linux-2.6.17-rc3/include/linux/delayacct.h~delayacct-blkio-swapin	2006-04-28 23:48:43.000000000 +0530
+++ linux-2.6.17-rc3-balbir/include/linux/delayacct.h	2006-05-02 07:05:21.000000000 +0530
@@ -19,6 +19,13 @@
 
 #include <linux/sched.h>
 
+/*
+ * Per-task flags relevant to delay accounting
+ * maintained privately to avoid exhausting similar flags in sched.h:PF_*
+ * Used to set current->delays->flags
+ */
+#define DELAYACCT_PF_SWAPIN	0x00000001	/* I am doing a swapin */
+
 #ifdef CONFIG_TASK_DELAY_ACCT
 
 extern int delayacct_on;	/* Delay accounting turned on/off */
@@ -26,6 +33,8 @@ extern kmem_cache_t *delayacct_cache;
 extern void delayacct_init(void);
 extern void __delayacct_tsk_init(struct task_struct *);
 extern void __delayacct_tsk_exit(struct task_struct *);
+extern void __delayacct_blkio_start(void);
+extern void __delayacct_blkio_end(void);
 
 static inline void delayacct_set_flag(int flag)
 {
@@ -53,6 +62,18 @@ static inline void delayacct_tsk_exit(st
 		__delayacct_tsk_exit(tsk);
 }
 
+static inline void delayacct_blkio_start(void)
+{
+	if (current->delays)
+		__delayacct_blkio_start();
+}
+
+static inline void delayacct_blkio_end(void)
+{
+	if (current->delays)
+		__delayacct_blkio_end();
+}
+
 #else
 static inline void delayacct_set_flag(int flag)
 {}
@@ -64,6 +85,10 @@ static inline void delayacct_tsk_init(st
 {}
 static inline void delayacct_tsk_exit(struct task_struct *tsk)
 {}
+static inline void delayacct_blkio_start(void)
+{}
+static inline void delayacct_blkio_end(void)
+{}
 #endif /* CONFIG_TASK_DELAY_ACCT */
 
 #endif
diff -puN include/linux/sched.h~delayacct-blkio-swapin include/linux/sched.h
--- linux-2.6.17-rc3/include/linux/sched.h~delayacct-blkio-swapin	2006-04-28 23:48:43.000000000 +0530
+++ linux-2.6.17-rc3-balbir/include/linux/sched.h	2006-05-02 07:05:25.000000000 +0530
@@ -550,6 +550,12 @@ struct task_delay_info {
 	 * Atomicity of updates to XXX_delay, XXX_count protected by
 	 * single lock above (split into XXX_lock if contention is an issue).
 	 */
+
+	struct timespec blkio_start, blkio_end;	/* Shared by blkio, swapin */
+	u64 blkio_delay;	/* wait for sync block io completion */
+	u64 swapin_delay;	/* wait for swapin block io completion */
+	u32 blkio_count;
+	u32 swapin_count;
 };
 #endif
 
diff -puN kernel/delayacct.c~delayacct-blkio-swapin kernel/delayacct.c
--- linux-2.6.17-rc3/kernel/delayacct.c~delayacct-blkio-swapin	2006-04-28 23:48:43.000000000 +0530
+++ linux-2.6.17-rc3-balbir/kernel/delayacct.c	2006-05-02 07:27:03.000000000 +0530
@@ -85,3 +85,22 @@ static inline void delayacct_end(struct 
 	spin_unlock(&current->delays->lock);
 }
 
+void __delayacct_blkio_start(void)
+{
+	delayacct_start(&current->delays->blkio_start);
+}
+
+void __delayacct_blkio_end(void)
+{
+	if (current->delays->flags & DELAYACCT_PF_SWAPIN)
+		/* Swapin block I/O */
+		delayacct_end(&current->delays->blkio_start,
+			&current->delays->blkio_end,
+			&current->delays->swapin_delay,
+			&current->delays->swapin_count);
+	else	/* Other block I/O */
+		delayacct_end(&current->delays->blkio_start,
+			&current->delays->blkio_end,
+			&current->delays->blkio_delay,
+			&current->delays->blkio_count);
+}
diff -puN kernel/sched.c~delayacct-blkio-swapin kernel/sched.c
--- linux-2.6.17-rc3/kernel/sched.c~delayacct-blkio-swapin	2006-04-28 23:48:43.000000000 +0530
+++ linux-2.6.17-rc3-balbir/kernel/sched.c	2006-05-02 07:05:25.000000000 +0530
@@ -50,6 +50,7 @@
 #include <linux/times.h>
 #include <linux/acct.h>
 #include <linux/kprobes.h>
+#include <linux/delayacct.h>
 #include <asm/tlb.h>
 
 #include <asm/unistd.h>
@@ -4170,9 +4171,11 @@ void __sched io_schedule(void)
 {
 	struct runqueue *rq = &per_cpu(runqueues, raw_smp_processor_id());
 
+	delayacct_blkio_start();
 	atomic_inc(&rq->nr_iowait);
 	schedule();
 	atomic_dec(&rq->nr_iowait);
+	delayacct_blkio_end();
 }
 
 EXPORT_SYMBOL(io_schedule);
@@ -4182,9 +4185,11 @@ long __sched io_schedule_timeout(long ti
 	struct runqueue *rq = &per_cpu(runqueues, raw_smp_processor_id());
 	long ret;
 
+	delayacct_blkio_start();
 	atomic_inc(&rq->nr_iowait);
 	ret = schedule_timeout(timeout);
 	atomic_dec(&rq->nr_iowait);
+	delayacct_blkio_end();
 	return ret;
 }
 
diff -puN mm/memory.c~delayacct-blkio-swapin mm/memory.c
--- linux-2.6.17-rc3/mm/memory.c~delayacct-blkio-swapin	2006-04-28 23:48:43.000000000 +0530
+++ linux-2.6.17-rc3-balbir/mm/memory.c	2006-04-28 23:48:43.000000000 +0530
@@ -48,6 +48,7 @@
 #include <linux/rmap.h>
 #include <linux/module.h>
 #include <linux/init.h>
+#include <linux/delayacct.h>
 
 #include <asm/pgalloc.h>
 #include <asm/uaccess.h>
@@ -1880,6 +1881,7 @@ static int do_swap_page(struct mm_struct
 
 	entry = pte_to_swp_entry(orig_pte);
 again:
+	delayacct_set_flag(DELAYACCT_PF_SWAPIN);
 	page = lookup_swap_cache(entry);
 	if (!page) {
  		swapin_readahead(entry, address, vma);
@@ -1892,6 +1894,7 @@ again:
 			page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
 			if (likely(pte_same(*page_table, orig_pte)))
 				ret = VM_FAULT_OOM;
+			delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
 			goto unlock;
 		}
 
@@ -1903,6 +1906,7 @@ again:
 
 	mark_page_accessed(page);
 	lock_page(page);
+	delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
 	if (!PageSwapCache(page)) {
 		/* Page migration has occured */
 		unlock_page(page);
_

^ permalink raw reply	[flat|nested] 12+ messages in thread
* [Patch 0/8] per-task delay accounting
@ 2006-04-22  2:16 Shailabh Nagar
  2006-04-22  2:29 ` [Patch 2/8] Sync block I/O and swapin delay collection Shailabh Nagar
  0 siblings, 1 reply; 12+ messages in thread
From: Shailabh Nagar @ 2006-04-22  2:16 UTC (permalink / raw)
  To: linux-kernel
  Cc: LSE, Jes Sorensen, Peter Chubb, Erich Focht, Levent Serinol,
	Jay Lan



Here are the delay accounting patches again. I'm not using the
earlier email thread due to code being refactored a bit.

The previous posting
    http://www.uwsg.indiana.edu/hypermail/linux/kernel/0603.3/1776.html
of these patches elicited several review comments from Andrew Morton
all of which have been addressed.

The other main thread of the comments was whether other accounting
stakeholders would be ok with this interface. Towards this end,
I'd posted an overview of what the other packages do (which didn't seem
to make the archives) and some of the stakeholders responded.

I'll repost the analysis as a reply to this post. Meanwhile, here's
the list of the stakeholders identified by Andrew and a summary of status
of their comments.


1. CSA accounting/PAGG/JOB: Jay Lan <jlan@engr.sgi.com>

Raised several points
       http://www.uwsg.indiana.edu/hypermail/linux/kernel/0604.1/0397.html
all of which have been addressed in this set of patches.

2. per-process IO statistics: Levent Serinol <lserinol@gmail.com>

No response.
I'd ascertained that its needs are a subset of CSA.

3. per-cpu time statistics: Erich Focht <efocht@ess.nec.de>

No response.
I'd ascertained that its needs can be met by taskstats
interface whenever these statistics are submitted for inclusion.

4. Microstate accounting: Peter Chubb <peterc@gelato.unsw.edu.au>

Mentioned overlap of patches with delay accounting
http://www.uwsg.indiana.edu/hypermail/linux/kernel/0603.3/2286.html

and also that a /proc interface was preferable due to convenience.
My position is that the netlink interface is a superset of /proc due to
the former's ability to supply exit-time data.


5. ELSA:  Guillaume Thouvenin <guillaume.thouvenin@bull.net>

Confirmed that ELSA is not a direct user of a new kernel statistics
interface since it is a consumer of CSA or BSD accounting's statistics.


6. pnotify: Jes Sorensen <jes@sgi.com>
(taken over pnotify from Erik Jacobson)

Informed over private email that pnotify replacement is
being worked on.

I'd ascertained that pnotify (or its replacement) will not be
concerned with exporting data to userspace or collecting any stats.
That's left to the kernel module that uses pnotify to get
notifications. CSA is one expected user of pnotify.
Hence CSA's concerns are the only ones relevant to pnotify as well.


7. Scalable statistics counters with /proc reporting:
 Ravikiran G Thirumalai, Dipankar Sarma <dipankar@in.ibm.com>

Confirmed these counters aren't relevant to this discussion.



--Shailabh


Series

delayacct-setup.patch
delayacct-blkio-swapin.patch
delayacct-schedstats.patch
genetlink-utils.patch
taskstats-setup.patch
delayacct-taskstats.patch
delayacct-doc.patch
delayacct-procfs.patch

^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2006-05-10 10:24 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2006-05-02  6:14 [Patch 2/8] Sync block I/O and swapin delay collection Balbir Singh
2006-05-08 21:19 ` Andrew Morton
2006-05-09  3:53   ` Balbir Singh
2006-05-09  4:23     ` Nick Piggin
2006-05-09  5:45       ` Balbir Singh
2006-05-09  5:57         ` Nick Piggin
2006-05-09  8:06           ` Balbir Singh
2006-05-09  8:20             ` Nick Piggin
2006-05-09 17:27               ` Balbir Singh
2006-05-10  0:15                 ` Nick Piggin
2006-05-10 10:20   ` [PATCH][delayacct] Add comments on units for the delay fields (was Re: [Patch 2/8] Sync block I/O and swapin delay collection) Balbir Singh
  -- strict thread matches above, loose matches on Subject: below --
2006-04-22  2:16 [Patch 0/8] per-task delay accounting Shailabh Nagar
2006-04-22  2:29 ` [Patch 2/8] Sync block I/O and swapin delay collection Shailabh Nagar

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox