public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [Patch 3/8] cpu delay collection via schedstats
@ 2006-05-02  6:15 Balbir Singh
  2006-05-08 21:26 ` Andrew Morton
  0 siblings, 1 reply; 5+ messages in thread
From: Balbir Singh @ 2006-05-02  6:15 UTC (permalink / raw)
  To: linux-kernel; +Cc: lse-tech, jlan


Changelog

Fixes comments by akpm
- comments about locking used in rq_sched_info_arrive/depart

No fix needed/possible
- redundant extern declaration of delayacct_on in sched.h
suggested location (delayacct.h) cannot be used as it includes sched.h
extern declaration moved to where it's needed
- move unlikely declaration inside sched_info_on
Function only returns constants. Cannot be done.
- removal of #if defined in sched_fork (Dave Hansen)
Refactoring suggested does not work if only SCHEDSTATS is configured

delayacct-schedstats.patch

Make the task-related schedstats functions
callable by delay accounting even if schedstats
collection isn't turned on. This removes the
dependency of delay accounting on schedstats.

Signed-off-by: Chandra Seetharaman <sekharan@us.ibm.com>
Signed-off-by: Shailabh Nagar <nagar@watson.ibm.com>
Signed-off-by: Balbir Singh <balbir@in.ibm.com>
---

 include/linux/sched.h |   20 ++++++++++++++---
 kernel/sched.c        |   58 +++++++++++++++++++++++++++++++++++---------------
 2 files changed, 58 insertions(+), 20 deletions(-)

diff -puN include/linux/sched.h~delayacct-schedstats include/linux/sched.h
--- linux-2.6.17-rc3/include/linux/sched.h~delayacct-schedstats	2006-05-02 07:31:18.000000000 +0530
+++ linux-2.6.17-rc3-balbir/include/linux/sched.h	2006-05-02 07:34:27.000000000 +0530
@@ -521,7 +521,7 @@ typedef struct prio_array prio_array_t;
 struct backing_dev_info;
 struct reclaim_state;
 
-#ifdef CONFIG_SCHEDSTATS
+#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
 struct sched_info {
 	/* cumulative counters */
 	unsigned long	cpu_time,	/* time spent on the cpu */
@@ -532,9 +532,11 @@ struct sched_info {
 	unsigned long	last_arrival,	/* when we last ran on a cpu */
 			last_queued;	/* when we were last queued to run */
 };
+#endif /* defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) */
 
+#ifdef CONFIG_SCHEDSTATS
 extern struct file_operations proc_schedstat_operations;
-#endif
+#endif /* CONFIG_SCHEDSTATS */
 
 #ifdef CONFIG_TASK_DELAY_ACCT
 struct task_delay_info {
@@ -557,7 +559,19 @@ struct task_delay_info {
 	u32 blkio_count;
 	u32 swapin_count;
 };
+#endif	/* CONFIG_TASK_DELAY_ACCT */
+
+static inline int sched_info_on(void)
+{
+#ifdef CONFIG_SCHEDSTATS
+	return 1;
+#elif defined(CONFIG_TASK_DELAY_ACCT)
+	extern int delayacct_on;
+	return delayacct_on;
+#else
+	return 0;
 #endif
+}
 
 enum idle_type
 {
@@ -744,7 +758,7 @@ struct task_struct {
 	cpumask_t cpus_allowed;
 	unsigned int time_slice, first_time_slice;
 
-#ifdef CONFIG_SCHEDSTATS
+#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
 	struct sched_info sched_info;
 #endif
 
diff -puN kernel/sched.c~delayacct-schedstats kernel/sched.c
--- linux-2.6.17-rc3/kernel/sched.c~delayacct-schedstats	2006-05-02 07:31:18.000000000 +0530
+++ linux-2.6.17-rc3-balbir/kernel/sched.c	2006-05-02 07:31:18.000000000 +0530
@@ -469,9 +469,34 @@ struct file_operations proc_schedstat_op
 	.release = single_release,
 };
 
+/*
+ * Expects runqueue lock to be held for atomicity of update
+ */
+static inline void rq_sched_info_arrive(struct runqueue *rq,
+						unsigned long diff)
+{
+	if (rq) {
+		rq->rq_sched_info.run_delay += diff;
+		rq->rq_sched_info.pcnt++;
+	}
+}
+
+/*
+ * Expects runqueue lock to be held for atomicity of update
+ */
+static inline void rq_sched_info_depart(struct runqueue *rq,
+						unsigned long diff)
+{
+	if (rq)
+		rq->rq_sched_info.cpu_time += diff;
+}
 # define schedstat_inc(rq, field)	do { (rq)->field++; } while (0)
 # define schedstat_add(rq, field, amt)	do { (rq)->field += (amt); } while (0)
 #else /* !CONFIG_SCHEDSTATS */
+static inline void rq_sched_info_arrive(struct runqueue *rq, unsigned long diff)
+{}
+static inline void rq_sched_info_depart(struct runqueue *rq, unsigned long diff)
+{}
 # define schedstat_inc(rq, field)	do { } while (0)
 # define schedstat_add(rq, field, amt)	do { } while (0)
 #endif
@@ -491,7 +516,7 @@ static inline runqueue_t *this_rq_lock(v
 	return rq;
 }
 
-#ifdef CONFIG_SCHEDSTATS
+#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
 /*
  * Called when a process is dequeued from the active array and given
  * the cpu.  We should note that with the exception of interactive
@@ -520,7 +545,6 @@ static inline void sched_info_dequeued(t
 static void sched_info_arrive(task_t *t)
 {
 	unsigned long now = jiffies, diff = 0;
-	struct runqueue *rq = task_rq(t);
 
 	if (t->sched_info.last_queued)
 		diff = now - t->sched_info.last_queued;
@@ -529,11 +553,7 @@ static void sched_info_arrive(task_t *t)
 	t->sched_info.last_arrival = now;
 	t->sched_info.pcnt++;
 
-	if (!rq)
-		return;
-
-	rq->rq_sched_info.run_delay += diff;
-	rq->rq_sched_info.pcnt++;
+	rq_sched_info_arrive(task_rq(t), diff);
 }
 
 /*
@@ -553,8 +573,9 @@ static void sched_info_arrive(task_t *t)
  */
 static inline void sched_info_queued(task_t *t)
 {
-	if (!t->sched_info.last_queued)
-		t->sched_info.last_queued = jiffies;
+	if (unlikely(sched_info_on()))
+		if (!t->sched_info.last_queued)
+			t->sched_info.last_queued = jiffies;
 }
 
 /*
@@ -563,13 +584,10 @@ static inline void sched_info_queued(tas
  */
 static inline void sched_info_depart(task_t *t)
 {
-	struct runqueue *rq = task_rq(t);
 	unsigned long diff = jiffies - t->sched_info.last_arrival;
 
 	t->sched_info.cpu_time += diff;
-
-	if (rq)
-		rq->rq_sched_info.cpu_time += diff;
+	rq_sched_info_depart(task_rq(t), diff);
 }
 
 /*
@@ -577,7 +595,7 @@ static inline void sched_info_depart(tas
  * their time slice.  (This may also be called when switching to or from
  * the idle task.)  We are only called when prev != next.
  */
-static inline void sched_info_switch(task_t *prev, task_t *next)
+static inline void __sched_info_switch(task_t *prev, task_t *next)
 {
 	struct runqueue *rq = task_rq(prev);
 
@@ -592,10 +610,15 @@ static inline void sched_info_switch(tas
 	if (next != rq->idle)
 		sched_info_arrive(next);
 }
+static inline void sched_info_switch(task_t *prev, task_t *next)
+{
+	if (unlikely(sched_info_on()))
+		__sched_info_switch(prev, next);
+}
 #else
 #define sched_info_queued(t)		do { } while (0)
 #define sched_info_switch(t, next)	do { } while (0)
-#endif /* CONFIG_SCHEDSTATS */
+#endif /* CONFIG_SCHEDSTATS || CONFIG_TASK_DELAY_ACCT */
 
 /*
  * Adding/removing a task to/from a priority array:
@@ -1393,8 +1416,9 @@ void fastcall sched_fork(task_t *p, int 
 	p->state = TASK_RUNNING;
 	INIT_LIST_HEAD(&p->run_list);
 	p->array = NULL;
-#ifdef CONFIG_SCHEDSTATS
-	memset(&p->sched_info, 0, sizeof(p->sched_info));
+#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
+	if (unlikely(sched_info_on()))
+		memset(&p->sched_info, 0, sizeof(p->sched_info));
 #endif
 #if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW)
 	p->oncpu = 0;
_

^ permalink raw reply	[flat|nested] 5+ messages in thread
* [Patch 0/8] per-task delay accounting
@ 2006-04-22  2:16 Shailabh Nagar
  2006-04-22  2:33 ` [Patch 3/8] cpu delay collection via schedstats Shailabh Nagar
  0 siblings, 1 reply; 5+ messages in thread
From: Shailabh Nagar @ 2006-04-22  2:16 UTC (permalink / raw)
  To: linux-kernel
  Cc: LSE, Jes Sorensen, Peter Chubb, Erich Focht, Levent Serinol,
	Jay Lan



Here are the delay accounting patches again. I'm not using the
earlier email thread due to code being refactored a bit.

The previous posting
    http://www.uwsg.indiana.edu/hypermail/linux/kernel/0603.3/1776.html
of these patches elicited several review comments from Andrew Morton
all of which have been addressed.

The other main thread of the comments was whether other accounting
stakeholders would be ok with this interface. Towards this end,
I'd posted an overview of what the other packages do (which didn't seem
to make the archives) and some of the stakeholders responded.

I'll repost the analysis as a reply to this post. Meanwhile, here's
the list of the stakeholders identified by Andrew and a summary of status
of their comments.


1. CSA accounting/PAGG/JOB: Jay Lan <jlan@engr.sgi.com>

Raised several points
       http://www.uwsg.indiana.edu/hypermail/linux/kernel/0604.1/0397.html
all of which have been addressed in this set of patches.

2. per-process IO statistics: Levent Serinol <lserinol@gmail.com>

No response.
I'd ascertained that its needs are a subset of CSA.

3. per-cpu time statistics: Erich Focht <efocht@ess.nec.de>

No response.
I'd ascertained that its needs can be met by taskstats
interface whenever these statistics are submitted for inclusion.

4. Microstate accounting: Peter Chubb <peterc@gelato.unsw.edu.au>

Mentioned overlap of patches with delay accounting
http://www.uwsg.indiana.edu/hypermail/linux/kernel/0603.3/2286.html

and also that a /proc interface was preferable due to convenience.
My position is that the netlink interface is a superset of /proc due to
the former's ability to supply exit-time data.


5. ELSA:  Guillaume Thouvenin <guillaume.thouvenin@bull.net>

Confirmed that ELSA is not a direct user of a new kernel statistics
interface since it is a consumer of CSA or BSD accounting's statistics.


6. pnotify: Jes Sorensen <jes@sgi.com>
(taken over pnotify from Erik Jacobson)

Informed over private email that pnotify replacement is
being worked on.

I'd ascertained that pnotify (or its replacement) will not be
concerned with exporting data to userspace or collecting any stats.
That's left to the kernel module that uses pnotify to get
notifications. CSA is one expected user of pnotify.
Hence CSA's concerns are the only ones relevant to pnotify as well.


7. Scalable statistics counters with /proc reporting:
 Ravikiran G Thirumalai, Dipankar Sarma <dipankar@in.ibm.com>

Confirmed these counters aren't relevant to this discussion.



--Shailabh


Series

delayacct-setup.patch
delayacct-blkio-swapin.patch
delayacct-schedstats.patch
genetlink-utils.patch
taskstats-setup.patch
delayacct-taskstats.patch
delayacct-doc.patch
delayacct-procfs.patch

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2006-05-10 10:28 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2006-05-02  6:15 [Patch 3/8] cpu delay collection via schedstats Balbir Singh
2006-05-08 21:26 ` Andrew Morton
2006-05-09  4:00   ` Balbir Singh
2006-05-10 10:24   ` [PATCH][delayacct] Use better names in schedstats (was Re: [Patch 3/8] cpu delay collection via schedstats) Balbir Singh
  -- strict thread matches above, loose matches on Subject: below --
2006-04-22  2:16 [Patch 0/8] per-task delay accounting Shailabh Nagar
2006-04-22  2:33 ` [Patch 3/8] cpu delay collection via schedstats Shailabh Nagar

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox