* [PATCH] Optimize struct task_delay_info
@ 2007-07-11 7:13 Zhang, Yanmin
2007-07-11 11:46 ` Balbir Singh
` (2 more replies)
0 siblings, 3 replies; 9+ messages in thread
From: Zhang, Yanmin @ 2007-07-11 7:13 UTC (permalink / raw)
To: nagar; +Cc: LKML
struct task_delay_info is used by the per-process block I/O delay statistics
feature in the kernel. This struct is not optimized.
My patch against kernel 2.6.22 shrinks it by half.
1) Delete blkio_start and blkio_end. As the collection happens in
io_schedule and io_schedule_timeout, we can use local variables to
replace them;
2) Delete lock. Updates to the protected data are never nested.
In addition, the result is only used for performance data collection,
so it's unnecessary to add such a lock.
3) Delete flags. It has just one value. Use the most significant bit of
blkio_delay (64 bits) to mark it.
Signed-off-by: Zhang Yanmin <yanmin.zhang@intel.com>
---
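A note on 3): blkio_delay accumulates nanoseconds, so bit 63 would only be
reached after roughly 292 years of accumulated delay, which is why it can
safely double as a flag. A minimal standalone sketch of the masking (the
helper names here are illustrative only, they are not part of the patch):

#define DELAYACCT_PF_SWAPIN	(0x1LLU << 63)	/* most significant bit of a u64 */

/* test whether the swapin flag is set in the combined field */
static inline int blkio_delay_is_swapin(u64 blkio_delay)
{
	return (blkio_delay & DELAYACCT_PF_SWAPIN) != 0;
}

/* strip the flag bit to get the real accumulated delay in nanoseconds */
static inline u64 blkio_delay_ns(u64 blkio_delay)
{
	return blkio_delay & ~DELAYACCT_PF_SWAPIN;
}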
diff -Nraup linux-2.6.22/include/linux/delayacct.h linux-2.6.22_blkio_delay/include/linux/delayacct.h
--- linux-2.6.22/include/linux/delayacct.h 2007-07-09 07:32:17.000000000 +0800
+++ linux-2.6.22_blkio_delay/include/linux/delayacct.h 2007-07-11 05:56:58.000000000 +0800
@@ -23,9 +23,9 @@
/*
* Per-task flags relevant to delay accounting
* maintained privately to avoid exhausting similar flags in sched.h:PF_*
- * Used to set current->delays->flags
+ * Used to set the most significant bit of current->delays->blkio_delay
*/
-#define DELAYACCT_PF_SWAPIN 0x00000001 /* I am doing a swapin */
+#define DELAYACCT_PF_SWAPIN (0x1LLU<<63) /* I am doing a swapin */
#ifdef CONFIG_TASK_DELAY_ACCT
@@ -39,16 +39,25 @@ extern void __delayacct_blkio_end(void);
extern int __delayacct_add_tsk(struct taskstats *, struct task_struct *);
extern __u64 __delayacct_blkio_ticks(struct task_struct *);
-static inline void delayacct_set_flag(int flag)
+static inline int delayacct_is_swapin(void)
{
if (current->delays)
- current->delays->flags |= flag;
+ return current->delays->blkio_delay & DELAYACCT_PF_SWAPIN
+ ? 1 : 0;
+ else
+ return 0;
+}
+
+static inline void delayacct_set_swapin(void)
+{
+ if (current->delays)
+ current->delays->blkio_delay |= DELAYACCT_PF_SWAPIN;
}
-static inline void delayacct_clear_flag(int flag)
+static inline void delayacct_clear_swapin(void)
{
if (current->delays)
- current->delays->flags &= ~flag;
+ current->delays->blkio_delay |= DELAYACCT_PF_SWAPIN;
}
static inline void delayacct_tsk_init(struct task_struct *tsk)
@@ -96,10 +105,19 @@ static inline __u64 delayacct_blkio_tick
return 0;
}
+void delayacct_start(struct timespec *start);
+void delayacct_end(struct timespec *start);
+
+#define DECLARE_DELAYACCT_START_VAR struct timespec delayacct_start_time
+#define DELAYACCT_START delayacct_start(&delayacct_start_time)
+#define DELAYACCT_END delayacct_end(&delayacct_start_time)
+
#else
-static inline void delayacct_set_flag(int flag)
+static inline int delayacct_is_swapin(void)
+{ return 0; }
+static inline void delayacct_set_swapin(void)
{}
-static inline void delayacct_clear_flag(int flag)
+static inline void delayacct_clear_swapin(void)
{}
static inline void delayacct_init(void)
{}
@@ -107,15 +125,21 @@ static inline void delayacct_tsk_init(st
{}
static inline void delayacct_tsk_free(struct task_struct *tsk)
{}
-static inline void delayacct_blkio_start(void)
-{}
-static inline void delayacct_blkio_end(void)
-{}
static inline int delayacct_add_tsk(struct taskstats *d,
struct task_struct *tsk)
{ return 0; }
static inline __u64 delayacct_blkio_ticks(struct task_struct *tsk)
{ return 0; }
+
+static inline void delayacct_start(struct timespec *start)
+{}
+static inline void delayacct_end(struct timespec *start)
+{}
+
+#define DECLARE_DELAYACCT_START_VAR
+#define DELAYACCT_START
+#define DELAYACCT_END
+
#endif /* CONFIG_TASK_DELAY_ACCT */
#endif
diff -Nraup linux-2.6.22/include/linux/sched.h linux-2.6.22_blkio_delay/include/linux/sched.h
--- linux-2.6.22/include/linux/sched.h 2007-07-09 07:32:17.000000000 +0800
+++ linux-2.6.22_blkio_delay/include/linux/sched.h 2007-07-11 04:59:48.000000000 +0800
@@ -599,25 +599,11 @@ extern const struct file_operations proc
#ifdef CONFIG_TASK_DELAY_ACCT
struct task_delay_info {
- spinlock_t lock;
- unsigned int flags; /* Private per-task flags */
-
- /* For each stat XXX, add following, aligned appropriately
- *
- * struct timespec XXX_start, XXX_end;
- * u64 XXX_delay;
- * u32 XXX_count;
- *
- * Atomicity of updates to XXX_delay, XXX_count protected by
- * single lock above (split into XXX_lock if contention is an issue).
- */
-
/*
* XXX_count is incremented on every XXX operation, the delay
* associated with the operation is added to XXX_delay.
* XXX_delay contains the accumulated delay time in nanoseconds.
*/
- struct timespec blkio_start, blkio_end; /* Shared by blkio, swapin */
u64 blkio_delay; /* wait for sync block io completion */
u64 swapin_delay; /* wait for swapin block io completion */
u32 blkio_count; /* total count of the number of sync block */
diff -Nraup linux-2.6.22/kernel/delayacct.c linux-2.6.22_blkio_delay/kernel/delayacct.c
--- linux-2.6.22/kernel/delayacct.c 2007-07-09 07:32:17.000000000 +0800
+++ linux-2.6.22_blkio_delay/kernel/delayacct.c 2007-07-11 06:03:42.000000000 +0800
@@ -38,8 +38,6 @@ void delayacct_init(void)
void __delayacct_tsk_init(struct task_struct *tsk)
{
tsk->delays = kmem_cache_zalloc(delayacct_cache, GFP_KERNEL);
- if (tsk->delays)
- spin_lock_init(&tsk->delays->lock);
}
/*
@@ -47,53 +45,34 @@ void __delayacct_tsk_init(struct task_st
* its starting timestamp (@start)
*/
-static inline void delayacct_start(struct timespec *start)
+void delayacct_start(struct timespec *start)
{
do_posix_clock_monotonic_gettime(start);
}
/*
* Finish delay accounting for a statistic using
- * its timestamps (@start, @end), accumalator (@total) and @count
+ * its timestamps (@start)
*/
-static void delayacct_end(struct timespec *start, struct timespec *end,
- u64 *total, u32 *count)
+void delayacct_end(struct timespec *start)
{
- struct timespec ts;
+ struct timespec end, ts;
s64 ns;
- unsigned long flags;
- do_posix_clock_monotonic_gettime(end);
- ts = timespec_sub(*end, *start);
+ do_posix_clock_monotonic_gettime(&end);
+ ts = timespec_sub(end, *start);
ns = timespec_to_ns(&ts);
if (ns < 0)
return;
- spin_lock_irqsave(&current->delays->lock, flags);
- *total += ns;
- (*count)++;
- spin_unlock_irqrestore(&current->delays->lock, flags);
-}
-
-void __delayacct_blkio_start(void)
-{
- delayacct_start(&current->delays->blkio_start);
-}
-
-void __delayacct_blkio_end(void)
-{
- if (current->delays->flags & DELAYACCT_PF_SWAPIN)
- /* Swapin block I/O */
- delayacct_end(&current->delays->blkio_start,
- &current->delays->blkio_end,
- &current->delays->swapin_delay,
- &current->delays->swapin_count);
- else /* Other block I/O */
- delayacct_end(&current->delays->blkio_start,
- &current->delays->blkio_end,
- &current->delays->blkio_delay,
- &current->delays->blkio_count);
+ if (delayacct_is_swapin()) {
+ current->delays->swapin_delay += ns;
+ current->delays->swapin_count ++;
+ } else { /* Other block I/O */
+ current->delays->blkio_delay += ns;
+ current->delays->blkio_count ++;
+ }
}
int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk)
@@ -101,7 +80,6 @@ int __delayacct_add_tsk(struct taskstats
s64 tmp;
struct timespec ts;
unsigned long t1,t2,t3;
- unsigned long flags;
/* Though tsk->delays accessed later, early exit avoids
* unnecessary returning of other data
@@ -134,14 +112,12 @@ int __delayacct_add_tsk(struct taskstats
/* zero XXX_total, non-zero XXX_count implies XXX stat overflowed */
- spin_lock_irqsave(&tsk->delays->lock, flags);
- tmp = d->blkio_delay_total + tsk->delays->blkio_delay;
+ tmp = d->blkio_delay_total + ( tsk->delays->blkio_delay & ~DELAYACCT_PF_SWAPIN);
d->blkio_delay_total = (tmp < d->blkio_delay_total) ? 0 : tmp;
tmp = d->swapin_delay_total + tsk->delays->swapin_delay;
d->swapin_delay_total = (tmp < d->swapin_delay_total) ? 0 : tmp;
d->blkio_count += tsk->delays->blkio_count;
d->swapin_count += tsk->delays->swapin_count;
- spin_unlock_irqrestore(&tsk->delays->lock, flags);
done:
return 0;
@@ -150,12 +126,9 @@ done:
__u64 __delayacct_blkio_ticks(struct task_struct *tsk)
{
__u64 ret;
- unsigned long flags;
- spin_lock_irqsave(&tsk->delays->lock, flags);
- ret = nsec_to_clock_t(tsk->delays->blkio_delay +
- tsk->delays->swapin_delay);
- spin_unlock_irqrestore(&tsk->delays->lock, flags);
+ ret = nsec_to_clock_t( (tsk->delays->blkio_delay & ~DELAYACCT_PF_SWAPIN)
+ + tsk->delays->swapin_delay);
return ret;
}
diff -Nraup linux-2.6.22/kernel/sched.c linux-2.6.22_blkio_delay/kernel/sched.c
--- linux-2.6.22/kernel/sched.c 2007-07-09 07:32:17.000000000 +0800
+++ linux-2.6.22_blkio_delay/kernel/sched.c 2007-07-11 05:40:25.000000000 +0800
@@ -4862,12 +4862,13 @@ EXPORT_SYMBOL(yield);
void __sched io_schedule(void)
{
struct rq *rq = &__raw_get_cpu_var(runqueues);
+ DECLARE_DELAYACCT_START_VAR;
- delayacct_blkio_start();
+ DELAYACCT_START;
atomic_inc(&rq->nr_iowait);
schedule();
atomic_dec(&rq->nr_iowait);
- delayacct_blkio_end();
+ DELAYACCT_END;
}
EXPORT_SYMBOL(io_schedule);
@@ -4875,12 +4876,13 @@ long __sched io_schedule_timeout(long ti
{
struct rq *rq = &__raw_get_cpu_var(runqueues);
long ret;
+ DECLARE_DELAYACCT_START_VAR;
- delayacct_blkio_start();
+ DELAYACCT_START;
atomic_inc(&rq->nr_iowait);
ret = schedule_timeout(timeout);
atomic_dec(&rq->nr_iowait);
- delayacct_blkio_end();
+ DELAYACCT_END;
return ret;
}
diff -Nraup linux-2.6.22/mm/memory.c linux-2.6.22_blkio_delay/mm/memory.c
--- linux-2.6.22/mm/memory.c 2007-07-09 07:32:17.000000000 +0800
+++ linux-2.6.22_blkio_delay/mm/memory.c 2007-07-11 05:41:53.000000000 +0800
@@ -2134,7 +2134,7 @@ static int do_swap_page(struct mm_struct
migration_entry_wait(mm, pmd, address);
goto out;
}
- delayacct_set_flag(DELAYACCT_PF_SWAPIN);
+ delayacct_set_swapin();
page = lookup_swap_cache(entry);
if (!page) {
grab_swap_token(); /* Contend for token _before_ read-in */
@@ -2148,7 +2148,7 @@ static int do_swap_page(struct mm_struct
page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
if (likely(pte_same(*page_table, orig_pte)))
ret = VM_FAULT_OOM;
- delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
+ delayacct_clear_swapin();
goto unlock;
}
@@ -2157,7 +2157,7 @@ static int do_swap_page(struct mm_struct
count_vm_event(PGMAJFAULT);
}
- delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
+ delayacct_clear_swapin();
mark_page_accessed(page);
lock_page(page);
* Re: [PATCH] Optimize struct task_delay_info
2007-07-11 7:13 [PATCH] Optimize struct task_delay_info Zhang, Yanmin
@ 2007-07-11 11:46 ` Balbir Singh
2007-07-18 4:30 ` Zhang, Yanmin
2007-07-11 12:27 ` Andi Kleen
2007-07-23 5:14 ` Balbir Singh
2 siblings, 1 reply; 9+ messages in thread
From: Balbir Singh @ 2007-07-11 11:46 UTC (permalink / raw)
To: Zhang, Yanmin; +Cc: Shailabh Nagar, LKML
Zhang, Yanmin wrote:
> struct task_delay_info is used by the per-process block I/O delay statistics
> feature in the kernel. This struct is not optimized.
>
> My patch against kernel 2.6.22 shrinks it by half.
>
> 1) Delete blkio_start and blkio_end. As the collection happens in
> io_schedule and io_schedule_timeout, we can use local variables to
> replace them;
> 2) Delete lock. Updates to the protected data are never nested.
> In addition, the result is only used for performance data collection,
> so it's unnecessary to add such a lock.
> 3) Delete flags. It has just one value. Use the most significant bit of
> blkio_delay (64 bits) to mark it.
>
>
> Signed-off-by: Zhang Yanmin <yanmin.zhang@intel.com>
Hi, Yanmin,
Did you see any particular performance issues with the delay accounting
patches? Has the patch been tested? Could you please provide test results?
Meanwhile, I'll review these patches and I am correcting Shailabh's id
to his new email id.
--
Warm Regards,
Balbir Singh
Linux Technology Center
IBM, ISTL
* Re: [PATCH] Optimize struct task_delay_info
2007-07-11 11:46 ` Balbir Singh
@ 2007-07-18 4:30 ` Zhang, Yanmin
2007-07-23 1:08 ` Zhang, Yanmin
0 siblings, 1 reply; 9+ messages in thread
From: Zhang, Yanmin @ 2007-07-18 4:30 UTC (permalink / raw)
To: balbir; +Cc: Shailabh Nagar, LKML
On Wed, 2007-07-11 at 17:16 +0530, Balbir Singh wrote:
> Zhang, Yanmin wrote:
> > struct task_delay_info is used by the per-process block I/O delay statistics
> > feature in the kernel. This struct is not optimized.
> >
> > My patch against kernel 2.6.22 shrinks it by half.
> >
> > 1) Delete blkio_start and blkio_end. As the collection happens in
> > io_schedule and io_schedule_timeout, we can use local variables to
> > replace them;
> > 2) Delete lock. Updates to the protected data are never nested.
> > In addition, the result is only used for performance data collection,
> > so it's unnecessary to add such a lock.
> > 3) Delete flags. It has just one value. Use the most significant bit of
> > blkio_delay (64 bits) to mark it.
> >
> >
> > Signed-off-by: Zhang Yanmin <yanmin.zhang@intel.com>
>
> Hi, Yanmin,
>
> Did you see any particular performance issues with the delay accounting
> patches? Has the patch been tested? Could you please provide test results?
It's hard to find an appropriate benchmark to test it. Anyway, I used sysbench
to test it on my x86_64 machine. The machine has 16 logical CPUs (dual-core + HyperThreading),
8GB of memory and one SATA disk.
I tested both sequential and random I/O.
1) Sequential read/write, using the command line:
echo "3">/proc/sys/vm/drop_caches; sysbench --test=fileio --file-test-mode=seqrewr
--num-threads=32 --file-total-size=1500M --max-requests=150000 --max-time=3000 run;
I ran the command 20 times and averaged the results:
Without patch: 49.7511Mb/sec
With patch: 51.6557Mb/sec
Improvement: 3.8%
2) Random read/write, using the command line:
echo "3">/proc/sys/vm/drop_caches; sysbench --test=fileio --file-test-mode=rndrw
--num-threads=32 --file-total-size=800M --max-requests=15000 --max-time=3000 run;
I ran the command 10 times and averaged the results:
Without patch: 7.25657Mb/sec
With patch: 7.35052Mb/sec
Improvement: 1.3%
I didn't use an application to read the delay accounting info. If I had, I guess
the improvement would be even better.
>
> Meanwhile, I'll review these patches and I am correcting Shailabh's id
> to his new email id.
Thanks.
Yanmin
* Re: [PATCH] Optimize struct task_delay_info
2007-07-18 4:30 ` Zhang, Yanmin
@ 2007-07-23 1:08 ` Zhang, Yanmin
0 siblings, 0 replies; 9+ messages in thread
From: Zhang, Yanmin @ 2007-07-23 1:08 UTC (permalink / raw)
To: balbir; +Cc: Shailabh Nagar, LKML
On Wed, 2007-07-18 at 12:30 +0800, Zhang, Yanmin wrote:
> On Wed, 2007-07-11 at 17:16 +0530, Balbir Singh wrote:
> > Zhang, Yanmin wrote:
> > > struct task_delay_info is used by the per-process block I/O delay statistics
> > > feature in the kernel. This struct is not optimized.
> > >
> > > My patch against kernel 2.6.22 shrinks it by half.
> > >
> > > 1) Delete blkio_start and blkio_end. As the collection happens in
> > > io_schedule and io_schedule_timeout, we can use local variables to
> > > replace them;
> > > 2) Delete lock. Updates to the protected data are never nested.
> > > In addition, the result is only used for performance data collection,
> > > so it's unnecessary to add such a lock.
> > > 3) Delete flags. It has just one value. Use the most significant bit of
> > > blkio_delay (64 bits) to mark it.
> > >
> > >
> > > Signed-off-by: Zhang Yanmin <yanmin.zhang@intel.com>
> >
> > Hi, Yanmin,
> >
> > Did you see any particular performance issues with the delay accounting
> > patches? Has the patch been tested? Could you please provide test results?
> It's hard to find an appropriate benchmark to test it. Anyway, I used sysbench
> to test it on my x86_64 machine. The machine has 16 logical CPUs (dual-core + HyperThreading),
> 8GB of memory and one SATA disk.
>
> I tested both sequential and random I/O.
> 1) Sequential read/write, using the command line:
> echo "3">/proc/sys/vm/drop_caches; sysbench --test=fileio --file-test-mode=seqrewr
> --num-threads=32 --file-total-size=1500M --max-requests=150000 --max-time=3000 run;
>
> I ran the command 20 times and averaged the results:
> Without patch: 49.7511Mb/sec
> With patch: 51.6557Mb/sec
> Improvement: 3.8%
>
> 2) Random read/write, using the command line:
> echo "3">/proc/sys/vm/drop_caches; sysbench --test=fileio --file-test-mode=rndrw
> --num-threads=32 --file-total-size=800M --max-requests=15000 --max-time=3000 run;
>
> I ran the command 10 times and averaged the results:
> Without patch: 7.25657Mb/sec
> With patch: 7.35052Mb/sec
> Improvement: 1.3%
>
> I didn't use an application to read the delay accounting info. If I had, I guess
> the improvement would be even better.
>
> >
> > Meanwhile, I'll review these patches and I am correcting Shailabh's id
> > to his new email id.
Balbir,
What are your comments on the patch?
Thanks,
Yanmin
* Re: [PATCH] Optimize struct task_delay_info
2007-07-11 7:13 [PATCH] Optimize struct task_delay_info Zhang, Yanmin
2007-07-11 11:46 ` Balbir Singh
@ 2007-07-11 12:27 ` Andi Kleen
2007-07-12 8:37 ` Zhang, Yanmin
2007-07-23 5:14 ` Balbir Singh
2 siblings, 1 reply; 9+ messages in thread
From: Andi Kleen @ 2007-07-11 12:27 UTC (permalink / raw)
To: Zhang, Yanmin; +Cc: nagar, LKML
"Zhang, Yanmin" <yanmin_zhang@linux.intel.com> writes:
> replace them;
> 2) Delete lock. Updates to the protected data are never nested.
> In addition, the result is only used for performance data collection,
> so it's unnecessary to add such a lock.
Not sure that's a good idea. People expect their performance counts
to be accurate too. You could possibly use atomics though, but
when there are multiple counters updated the spinlock will likely be
faster.
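Roughly (just a sketch, assuming atomic64_t is usable here; not every 32-bit
architecture of this era provides it):

	/* counters declared as atomics instead of plain u64/u32 */
	atomic64_t blkio_delay;		/* accumulated delay in nanoseconds */
	atomic_t   blkio_count;

	/* update path, no spinlock */
	atomic64_add(ns, &current->delays->blkio_delay);
	atomic_inc(&current->delays->blkio_count);

	/* read path, e.g. in __delayacct_add_tsk() */
	tmp = d->blkio_delay_total + atomic64_read(&tsk->delays->blkio_delay);
	d->blkio_count += atomic_read(&tsk->delays->blkio_count);

Each counter would then be consistent on its own, though the delay/count
pair is still not updated as one unit.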
-Andi
* Re: [PATCH] Optimize struct task_delay_info
2007-07-11 12:27 ` Andi Kleen
@ 2007-07-12 8:37 ` Zhang, Yanmin
2007-07-12 18:21 ` Frank Ch. Eigler
0 siblings, 1 reply; 9+ messages in thread
From: Zhang, Yanmin @ 2007-07-12 8:37 UTC (permalink / raw)
To: Andi Kleen; +Cc: nagar, LKML
On Wed, 2007-07-11 at 14:27 +0200, Andi Kleen wrote:
> "Zhang, Yanmin" <yanmin_zhang@linux.intel.com> writes:
> > replace them;
> > 2) Delete lock. Updates to the protected data are never nested.
> > In addition, the result is only used for performance data collection,
> > so it's unnecessary to add such a lock.
>
> Not sure that's a good idea. People expect their performance counts
> to be accurate too. You could possibly use atomics though, but
> when there are multiple counters updated the spinlock will likely be
> faster.
Accuracy here has 2 meanings.
1) From the data-update point of view: the data is correct, because only the
process itself updates it and there is no nested update.
2) Whether a reader gets consistent data while the process is updating it: that
might be an issue, but not an important one. Mostly, the application tool
reads the data at an interval (see the sketch below).
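For illustration, the window in question looks like this with the patched
code paths (the reader here is __delayacct_add_tsk() filling a taskstats
reply):

	/* writer: the current task, in delayacct_end() */
	current->delays->blkio_delay += ns;
	/* a reader running at this point sees the new delay ... */
	current->delays->blkio_count++;
	/* ... but the old count, i.e. one less than expected */

	/* reader: another task, in __delayacct_add_tsk() */
	tmp = d->blkio_delay_total + (tsk->delays->blkio_delay & ~DELAYACCT_PF_SWAPIN);
	d->blkio_count += tsk->delays->blkio_count;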
Yanmin
* Re: [PATCH] Optimize struct task_delay_info
2007-07-12 8:37 ` Zhang, Yanmin
@ 2007-07-12 18:21 ` Frank Ch. Eigler
2007-07-13 1:52 ` Zhang, Yanmin
0 siblings, 1 reply; 9+ messages in thread
From: Frank Ch. Eigler @ 2007-07-12 18:21 UTC (permalink / raw)
To: Zhang, Yanmin; +Cc: Andi Kleen, nagar, LKML
"Zhang, Yanmin" <yanmin_zhang@linux.intel.com> writes:
> [...]
> > > 2) Delete lock. [..]
> > > In addition, the result is only used for performance data collection,
> > > so it's unnecessary to add such a lock.
> > Not sure that's a good idea. People expect their performance counts
> > to be accurate too. [...]
> [...]
> 2) Whether a reader gets consistent data while the process is updating
> it: that might be an issue, but not an important one. Mostly, the
> application tool reads the data at an interval.
Can you elaborate on that some more? Is it OK for the sample
monitoring program to return inconsistent data sometimes? Is there
reason to believe that this sample user-space program will remain the
sole consumer of this data? Might a seqlock be appropriate here?
- FChE
* Re: [PATCH] Optimize struct task_delay_info
2007-07-12 18:21 ` Frank Ch. Eigler
@ 2007-07-13 1:52 ` Zhang, Yanmin
0 siblings, 0 replies; 9+ messages in thread
From: Zhang, Yanmin @ 2007-07-13 1:52 UTC (permalink / raw)
To: Frank Ch. Eigler; +Cc: Andi Kleen, Shailabh Nagar, LKML, Balbir Singh
On Thu, 2007-07-12 at 14:21 -0400, Frank Ch. Eigler wrote:
> "Zhang, Yanmin" <yanmin_zhang@linux.intel.com> writes:
>
> > [...]
> > > > 2) Delete lock. [..]
> > > > In addition, the result is only used for performance data collection,
> > > > so it's unnecessary to add such a lock.
> > > Not sure that's a good idea. People expect their performance counts
> > > to be accurate too. [...]
> > [...]
> > 2) Whether a reader gets consistent data while the process is updating
> > it: that might be an issue, but not an important one. Mostly, the
> > application tool reads the data at an interval.
>
> Can you elaborate on that some more?
Without the lock, a reader might see the latest blkio_delay/swapin_delay value
while the count is still 1 less than the correct value.
> Is it OK for the sample
> monitoring program to return inconsistent data sometimes?
That needs Shailabh Nagar's comments.
What I tried to say is that the inconsistency is not severe and happens rarely.
The most common scenario is a performance tool reading the data once per time slot
in a loop, so I think it wouldn't hurt the usage model.
> Is there
> reason to believe that this sample user-space program will remain the
> sole consumer of this data? Might a seqlock be appropriate here?
A seqlock would help, but my starting point is to save memory space without hurting applications.
As you know, every process (thread) has its own struct task_delay_info.
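For reference, the seqlock variant would look roughly like this (sketch only;
the seq field is hypothetical, and it is exactly the extra per-task space I
would like to avoid):

	/* hypothetical extra field in struct task_delay_info */
	seqlock_t seq;

	/* writer side, in delayacct_end() */
	write_seqlock(&current->delays->seq);
	current->delays->blkio_delay += ns;
	current->delays->blkio_count++;
	write_sequnlock(&current->delays->seq);

	/* reader side, e.g. in __delayacct_add_tsk() */
	unsigned int start;
	u64 delay;
	u32 count;

	do {
		start = read_seqbegin(&tsk->delays->seq);
		delay = tsk->delays->blkio_delay;
		count = tsk->delays->blkio_count;
	} while (read_seqretry(&tsk->delays->seq, start));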
Yanmin
* Re: [PATCH] Optimize struct task_delay_info
2007-07-11 7:13 [PATCH] Optimize struct task_delay_info Zhang, Yanmin
2007-07-11 11:46 ` Balbir Singh
2007-07-11 12:27 ` Andi Kleen
@ 2007-07-23 5:14 ` Balbir Singh
2 siblings, 0 replies; 9+ messages in thread
From: Balbir Singh @ 2007-07-23 5:14 UTC (permalink / raw)
To: Zhang, Yanmin; +Cc: nagar, LKML
Zhang, Yanmin wrote:
> struct task_delay_info is used by the per-process block I/O delay statistics
> feature in the kernel. This struct is not optimized.
>
> My patch against kernel 2.6.22 shrinks it by half.
>
> 1) Delete blkio_start and blkio_end. As the collection happens in
> io_schedule and io_schedule_timeout, we can use local variables to
> replace them;
I am not sure if it's a good idea to push items on the stack.
Remember we are moving to 4K stacks.
> 2) Delete lock. Updates to the protected data are never nested.
> In addition, the result is only used for performance data collection,
> so it's unnecessary to add such a lock.
This is a cause for concern; we cannot afford to collect incorrect data.
Incorrect/unreliable data is worthless.
> 3) Delete flags. It has just one value. Use the most significant bit of
> blkio_delay (64 bits) to mark it.
>
Yes, that's true right now, but I am not sure we should optimize that so
early. We could end up adding other accounting or extending the framework,
and then we'd need to add the flags back.
> -static inline void delayacct_clear_flag(int flag)
> +static inline void delayacct_clear_swapin(void)
> {
> if (current->delays)
> - current->delays->flags &= ~flag;
> + current->delays->blkio_delay |= DELAYACCT_PF_SWAPIN;
BTW, you should be clearing the flag here.
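Presumably it should be something like:

	if (current->delays)
		current->delays->blkio_delay &= ~DELAYACCT_PF_SWAPIN;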
Overall, the lock removal is not acceptable. I don't like the bit
hacking for flags or moving the timestamps to the stack either.
--
Warm Regards,
Balbir Singh
Linux Technology Center
IBM, ISTL