* [RFC] blkstat
@ 2005-06-13 9:46 Nick Piggin
2005-06-13 9:54 ` Con Kolivas
2005-06-13 11:21 ` Andi Kleen
0 siblings, 2 replies; 6+ messages in thread
From: Nick Piggin @ 2005-06-13 9:46 UTC (permalink / raw)
To: linux-kernel
[-- Attachment #1: Type: text/plain, Size: 862 bytes --]
I have made a simple tool to measure idle and busy time for
block devices.
I have been wanting something like this for a while, because
the absolute throughput/seek numbers don't always help you
determine whether or not a workload is becoming IO bound.
It requires a small kernel patch, and I've also attached my
lame userspace program for it. It is kind of like vmstat.
Oh, and before I go further, does anyone know of any program
or statistic that allows the same functionality? Any comments?
** sample output **
npiggin@didi:~/blk$ ./blkstat hda
bdev bi % bo % io % id %
hda 0.0 0.0 0.0 100.0
hda 0.0 0.0 0.0 100.0
hda 1.2 0.0 1.2 98.8
hda 48.1 0.0 48.1 51.9
hda 53.6 0.0 53.6 46.4
hda 47.1 0.0 47.1 52.9
Nick
--
SUSE Labs, Novell Inc.
[-- Attachment #2: blk-start-time.patch --]
[-- Type: text/plain, Size: 2805 bytes --]
Index: linux-2.6/include/linux/blkdev.h
===================================================================
--- linux-2.6.orig/include/linux/blkdev.h 2005-06-10 21:11:21.000000000 +1000
+++ linux-2.6/include/linux/blkdev.h 2005-06-10 21:20:53.000000000 +1000
@@ -137,7 +137,6 @@ struct request {
int rq_status; /* should split this into a few status bits */
struct gendisk *rq_disk;
int errors;
- unsigned long start_time;
/* Number of scatter-gather DMA addr+len pairs after
* physical address coalescing is performed.
@@ -368,6 +367,11 @@ struct request_queue
struct kobject kobj;
/*
+ * Jiffies. Time the current request was started
+ */
+ unsigned long start_time;
+
+ /*
* queue settings
*/
unsigned long nr_requests; /* Max # of requests */
Index: linux-2.6/drivers/block/ll_rw_blk.c
===================================================================
--- linux-2.6.orig/drivers/block/ll_rw_blk.c 2005-06-10 21:10:26.000000000 +1000
+++ linux-2.6/drivers/block/ll_rw_blk.c 2005-06-11 16:23:38.000000000 +1000
@@ -1914,10 +1914,13 @@ static struct request *get_request(reque
}
get_rq:
+ if (rl->count[READ] + rl->count[WRITE] == 0)
+ q->start_time = jiffies;
rl->count[rw]++;
rl->starved[rw] = 0;
if (rl->count[rw] >= queue_congestion_on_threshold(q))
set_queue_congested(q, rw);
+
spin_unlock_irq(q->queue_lock);
rq = blk_alloc_request(q, rw, gfp_mask);
@@ -2487,15 +2490,6 @@ static int attempt_merge(request_queue_t
if (!q->merge_requests_fn(q, req, next))
return 0;
- /*
- * At this point we have either done a back merge
- * or front merge. We need the smaller start_time of
- * the merged requests to be the current request
- * for accounting purposes.
- */
- if (time_after(req->start_time, next->start_time))
- req->start_time = next->start_time;
-
req->biotail->bi_next = next->bio;
req->biotail = next->biotail;
@@ -2703,7 +2697,6 @@ get_rq:
req->waiting = NULL;
req->bio = req->biotail = bio;
req->rq_disk = bio->bi_bdev->bd_disk;
- req->start_time = jiffies;
add_request(q, req);
out:
@@ -3195,13 +3188,16 @@ EXPORT_SYMBOL(end_that_request_chunk);
*/
void end_that_request_last(struct request *req)
{
+ request_queue_t *q = req->q;
+ unsigned long now = jiffies;
struct gendisk *disk = req->rq_disk;
if (unlikely(laptop_mode) && blk_fs_request(req))
laptop_io_completion();
if (disk && blk_fs_request(req)) {
- unsigned long duration = jiffies - req->start_time;
+ unsigned long duration = now - q->start_time;
+
switch (rq_data_dir(req)) {
case WRITE:
__disk_stat_inc(disk, writes);
@@ -3215,6 +3211,8 @@ void end_that_request_last(struct reques
disk_round_stats(disk);
disk->in_flight--;
}
+ q->start_time = now;
+
if (req->end_io)
req->end_io(req);
else
[-- Attachment #3: blkstat.c --]
[-- Type: text/x-csrc, Size: 2329 bytes --]
#include <sys/types.h>
#include <sys/time.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <time.h>
#define PATH_MAX 4096
/*
 * Skip three space-separated fields of a strtok() scan and parse the fourth
 * as a decimal unsigned long.
 *
 * buf: buffer to start tokenising, or NULL to continue the scan started by a
 *      previous call (strtok() keeps the position in static state).
 *
 * Returns the parsed value. Terminates the program with status 1 if the
 * field is missing, is not a number, or is out of range.
 */
static unsigned long parse_fourth_field(char *buf)
{
	char *token;
	char *end;
	unsigned long value;

	strtok(buf, " ");
	strtok(NULL, " ");
	strtok(NULL, " ");
	token = strtok(NULL, " ");
	if (!token) {
		fprintf(stderr, "strtok failed\n");
		exit(1);
	}

	errno = 0;
	value = strtoul(token, &end, 10);
	/* end == token means no digits were consumed at all. */
	if (errno == ERANGE || end == token) {
		fprintf(stderr, "strtoul failed\n");
		exit(1);
	}

	return value;
}

/*
 * blkstat: once per second, read /sys/block/<bdev>/stat and print the share
 * of wall-clock time attributed to reads (bi), writes (bo), both (io) and
 * neither (id), as percentages.
 *
 * The 4th and 8th space-separated fields of the stat file are treated as the
 * cumulative read and write times in milliseconds; each line printed is based
 * on the difference between two consecutive samples.
 *
 * argv[1]: block device name as it appears under /sys/block (e.g. "hda").
 *
 * Never returns in normal operation; exits with status 1 on any I/O or
 * parse error.
 */
int main(int argc, char *argv[])
{
	unsigned int iter;
	/*
	 * Previous sample. Zero-initialised so the first pass (whose deltas are
	 * never printed) does not read indeterminate values.
	 */
	unsigned long o_read_ms = 0, o_write_ms = 0, o_time_ms = 0;
	char path[PATH_MAX];
	int len;
	int fd;

	if (argc != 2) {
		printf("Usage: %s <bdev>\n", argv[0]);
		exit(0);
	}

	/* snprintf() reports errors as < 0 and truncation as >= buffer size. */
	len = snprintf(path, sizeof path, "/sys/block/%s/stat", argv[1]);
	if (len < 0 || (size_t)len >= sizeof path) {
		fprintf(stderr, "Could not build path\n");
		exit(1);
	}

	iter = 0;
	for (;;) {
		unsigned long d_read_ms, d_write_ms, d_time_ms, d_busy_ms;
		unsigned long read_ms, write_ms, time_ms;
		struct timeval tv;
		ssize_t ret;
		char mem[1024];

		/* Re-open on every pass so each read starts at offset 0. */
		fd = open(path, O_RDONLY);
		if (fd == -1) {
			perror("open");
			exit(1);
		}

		if (gettimeofday(&tv, NULL) == -1) {
			perror("gettimeofday");
			exit(1);
		}
		/* Unsigned arithmetic: wrap-around is well defined and cancels
		 * out in the subtraction below. */
		time_ms = (unsigned long)tv.tv_sec * 1000UL +
			  (unsigned long)tv.tv_usec / 1000UL;
		d_time_ms = time_ms - o_time_ms;

		/* Leave room for the terminating NUL that strtok() requires. */
		do {
			ret = read(fd, mem, sizeof mem - 1);
		} while (ret == -1 && errno == EINTR);
		if (ret == -1) {
			perror("read");
			exit(1);
		}
		mem[ret] = '\0';

		if (close(fd) == -1) {
			perror("close");
			exit(1);
		}

		/* Field 4, then (continuing the same scan) field 8. */
		read_ms = parse_fourth_field(mem);
		write_ms = parse_fourth_field(NULL);

		d_read_ms = read_ms - o_read_ms;
		d_write_ms = write_ms - o_write_ms;
		d_busy_ms = d_read_ms + d_write_ms;

		/*
		 * The busy time can exceed the measured wall-clock interval;
		 * clamp the interval so percentages stay within 0..100.
		 */
		if (d_busy_ms > d_time_ms)
			d_time_ms = d_busy_ms;

		/* Repeat the column header every 20 lines. */
		if (iter % 20 == 0)
			printf("bdev\t bi %%\t bo %%\t io %%\t id %%\n");

		/* The first pass has no previous sample to diff against. */
		if (iter && d_time_ms) {
			printf("%s\t%6.1f\t%6.1f\t%6.1f\t%6.1f\n",
			       argv[1],
			       100.0 * d_read_ms / d_time_ms,
			       100.0 * d_write_ms / d_time_ms,
			       100.0 * d_busy_ms / d_time_ms,
			       /* idle is the complement of total busy time */
			       100.0 - 100.0 * d_busy_ms / d_time_ms);
		}

		o_read_ms = read_ms;
		o_write_ms = write_ms;
		o_time_ms = time_ms;

		sleep(1);
		iter++;
	}

	return 0;
}
^ permalink raw reply [flat|nested] 6+ messages in thread* Re: [RFC] blkstat
2005-06-13 9:46 [RFC] blkstat Nick Piggin
@ 2005-06-13 9:54 ` Con Kolivas
2005-06-13 10:02 ` Nick Piggin
2005-06-13 11:21 ` Andi Kleen
1 sibling, 1 reply; 6+ messages in thread
From: Con Kolivas @ 2005-06-13 9:54 UTC (permalink / raw)
To: linux-kernel; +Cc: Nick Piggin
[-- Attachment #1: Type: text/plain, Size: 652 bytes --]
On Mon, 13 Jun 2005 19:46, Nick Piggin wrote:
> I have made a simple tool to measure idle and busy time for
> block devices.
>
> I have been wanting something like this for a while, because
> the absolute throughput/seek numbers don't always help you
> determine whether or not a workload is becoming IO bound.
>
> It requires a small kernel patch, and I've also attached my
> lame userspace program for it. It is kind of like vmstat.
>
> Oh, and before I go further, does anyone know of any program
> or statistic that allows the same functionality? Any comments?
Would something like iostat give similar results?
http://linux.inet.hr/
Cheers,
Con
[-- Attachment #2: Type: application/pgp-signature, Size: 189 bytes --]
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [RFC] blkstat
2005-06-13 9:54 ` Con Kolivas
@ 2005-06-13 10:02 ` Nick Piggin
2005-06-13 10:05 ` Con Kolivas
0 siblings, 1 reply; 6+ messages in thread
From: Nick Piggin @ 2005-06-13 10:02 UTC (permalink / raw)
To: Con Kolivas; +Cc: linux-kernel
Con Kolivas wrote:
> On Mon, 13 Jun 2005 19:46, Nick Piggin wrote:
>>Oh, and before I go further, does anyone know of any program
>>or statistic that allows the same functionality? Any comments?
>
>
> Would something like iostat give similar results?
>
The problem with that is that it does not give you a % idle
figure on the block device, so you basically can't see if
the device is becoming a bottleneck.
You can kind of guess if you take into account the seeks,
and the throughput, but you're still missing things like
head position (eg. changes throughput), settle time and
rotational latency, and lots of other stuff.
Thanks,
Nick
Also, BTW. the way I have done the kernel patch makes a
device show 100% utilisation even if it is not doing anything
but waiting for a plug, or an anticipatory scheduler. This
is basically all the end user wants to know, although for
development purposes it may be interesting to know the other
metric too.
--
SUSE Labs, Novell Inc.
Send instant messages to your online friends http://au.messenger.yahoo.com
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [RFC] blkstat
2005-06-13 10:02 ` Nick Piggin
@ 2005-06-13 10:05 ` Con Kolivas
2005-06-13 10:19 ` Nick Piggin
0 siblings, 1 reply; 6+ messages in thread
From: Con Kolivas @ 2005-06-13 10:05 UTC (permalink / raw)
To: Nick Piggin; +Cc: linux-kernel
[-- Attachment #1: Type: text/plain, Size: 1240 bytes --]
On Mon, 13 Jun 2005 20:02, Nick Piggin wrote:
> Con Kolivas wrote:
> > On Mon, 13 Jun 2005 19:46, Nick Piggin wrote:
> >>Oh, and before I go further, does anyone know of any program
> >>or statistic that allows the same functionality? Any comments?
> >
> > Would something like iostat give similar results?
>
> The problem with that is that it does not give you a % idle
> figure on the block device, so you basically can't see if
> the device is becoming a bottleneck.
I've often wondered how iostat gives a %busy figure and whether this
translated accurately without further info from the kernel.
> You can kind of guess if you take into account the seeks,
> and the throughput, but you're still missing things like
> head position (eg. changes throughput), settle time and
> rotational latency, and lots of other stuff.
>
> Thanks,
> Nick
>
> Also, BTW. the way I have done the kernel patch make a
> device show 100% utilisation even if it is not doing anything
> but waiting for a plug, or an anticipatory scheduler. This
> is basically all the end user wants to know, although for
> development purposes it may be interesting to know the other
> metric too.
That sounds quite useful.
Cheers,
Con
[-- Attachment #2: Type: application/pgp-signature, Size: 189 bytes --]
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [RFC] blkstat
2005-06-13 10:05 ` Con Kolivas
@ 2005-06-13 10:19 ` Nick Piggin
0 siblings, 0 replies; 6+ messages in thread
From: Nick Piggin @ 2005-06-13 10:19 UTC (permalink / raw)
To: Con Kolivas; +Cc: linux-kernel
Con Kolivas wrote:
> On Mon, 13 Jun 2005 20:02, Nick Piggin wrote:
>
>>The problem with that is that it does not give you a % idle
>>figure on the block device, so you basically can't see if
>>the device is becoming a bottleneck.
>
>
> I've often wondered how iostat gives a %busy figure and whether this
> translated accurately without further info from the kernel.
>
>
Oh, it does have a "%util" in its extended stats. Sorry I missed
that so you might be right.
Hmm, so I guess it uses io_ticks and that does appear to give
the valid measure. I'll have a think about that - perhaps there
is still a place for the split read/write statistics I'm using...
Thanks,
Nick
--
SUSE Labs, Novell Inc.
Send instant messages to your online friends http://au.messenger.yahoo.com
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [RFC] blkstat
2005-06-13 9:46 [RFC] blkstat Nick Piggin
2005-06-13 9:54 ` Con Kolivas
@ 2005-06-13 11:21 ` Andi Kleen
1 sibling, 0 replies; 6+ messages in thread
From: Andi Kleen @ 2005-06-13 11:21 UTC (permalink / raw)
To: Nick Piggin; +Cc: linux-kernel
Nick Piggin <nickpiggin@yahoo.com.au> writes:
> I have made a simple tool to measure idle and busy time for
> block devices.
It would be better to put the new statistic into iostat/sard
instead of requiring new tools. Then eventually it would all
end up in distributions too and could be easily used everywhere.
-Andi
^ permalink raw reply [flat|nested] 6+ messages in thread
end of thread, other threads:[~2005-06-13 11:21 UTC | newest]
Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2005-06-13 9:46 [RFC] blkstat Nick Piggin
2005-06-13 9:54 ` Con Kolivas
2005-06-13 10:02 ` Nick Piggin
2005-06-13 10:05 ` Con Kolivas
2005-06-13 10:19 ` Nick Piggin
2005-06-13 11:21 ` Andi Kleen
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox