* [RFC][PATCH] per-task I/O throttling
@ 2008-01-10 22:45 Andrea Righi
2008-01-11 1:50 ` Bill Davidsen
` (2 more replies)
0 siblings, 3 replies; 24+ messages in thread
From: Andrea Righi @ 2008-01-10 22:45 UTC (permalink / raw)
To: LKML; +Cc: Jens Axboe
Allow limiting the bandwidth of I/O-intensive processes, such as backup
tools running in the background, large file copies, checksums on huge
files, etc.
These kinds of processes can noticeably impact system responsiveness
for some time, and adjusting task priorities is not always an
acceptable solution.
This patch allows specifying a maximum I/O rate in sectors per second
for each individual process via /proc/<PID>/io_throttle (the default is
zero, which means no limit).
Signed-off-by: Andrea Righi <a.righi@cineca.it>
---
diff -urpN linux-2.6.24-rc7/block/ll_rw_blk.c linux-2.6.24-rc7-task-io-throttle/block/ll_rw_blk.c
--- linux-2.6.24-rc7/block/ll_rw_blk.c 2008-01-06 22:45:38.000000000 +0100
+++ linux-2.6.24-rc7-task-io-throttle/block/ll_rw_blk.c 2008-01-10 23:23:41.000000000 +0100
@@ -31,6 +31,7 @@
#include <linux/blktrace_api.h>
#include <linux/fault-inject.h>
#include <linux/scatterlist.h>
+#include <linux/jiffies.h>
/*
* for max sense size
@@ -3184,6 +3185,41 @@ static inline int bio_check_eod(struct b
return 0;
}
+#ifdef CONFIG_TASK_IO_THROTTLE
+static inline void task_io_throttle(int nr_sectors)
+{
+ unsigned long delta;
+ long sleep;
+
+ if (!current->io_throttle) {
+ return;
+ }
+
+ if (!current->io_throttle_timestamp) {
+ current->io_throttle_timestamp = jiffies;
+ }
+ delta = jiffies_to_msecs((long)jiffies -
+ (long)(current->io_throttle_timestamp)) * 1000;
+
+ current->io_throttle_req += nr_sectors;
+
+ sleep = current->io_throttle_req -
+ current->io_throttle * max(delta, (unsigned long)1);
+ if (sleep > 0) {
+ schedule_timeout_uninterruptible(sleep);
+ }
+
+ if (delta) {
+ current->io_throttle_timestamp = jiffies;
+ current->io_throttle_req = 0;
+ }
+}
+#else
+static inline void task_io_throttle(int nr_sectors)
+{
+}
+#endif /* CONFIG_TASK_IO_THROTTLE */
+
/**
* generic_make_request: hand a buffer to its device driver for I/O
* @bio: The bio describing the location in memory and on the device.
@@ -3221,6 +3257,8 @@ static inline void __generic_make_reques
if (bio_check_eod(bio, nr_sectors))
goto end_io;
+ task_io_throttle(nr_sectors);
+
/*
* Resolve the mapping until finished. (drivers are
* still free to implement/resolve their own stacking
diff -urpN linux-2.6.24-rc7/fs/proc/base.c linux-2.6.24-rc7-task-io-throttle/fs/proc/base.c
--- linux-2.6.24-rc7/fs/proc/base.c 2008-01-06 22:45:38.000000000 +0100
+++ linux-2.6.24-rc7-task-io-throttle/fs/proc/base.c 2008-01-10 23:24:43.000000000 +0100
@@ -864,6 +864,56 @@ static const struct file_operations proc
.write = oom_adjust_write,
};
+#ifdef CONFIG_TASK_IO_THROTTLE
+static ssize_t io_throttle_read(struct file *file, char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode);
+ char buffer[PROC_NUMBUF];
+ size_t len;
+ unsigned long io_throttle;
+
+ if (!task)
+ return -ESRCH;
+ io_throttle = task->io_throttle;
+ put_task_struct(task);
+
+ len = snprintf(buffer, sizeof(buffer), "%lu\n", io_throttle);
+
+ return simple_read_from_buffer(buf, count, ppos, buffer, len);
+}
+
+static ssize_t io_throttle_write(struct file *file, const char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct task_struct *task;
+ char buffer[PROC_NUMBUF], *end;
+ unsigned long io_throttle;
+
+ memset(buffer, 0, sizeof(buffer));
+ if (count > sizeof(buffer) - 1)
+ count = sizeof(buffer) - 1;
+ if (copy_from_user(buffer, buf, count))
+ return -EFAULT;
+ io_throttle = simple_strtoul(buffer, &end, 0);
+ if (*end == '\n')
+ end++;
+ task = get_proc_task(file->f_path.dentry->d_inode);
+ if (!task)
+ return -ESRCH;
+ task->io_throttle = io_throttle;
+ put_task_struct(task);
+ if (end - buffer == 0)
+ return -EIO;
+ return end - buffer;
+}
+
+static const struct file_operations proc_io_throttle_operations = {
+ .read = io_throttle_read,
+ .write = io_throttle_write,
+};
+#endif /* CONFIG_TASK_IO_THROTTLE */
+
#ifdef CONFIG_MMU
static ssize_t clear_refs_write(struct file *file, const char __user *buf,
size_t count, loff_t *ppos)
@@ -2250,6 +2300,9 @@ static const struct pid_entry tgid_base_
#ifdef CONFIG_TASK_IO_ACCOUNTING
INF("io", S_IRUGO, pid_io_accounting),
#endif
+#ifdef CONFIG_TASK_IO_THROTTLE
+ REG("io_throttle", S_IRUGO|S_IWUSR, io_throttle),
+#endif
};
static int proc_tgid_base_readdir(struct file * filp,
diff -urpN linux-2.6.24-rc7/include/linux/sched.h linux-2.6.24-rc7-task-io-throttle/include/linux/sched.h
--- linux-2.6.24-rc7/include/linux/sched.h 2008-01-06 22:45:38.000000000 +0100
+++ linux-2.6.24-rc7-task-io-throttle/include/linux/sched.h 2008-01-10 23:23:41.000000000 +0100
@@ -1167,6 +1167,15 @@ struct task_struct {
atomic_t fs_excl; /* holding fs exclusive resources */
struct rcu_head rcu;
+#ifdef CONFIG_TASK_IO_THROTTLE
+ /*
+ * per-process I/O throttle
+ */
+ unsigned long io_throttle;
+ unsigned long io_throttle_req;
+ unsigned long io_throttle_timestamp;
+#endif
+
/*
* cache last used pipe for splice
*/
diff -urpN linux-2.6.24-rc7/init/Kconfig linux-2.6.24-rc7-task-io-throttle/init/Kconfig
--- linux-2.6.24-rc7/init/Kconfig 2008-01-06 22:45:38.000000000 +0100
+++ linux-2.6.24-rc7-task-io-throttle/init/Kconfig 2008-01-10 23:23:41.000000000 +0100
@@ -206,6 +206,14 @@ config TASK_IO_ACCOUNTING
Say N if unsure.
+config TASK_IO_THROTTLE
+ bool "Enable per-task I/O throttling (EXPERIMENTAL)"
+ depends on EXPERIMENTAL
+ help
+ Allow to limit the maximum I/O rate for specific process(es).
+
+ Say N if unsure.
+
config USER_NS
bool "User Namespaces (EXPERIMENTAL)"
default n
^ permalink raw reply [flat|nested] 24+ messages in thread* Re: [RFC][PATCH] per-task I/O throttling 2008-01-10 22:45 [RFC][PATCH] per-task I/O throttling Andrea Righi @ 2008-01-11 1:50 ` Bill Davidsen 2008-01-11 10:28 ` Andrea Righi 2008-01-11 14:05 ` David Newall 2008-01-11 15:59 ` Balbir Singh 2 siblings, 1 reply; 24+ messages in thread From: Bill Davidsen @ 2008-01-11 1:50 UTC (permalink / raw) To: righiandr; +Cc: LKML, Jens Axboe Andrea Righi wrote: > Allow to limit the bandwidth of I/O-intensive processes, like backup > tools running in background, large files copy, checksums on huge files, > etc. > > This kind of processes can noticeably impact the system responsiveness > for some time and playing with tasks' priority is not always an > acceptable solution. > > This patch allows to specify a maximum I/O rate in sectors per second > for each single process via /proc/<PID>/io_throttle (default is zero, > that specify no limit). > It would seem to me that this would be vastly more useful in the real world if there were a settable default, so that administrators could avoid having to find and tune individual user processes. And it would seem far less common that the admin would want to set the limit *up* for a given process, and it's likely to be one known to the admin, at least by name. Of course if you want to do the effort to make it fully tunable, it could have a default by UID or GID. Usful on machines shared by students or managers. -- Bill Davidsen <davidsen@tmr.com> "We have more to fear from the bungling of the incompetent than from the machinations of the wicked." - from Slashdot ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [RFC][PATCH] per-task I/O throttling 2008-01-11 1:50 ` Bill Davidsen @ 2008-01-11 10:28 ` Andrea Righi 2008-01-11 14:20 ` Peter Zijlstra 0 siblings, 1 reply; 24+ messages in thread From: Andrea Righi @ 2008-01-11 10:28 UTC (permalink / raw) To: Bill Davidsen; +Cc: LKML, Jens Axboe Bill Davidsen wrote: > Andrea Righi wrote: >> Allow to limit the bandwidth of I/O-intensive processes, like backup >> tools running in background, large files copy, checksums on huge files, >> etc. >> >> This kind of processes can noticeably impact the system responsiveness >> for some time and playing with tasks' priority is not always an >> acceptable solution. >> >> This patch allows to specify a maximum I/O rate in sectors per second >> for each single process via /proc/<PID>/io_throttle (default is zero, >> that specify no limit). >> > It would seem to me that this would be vastly more useful in the real > world if there were a settable default, so that administrators could > avoid having to find and tune individual user processes. And it would > seem far less common that the admin would want to set the limit *up* for > a given process, and it's likely to be one known to the admin, at least > by name. > > Of course if you want to do the effort to make it fully tunable, it > could have a default by UID or GID. Usful on machines shared by students > or managers. At the moment I'm simply using it to backup my PC by this wrapper: $ cat iothrottle #!/bin/sh [ $# -lt 2 ] && echo "usage: $0 RATE CMD" && exit 1 rate=$1 shift $* & trap "kill -9 $!" SIGINT SIGTERM [ -e /proc/$!/io_throttle ] && echo $rate >/proc/$!/io_throttle wait %1 $ ./iothrottle 100 tar ... But I totally agree with you that setting the limits per-UID/per-GID, instead of per-task, would be actually more useful. Maybe a nice approach would be to define the UID/GID upper bounds via configfs (for example) and allow the users to tune the max I/O rate of their single tasks according to the defined ranges. 
In this way it could be even possible to define I/O shaping policies, i.e. give a bandwidth of 10MB/s to user A, 20MB/s to user B, 30MB/s to group X, etc. Anyway, I'm wondering if it's possible (and how) to already do this with process containers... -Andrea ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [RFC][PATCH] per-task I/O throttling 2008-01-11 10:28 ` Andrea Righi @ 2008-01-11 14:20 ` Peter Zijlstra 2008-01-11 15:29 ` Andrea Righi 0 siblings, 1 reply; 24+ messages in thread From: Peter Zijlstra @ 2008-01-11 14:20 UTC (permalink / raw) To: righiandr; +Cc: Bill Davidsen, LKML, Jens Axboe On Fri, 2008-01-11 at 11:28 +0100, Andrea Righi wrote: > Bill Davidsen wrote: > > Andrea Righi wrote: > >> Allow to limit the bandwidth of I/O-intensive processes, like backup > >> tools running in background, large files copy, checksums on huge files, > >> etc. > >> > >> This kind of processes can noticeably impact the system responsiveness > >> for some time and playing with tasks' priority is not always an > >> acceptable solution. > >> > >> This patch allows to specify a maximum I/O rate in sectors per second > >> for each single process via /proc/<PID>/io_throttle (default is zero, > >> that specify no limit). > >> > > It would seem to me that this would be vastly more useful in the real > > world if there were a settable default, so that administrators could > > avoid having to find and tune individual user processes. And it would > > seem far less common that the admin would want to set the limit *up* for > > a given process, and it's likely to be one known to the admin, at least > > by name. > > > > Of course if you want to do the effort to make it fully tunable, it > > could have a default by UID or GID. Usful on machines shared by students > > or managers. > > At the moment I'm simply using it to backup my PC by this wrapper: > > $ cat iothrottle > #!/bin/sh > [ $# -lt 2 ] && echo "usage: $0 RATE CMD" && exit 1 > rate=$1 > shift > $* & > trap "kill -9 $!" SIGINT SIGTERM > [ -e /proc/$!/io_throttle ] && echo $rate >/proc/$!/io_throttle > wait %1 > $ ./iothrottle 100 tar ... > > But I totally agree with you that setting the limits per-UID/per-GID, > instead of per-task, would be actually more useful. 
> > Maybe a nice approach would be to define the UID/GID upper bounds via > configfs (for example) and allow the users to tune the max I/O rate of > their single tasks according to the defined ranges. In this way it could > be even possible to define I/O shaping policies, i.e. give a bandwidth > of 10MB/s to user A, 20MB/s to user B, 30MB/s to group X, etc. > > Anyway, I'm wondering if it's possible (and how) to already do this with > process containers... I think there is an IO controller somewhere based on CFQ. I don't like this patch, because it throttles requests/s, and that doesn't say much. If a task would generate a very seeky load it could still tie up the disk even with a relatively low setting. ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [RFC][PATCH] per-task I/O throttling 2008-01-11 14:20 ` Peter Zijlstra @ 2008-01-11 15:29 ` Andrea Righi 0 siblings, 0 replies; 24+ messages in thread From: Andrea Righi @ 2008-01-11 15:29 UTC (permalink / raw) To: Peter Zijlstra; +Cc: Bill Davidsen, LKML, Jens Axboe Peter Zijlstra wrote: >> >> Anyway, I'm wondering if it's possible (and how) to already do this with >> process containers... > > I think there is an IO controller somewhere based on CFQ. > > I don't like this patch, because it throttles requests/s, and that > doesn't say much. If a task would generate a very seeky load it could > still tie up the disk even with a relatively low setting. > Very true. A seeky intensive process wouldn't be limited at all. And I'm sure there're better ways/models to satisfy my needs. A suggestion (off-list) has been to try with ionice that seems to be the right solution to limit the I/O activity of single processes, but it doens't allow to define policies based on UIDs or GIDs. BTW I don't have any number to compare the effectiveness of the priority approach vs the throttling approach. 
Here is a very quick test made on my PC (not sure if glxgears is the right benchmark to evaluate the system responsiveness): >>>>>> starting: glxgears <<<<<< 3564 frames in 5.0 seconds = 711.722 FPS 3953 frames in 5.0 seconds = 790.598 FPS 3969 frames in 5.0 seconds = 793.794 FPS >>>>>> starting: md5sum /usr/lib/* <<<<<< 3769 frames in 5.0 seconds = 753.189 FPS 2877 frames in 5.0 seconds = 572.843 FPS 3481 frames in 5.0 seconds = 696.071 FPS 3775 frames in 5.0 seconds = 751.404 FPS 2781 frames in 5.0 seconds = 556.118 FPS 3209 frames in 5.0 seconds = 641.064 FPS 2843 frames in 5.0 seconds = 565.697 FPS >>>>>> starting: echo 100 > /proc/`pidof md5sum`/io_throttle <<<<<< 3652 frames in 5.0 seconds = 730.253 FPS 3669 frames in 5.0 seconds = 733.734 FPS 3797 frames in 5.0 seconds = 759.234 FPS 3883 frames in 5.0 seconds = 776.488 FPS 3895 frames in 5.0 seconds = 778.868 FPS 3845 frames in 5.0 seconds = 768.968 FPS 3829 frames in 5.0 seconds = 765.793 FPS >>>>>> flush caches (/proc/sys/vm/drop_caches) <<<<<< >>>>>> starting: glxgears <<<<<< 3763 frames in 5.0 seconds = 752.539 FPS 3818 frames in 5.0 seconds = 763.483 FPS >>>>>> starting: ionice -c3 md5sum /usr/lib/* <<<<<< 3443 frames in 5.0 seconds = 688.597 FPS 3202 frames in 5.0 seconds = 640.390 FPS 3807 frames in 5.0 seconds = 761.391 FPS 3053 frames in 5.0 seconds = 610.539 FPS 2759 frames in 5.0 seconds = 551.790 FPS 2975 frames in 5.0 seconds = 594.873 FPS 2993 frames in 5.0 seconds = 596.709 FPS 3250 frames in 5.0 seconds = 649.857 FPS 3494 frames in 5.0 seconds = 698.688 FPS -Andrea ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [RFC][PATCH] per-task I/O throttling 2008-01-10 22:45 [RFC][PATCH] per-task I/O throttling Andrea Righi 2008-01-11 1:50 ` Bill Davidsen @ 2008-01-11 14:05 ` David Newall 2008-01-11 15:44 ` Andrea Righi 2008-01-11 15:59 ` Balbir Singh 2 siblings, 1 reply; 24+ messages in thread From: David Newall @ 2008-01-11 14:05 UTC (permalink / raw) To: righiandr; +Cc: LKML, Jens Axboe Andrea Righi wrote: > [I/O-intensive] processes can noticeably impact the system responsiveness > for some time and playing with tasks' priority is not always an > acceptable solution. > Why? ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [RFC][PATCH] per-task I/O throttling 2008-01-11 14:05 ` David Newall @ 2008-01-11 15:44 ` Andrea Righi 2008-01-16 19:21 ` David Newall 0 siblings, 1 reply; 24+ messages in thread From: Andrea Righi @ 2008-01-11 15:44 UTC (permalink / raw) To: David Newall; +Cc: LKML, Jens Axboe David Newall wrote: > Andrea Righi wrote: >> [I/O-intensive] processes can noticeably impact the system responsiveness >> for some time and playing with tasks' priority is not always an >> acceptable solution. >> > > Why? > Well, I mean, we can't use 'nice' to grant less priority for the I/O intensive app, because the I/O intensive app itself doesn't need a lot of CPU. Instead, the I/O-bound app eats all the available I/O bandwidth, that's a different issue. -Andrea ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [RFC][PATCH] per-task I/O throttling 2008-01-11 15:44 ` Andrea Righi @ 2008-01-16 19:21 ` David Newall 0 siblings, 0 replies; 24+ messages in thread From: David Newall @ 2008-01-16 19:21 UTC (permalink / raw) To: righiandr; +Cc: David Newall, LKML, Jens Axboe Andrea Righi wrote: > David Newall wrote: > >> Andrea Righi wrote: >> >>> [I/O-intensive] processes can noticeably impact the system responsiveness >>> for some time and playing with tasks' priority is not always an >>> acceptable solution. >>> >>> >> Why? >> >> > > Well, I mean, we can't use 'nice' to grant less priority for the I/O > intensive app, because the I/O intensive app itself doesn't need a lot > of CPU. Instead, the I/O-bound app eats all the available I/O bandwidth, > that's a different issue. That's what I was thinking. Your original, "not always an acceptable solution," made me wonder if you were referring to something obscure. ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [RFC][PATCH] per-task I/O throttling 2008-01-10 22:45 [RFC][PATCH] per-task I/O throttling Andrea Righi 2008-01-11 1:50 ` Bill Davidsen 2008-01-11 14:05 ` David Newall @ 2008-01-11 15:59 ` Balbir Singh 2008-01-11 16:32 ` Andrea Righi 2 siblings, 1 reply; 24+ messages in thread From: Balbir Singh @ 2008-01-11 15:59 UTC (permalink / raw) To: righiandr; +Cc: LKML, Jens Axboe On Jan 11, 2008 4:15 AM, Andrea Righi <righiandr@users.sourceforge.net> wrote: > Allow to limit the bandwidth of I/O-intensive processes, like backup > tools running in background, large files copy, checksums on huge files, > etc. > > This kind of processes can noticeably impact the system responsiveness > for some time and playing with tasks' priority is not always an > acceptable solution. > > This patch allows to specify a maximum I/O rate in sectors per second > for each single process via /proc/<PID>/io_throttle (default is zero, > that specify no limit). > > Signed-off-by: Andrea Righi <a.righi@cineca.it> Hi, Andrea, We have been thinking of doing control group based I/O control. I have not reviewed your patch in detail. I can suggest looking at openvz's IO controller. I/O bandwidth control is definitely interesting. How did you test your solution? Balbir ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [RFC][PATCH] per-task I/O throttling 2008-01-11 15:59 ` Balbir Singh @ 2008-01-11 16:32 ` Andrea Righi 2008-01-12 4:57 ` Valdis.Kletnieks 0 siblings, 1 reply; 24+ messages in thread From: Andrea Righi @ 2008-01-11 16:32 UTC (permalink / raw) To: Balbir Singh; +Cc: LKML, Jens Axboe Balbir Singh wrote: > On Jan 11, 2008 4:15 AM, Andrea Righi <righiandr@users.sourceforge.net> wrote: >> Allow to limit the bandwidth of I/O-intensive processes, like backup >> tools running in background, large files copy, checksums on huge files, >> etc. >> >> This kind of processes can noticeably impact the system responsiveness >> for some time and playing with tasks' priority is not always an >> acceptable solution. >> >> This patch allows to specify a maximum I/O rate in sectors per second >> for each single process via /proc/<PID>/io_throttle (default is zero, >> that specify no limit). >> >> Signed-off-by: Andrea Righi <a.righi@cineca.it> > > Hi, Andrea, > > We have been thinking of doing control group based I/O control. I have > not reviewed your patch in detail. I can suggest looking at openvz's > IO controller. I/O bandwidth control is definitely interesting. How > did you test your solution? I don't have meaningful values right now, just did some quick tests with my pc. Regarding openvz it seems to use the CFQ priority-based approach (with the 3 priority classes: real time, best effort and idle class). The interesting feature is that it allows to set a priority for each process container, but AFAIK it doesn't allow to "partition" the bandwidth between different containers (that would be a nice feature IMHO). For example it would be great to be able to define per-container limits, like assign 10MB/s for processes in container A, 30MB/s to container B, 20MB/s to container C, etc. -Andrea ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [RFC][PATCH] per-task I/O throttling 2008-01-11 16:32 ` Andrea Righi @ 2008-01-12 4:57 ` Valdis.Kletnieks 2008-01-12 9:46 ` Peter Zijlstra 0 siblings, 1 reply; 24+ messages in thread From: Valdis.Kletnieks @ 2008-01-12 4:57 UTC (permalink / raw) To: righiandr; +Cc: Balbir Singh, LKML, Jens Axboe [-- Attachment #1: Type: text/plain, Size: 691 bytes --] On Fri, 11 Jan 2008 17:32:49 +0100, Andrea Righi said: > The interesting feature is that it allows to set a priority for each > process container, but AFAIK it doesn't allow to "partition" the > bandwidth between different containers (that would be a nice feature > IMHO). For example it would be great to be able to define per-container > limits, like assign 10MB/s for processes in container A, 30MB/s to > container B, 20MB/s to container C, etc. Has anybody considered allocating based on *seeks* rather than bytes moved, or counting seeks as "virtual bytes" for the purposes of accounting (if the disk can do 50mbytes/sec, and a seek takes 5millisecs, then count it as 100K of data)? [-- Attachment #2: Type: application/pgp-signature, Size: 226 bytes --] ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [RFC][PATCH] per-task I/O throttling 2008-01-12 4:57 ` Valdis.Kletnieks @ 2008-01-12 9:46 ` Peter Zijlstra 2008-01-12 10:57 ` Balbir Singh 0 siblings, 1 reply; 24+ messages in thread From: Peter Zijlstra @ 2008-01-12 9:46 UTC (permalink / raw) To: Valdis.Kletnieks; +Cc: righiandr, Balbir Singh, LKML, Jens Axboe On Fri, 2008-01-11 at 23:57 -0500, Valdis.Kletnieks@vt.edu wrote: > On Fri, 11 Jan 2008 17:32:49 +0100, Andrea Righi said: > > > The interesting feature is that it allows to set a priority for each > > process container, but AFAIK it doesn't allow to "partition" the > > bandwidth between different containers (that would be a nice feature > > IMHO). For example it would be great to be able to define per-container > > limits, like assign 10MB/s for processes in container A, 30MB/s to > > container B, 20MB/s to container C, etc. > > Has anybody considered allocating based on *seeks* rather than bytes moved, > or counting seeks as "virtual bytes" for the purposes of accounting (if the > disk can do 50mbytes/sec, and a seek takes 5millisecs, then count it as 100K > of data)? I was considering a time scheduler, you can fill your time slot with seeks or data, it might be what CFQ does, but I've never even read the code. ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [RFC][PATCH] per-task I/O throttling 2008-01-12 9:46 ` Peter Zijlstra @ 2008-01-12 10:57 ` Balbir Singh 2008-01-12 11:10 ` Peter Zijlstra 0 siblings, 1 reply; 24+ messages in thread From: Balbir Singh @ 2008-01-12 10:57 UTC (permalink / raw) To: Peter Zijlstra; +Cc: Valdis.Kletnieks, righiandr, LKML, Jens Axboe * Peter Zijlstra <a.p.zijlstra@chello.nl> [2008-01-12 10:46:37]: > > On Fri, 2008-01-11 at 23:57 -0500, Valdis.Kletnieks@vt.edu wrote: > > On Fri, 11 Jan 2008 17:32:49 +0100, Andrea Righi said: > > > > > The interesting feature is that it allows to set a priority for each > > > process container, but AFAIK it doesn't allow to "partition" the > > > bandwidth between different containers (that would be a nice feature > > > IMHO). For example it would be great to be able to define per-container > > > limits, like assign 10MB/s for processes in container A, 30MB/s to > > > container B, 20MB/s to container C, etc. > > > > Has anybody considered allocating based on *seeks* rather than bytes moved, > > or counting seeks as "virtual bytes" for the purposes of accounting (if the > > disk can do 50mbytes/sec, and a seek takes 5millisecs, then count it as 100K > > of data)? > > I was considering a time scheduler, you can fill your time slot with > seeks or data, it might be what CFQ does, but I've never even read the > code. > So far the definition of I/O bandwidth has been w.r.t time. Not all IO devices have sectors; I'd prefer bytes over a period of time. -- Warm Regards, Balbir Singh Linux Technology Center IBM, ISTL ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [RFC][PATCH] per-task I/O throttling 2008-01-12 10:57 ` Balbir Singh @ 2008-01-12 11:10 ` Peter Zijlstra 2008-01-12 18:01 ` Andrea Righi 0 siblings, 1 reply; 24+ messages in thread From: Peter Zijlstra @ 2008-01-12 11:10 UTC (permalink / raw) To: balbir; +Cc: Valdis.Kletnieks, righiandr, LKML, Jens Axboe On Sat, 2008-01-12 at 16:27 +0530, Balbir Singh wrote: > * Peter Zijlstra <a.p.zijlstra@chello.nl> [2008-01-12 10:46:37]: > > > > > On Fri, 2008-01-11 at 23:57 -0500, Valdis.Kletnieks@vt.edu wrote: > > > On Fri, 11 Jan 2008 17:32:49 +0100, Andrea Righi said: > > > > > > > The interesting feature is that it allows to set a priority for each > > > > process container, but AFAIK it doesn't allow to "partition" the > > > > bandwidth between different containers (that would be a nice feature > > > > IMHO). For example it would be great to be able to define per-container > > > > limits, like assign 10MB/s for processes in container A, 30MB/s to > > > > container B, 20MB/s to container C, etc. > > > > > > Has anybody considered allocating based on *seeks* rather than bytes moved, > > > or counting seeks as "virtual bytes" for the purposes of accounting (if the > > > disk can do 50mbytes/sec, and a seek takes 5millisecs, then count it as 100K > > > of data)? > > > > I was considering a time scheduler, you can fill your time slot with > > seeks or data, it might be what CFQ does, but I've never even read the > > code. > > > > So far the definition of I/O bandwidth has been w.r.t time. Not all IO > devices have sectors; I'd prefer bytes over a period of time. Doing a time based one would only require knowing the (avg) delay of seeks, whereas doing a bytes based one would also require knowing the (avg) speed of the device. That is, if you're also interested in providing a latency guarantee. Because that'd force you to convert bytes to time again. 
I'm not sure thats a good way to go with as long as a majority of devices still have a non-0 seek penalty (SSDs just aren't there yet for most of us). ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [RFC][PATCH] per-task I/O throttling 2008-01-12 11:10 ` Peter Zijlstra @ 2008-01-12 18:01 ` Andrea Righi 2008-01-13 4:46 ` Balbir Singh 0 siblings, 1 reply; 24+ messages in thread From: Andrea Righi @ 2008-01-12 18:01 UTC (permalink / raw) To: Peter Zijlstra; +Cc: balbir, Valdis.Kletnieks, LKML, Jens Axboe Peter Zijlstra wrote: > On Sat, 2008-01-12 at 16:27 +0530, Balbir Singh wrote: >> * Peter Zijlstra <a.p.zijlstra@chello.nl> [2008-01-12 10:46:37]: >> >>> On Fri, 2008-01-11 at 23:57 -0500, Valdis.Kletnieks@vt.edu wrote: >>>> On Fri, 11 Jan 2008 17:32:49 +0100, Andrea Righi said: >>>> >>>>> The interesting feature is that it allows to set a priority for each >>>>> process container, but AFAIK it doesn't allow to "partition" the >>>>> bandwidth between different containers (that would be a nice feature >>>>> IMHO). For example it would be great to be able to define per-container >>>>> limits, like assign 10MB/s for processes in container A, 30MB/s to >>>>> container B, 20MB/s to container C, etc. >>>> Has anybody considered allocating based on *seeks* rather than bytes moved, >>>> or counting seeks as "virtual bytes" for the purposes of accounting (if the >>>> disk can do 50mbytes/sec, and a seek takes 5millisecs, then count it as 100K >>>> of data)? >>> I was considering a time scheduler, you can fill your time slot with >>> seeks or data, it might be what CFQ does, but I've never even read the >>> code. >>> >> So far the definition of I/O bandwidth has been w.r.t time. Not all IO >> devices have sectors; I'd prefer bytes over a period of time. > > Doing a time based one would only require knowing the (avg) delay of > seeks, whereas doing a bytes based one would also require knowing the > (avg) speed of the device. > > That is, if you're also interested in providing a latency guarantee. > Because that'd force you to convert bytes to time again. So, what about considering both bytes/sec and io-operations/sec? 
In this way we should be able to limit huge streams of data and seek storms (or any mix of them). Regarding CFQ, AFAIK it's only possible to configure an I/O priorty for a process, but there's no way for example to limit the bandwidth (or I/O operations/sec) for a particular user or group. -Andrea ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [RFC][PATCH] per-task I/O throttling 2008-01-12 18:01 ` Andrea Righi @ 2008-01-13 4:46 ` Balbir Singh 2008-01-15 16:49 ` [RFC][PATCH] per-uid/gid I/O throttling (was Re: [RFC][PATCH] per-task I/O throttling) Andrea Righi 0 siblings, 1 reply; 24+ messages in thread From: Balbir Singh @ 2008-01-13 4:46 UTC (permalink / raw) To: Andrea Righi; +Cc: Peter Zijlstra, Valdis.Kletnieks, LKML, Jens Axboe * Andrea Righi <righiandr@users.sourceforge.net> [2008-01-12 19:01:14]: > Peter Zijlstra wrote: > > On Sat, 2008-01-12 at 16:27 +0530, Balbir Singh wrote: > >> * Peter Zijlstra <a.p.zijlstra@chello.nl> [2008-01-12 10:46:37]: > >> > >>> On Fri, 2008-01-11 at 23:57 -0500, Valdis.Kletnieks@vt.edu wrote: > >>>> On Fri, 11 Jan 2008 17:32:49 +0100, Andrea Righi said: > >>>> > >>>>> The interesting feature is that it allows to set a priority for each > >>>>> process container, but AFAIK it doesn't allow to "partition" the > >>>>> bandwidth between different containers (that would be a nice feature > >>>>> IMHO). For example it would be great to be able to define per-container > >>>>> limits, like assign 10MB/s for processes in container A, 30MB/s to > >>>>> container B, 20MB/s to container C, etc. > >>>> Has anybody considered allocating based on *seeks* rather than bytes moved, > >>>> or counting seeks as "virtual bytes" for the purposes of accounting (if the > >>>> disk can do 50mbytes/sec, and a seek takes 5millisecs, then count it as 100K > >>>> of data)? > >>> I was considering a time scheduler, you can fill your time slot with > >>> seeks or data, it might be what CFQ does, but I've never even read the > >>> code. > >>> > >> So far the definition of I/O bandwidth has been w.r.t time. Not all IO > >> devices have sectors; I'd prefer bytes over a period of time. > > > > Doing a time based one would only require knowing the (avg) delay of > > seeks, whereas doing a bytes based one would also require knowing the > > (avg) speed of the device. 
> > > > That is, if you're also interested in providing a latency guarantee. > > Because that'd force you to convert bytes to time again. > > So, what about considering both bytes/sec and io-operations/sec? In this > way we should be able to limit huge streams of data and seek storms (or > any mix of them). > > Regarding CFQ, AFAIK it's only possible to configure an I/O priorty for > a process, but there's no way for example to limit the bandwidth (or I/O > operations/sec) for a particular user or group. > Limiting usage is also a very useful feature. Andrea could you please port your patches over to control groups. -- Warm Regards, Balbir Singh Linux Technology Center IBM, ISTL ^ permalink raw reply [flat|nested] 24+ messages in thread
* [RFC][PATCH] per-uid/gid I/O throttling (was Re: [RFC][PATCH] per-task I/O throttling) 2008-01-13 4:46 ` Balbir Singh @ 2008-01-15 16:49 ` Andrea Righi 2008-01-11 17:58 ` Pavel Machek 2008-01-16 10:45 ` Balbir Singh 0 siblings, 2 replies; 24+ messages in thread From: Andrea Righi @ 2008-01-15 16:49 UTC (permalink / raw) To: Balbir Singh; +Cc: Peter Zijlstra, Valdis.Kletnieks, LKML Allow to limit the I/O bandwidth for specific uid(s) or gid(s) imposing additional delays on those processes that exceed the limits defined in a configfs tree. Examples: Limit the I/O bandwidth for user www-data (UID 33) to 4MB/s: root@linux:/config/io-throttle# mkdir uid:33 root@linux:/config/io-throttle# cd uid:33/ root@linux:/config/io-throttle/uid:33# cat io-rate io-rate: 0 KiB/sec requested: 0 KiB last_request: 0 jiffies delta: 388202 jiffies root@linux:/config/io-throttle/uid:33# echo 4096 > io-rate root@linux:/config/io-throttle/uid:33# cat io-rate io-rate: 4096 KiB/sec requested: 0 KiB last_request: 389271 jiffies delta: 91 jiffies Limit the I/O bandwidth of group backup (GID 34) to 512KB/s: root@linux:/config/io-throttle# mkdir gid:34 root@linux:/config/io-throttle# cd gid:34/ root@linux:/config/io-throttle/gid:34# cat io-rate io-rate: 0 KiB/sec requested: 0 KiB last_request: 0 jiffies delta: 403160 jiffies root@linux:/config/io-throttle/gid:34# echo 512 > io-rate root@linux:/config/io-throttle/gid:34# cat io-rate io-rate: 512 KiB/sec requested: 0 KiB last_request: 403618 jiffies delta: 80 jiffies Remove the I/O limit for user www-data: root@linux:/config/io-throttle# echo 0 > uid:33/io-rate root@linux:/config/io-throttle# cat uid:33/io-rate io-rate: 0 KiB/sec requested: 0 KiB last_request: 419009 jiffies delta: 568 jiffies or: root@linux:/config/io-throttle# rmdir uid:33 Future improvements: * allow to limit also I/O operations per second (instead of KB/s only) * extend grouping criteria (allow to define rules based on process containers, process command, etc.) 
Signed-off-by: Andrea Righi <a.righi@cineca.it> --- diff -urpN linux-2.6.24-rc7/block/io-throttle.c linux-2.6.24-rc7-io-throttle/block/io-throttle.c --- linux-2.6.24-rc7/block/io-throttle.c 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.24-rc7-io-throttle/block/io-throttle.c 2008-01-15 17:25:06.000000000 +0100 @@ -0,0 +1,282 @@ +/* + * io-throttle.c + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + * + * Copyright (C) 2008 Andrea Righi <righiandr@users.sourceforge.net> + */ + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/configfs.h> +#include <linux/slab.h> +#include <linux/sched.h> +#include <linux/jiffies.h> +#include <linux/io-throttle.h> + +/* This should work for any reasonable size of uid_t */ +#define MAXUIDCHAR (sizeof(uid_t) * 5 / 2) + +/* + * Basic structure that identifies a single I/O throttling rule + */ +struct iothrottle { + struct config_item item; + unsigned long iorate; + unsigned long req; + unsigned long last_request; +}; + +/* Get an I/O-throttling item from a configfs item */ +static inline struct iothrottle *to_iothrottle(struct config_item *item) +{ + return item ? 
container_of(item, struct iothrottle, item) : NULL; +} + +static ssize_t iothrottle_attr_show(struct config_item *item, + struct configfs_attribute *attr, + char *page); +static ssize_t iothrottle_attr_store(struct config_item *item, + struct configfs_attribute *attr, + const char *page, size_t count); +static struct config_item *iothrottle_make_item(struct config_group *group, + const char *name); +static void iothrottle_release(struct config_item *item); + +/* I/O throttling item in configfs (identify a single I/O throttling rule) */ +static struct configfs_attribute iothrottle_attr_iorate = { + .ca_owner = THIS_MODULE, + .ca_name = "io-rate", + .ca_mode = S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH, +}; + +/* I/O throttling element in configfs */ +static struct configfs_group_operations iothrottle_group_ops = { + .make_item = iothrottle_make_item, +}; + +static struct config_item_type iothrottle_group_type = { + .ct_group_ops = &iothrottle_group_ops, +}; + +/* Entire I/O throttling subsystem under configfs (/config/io-throttle) */ +struct configfs_subsystem iothrottle_subsys = { + .su_group = { + .cg_item = { + .ci_namebuf = "io-throttle", + .ci_type = &iothrottle_group_type, + }, + }, +}; + +/* List of configfs elements per group */ +static struct configfs_attribute *iothrottle_attrs[] = { + &iothrottle_attr_iorate, + NULL, +}; + +static struct configfs_item_operations iothrottle_item_ops = { + .release = iothrottle_release, + .show_attribute = iothrottle_attr_show, + .store_attribute = iothrottle_attr_store, +}; + +static struct config_item_type iothrottle_type = { + .ct_item_ops = &iothrottle_item_ops, + .ct_attrs = iothrottle_attrs, + .ct_owner = THIS_MODULE, +}; + +/* Print a I/O throttling rule details */ +static ssize_t iothrottle_attr_show(struct config_item *item, + struct configfs_attribute *attr, + char *page) +{ + ssize_t count; + unsigned long delta; + struct iothrottle *iot = to_iothrottle(item); + + delta = (long)jiffies - (long)iot->last_request; + /* 
Print additional debugging stuff */ + count = sprintf(page, " io-rate: %lu KiB/sec\n" + " requested: %lu KiB\n" + "last_request: %lu jiffies\n" + " delta: %lu jiffies\n", + iot->iorate, iot->req << 1, iot->last_request, delta); + + return count; +} + +/* Configure the attributes of an I/O throttling rule */ +static ssize_t iothrottle_attr_store(struct config_item *item, + struct configfs_attribute *attr, + const char *page, size_t count) +{ + struct iothrottle *iot = to_iothrottle(item); + unsigned long tmp; + char *p = (char *) page; + + tmp = simple_strtoul(p, &p, 10); + if (!p || (*p && (*p != '\n'))) + return -EINVAL; + + iot->iorate = tmp; + iot->req = 0; + iot->last_request = jiffies; + + return count; +} + +/* Register a new I/O throttling rule */ +static struct config_item *iothrottle_make_item(struct config_group *group, + const char *name) +{ + struct iothrottle *iot; + + iot = kzalloc(sizeof(*iot), GFP_KERNEL); + if (unlikely(!iot)) + return NULL; + + iot->last_request = jiffies; + + config_item_init_type_name(&iot->item, name, &iothrottle_type); + + return &iot->item; +} + +/* Unregister an I/O throttling rule */ +static void iothrottle_release(struct config_item *item) +{ + kfree(to_iothrottle(item)); +} + +/* Get the opportune "struct iothrottle" item from configfs if present */ +static inline struct iothrottle *iothrottle_get_config(void) +{ + char idstr[MAXUIDCHAR + 1]; + struct iothrottle *p = NULL; + + memset(idstr, 0, sizeof(idstr)); + + /* UID-based rule */ + snprintf(idstr, MAXUIDCHAR, "uid:%u", current->uid); + p = to_iothrottle( + config_group_find_item(&iothrottle_subsys.su_group, idstr)); + if (p) + goto out_get_config; + + /* GID-based rule */ + snprintf(idstr, MAXUIDCHAR, "gid:%u", current->gid); + p = to_iothrottle( + config_group_find_item(&iothrottle_subsys.su_group, idstr)); + +out_get_config: + return p; +} + +/* + * Here is the main function of the I/O throttling mechanism. 
+ * + * FIXME: potential race if the struct iothrottle *iot item is removed/modified + * in the middle of the io_throttle() execution. + */ +void io_throttle(int nr_sectors) +{ + struct iothrottle *iot; + unsigned long delta, n; + long sleep; + + iot = iothrottle_get_config(); + if (!iot || !iot->iorate) + return; + + /* + * The concept is the following: evaluate the actual I/O rate of a + * process, looking at the sectors requested over the time elapsed from + * the last request. If the actual I/O rate is beyond the maximum + * allowed I/O rate then sleep the current task for the correct amount + * of time, in order to reduce the actual I/O rate under the allowed + * limit. + * + * The time to sleep is evaluated as: + * + * sleep = (sectors_requested / allowed_iorate) - time_elapsed + */ + delta = (long)jiffies - (long)iot->last_request; + n = iot->req += nr_sectors; + do_div(n, iot->iorate); + + /* + * Unable to evaluate delta (due to a too small interval of time + * between two requests) or n (due to a too small request). + * + * Account the requested sectors in iot->req and sum them to the + * sectors of the next request. + */ + if (!delta || !n) + return; + + /* + * Convert n in jiffies (remember that iot->iorate is in KB/s and we + * need to convert it in sectors/jiffies) + */ + sleep = msecs_to_jiffies(n * 1000 / 2) - delta; + if (sleep > 0) { + printk(KERN_DEBUG + "io-throttle: process %i (%s) must sleep %lu jiffies\n", + task_pid_nr(current), current->comm, sleep); + schedule_timeout_uninterruptible(sleep); + } + + /* OK, we stay under the limits. Reset statistics. 
*/ + iot->req = 0; + iot->last_request = jiffies; +} +EXPORT_SYMBOL(io_throttle); + +/* Register I/O throttling subsystem */ +static int __init iothrottle_init(void) +{ + int ret; + + config_group_init(&iothrottle_subsys.su_group); + mutex_init(&iothrottle_subsys.su_mutex); + ret = configfs_register_subsystem(&iothrottle_subsys); + if (ret) { + printk(KERN_ERR "%s: error %d while registering subsystem\n", + iothrottle_subsys.su_group.cg_item.ci_namebuf, + ret); + goto out_unregister; + } + + return 0; + +out_unregister: + configfs_unregister_subsystem(&iothrottle_subsys); + + return ret; +} + +/* Unregister I/O throttling subsystem */ +static void __exit iothrottle_exit(void) +{ + configfs_unregister_subsystem(&iothrottle_subsys); + mutex_destroy(&iothrottle_subsys.su_mutex); +} + +module_init(iothrottle_init); +module_exit(iothrottle_exit); +MODULE_LICENSE("GPL"); diff -urpN linux-2.6.24-rc7/block/Kconfig linux-2.6.24-rc7-io-throttle/block/Kconfig --- linux-2.6.24-rc7/block/Kconfig 2008-01-06 22:45:38.000000000 +0100 +++ linux-2.6.24-rc7-io-throttle/block/Kconfig 2008-01-15 15:30:21.000000000 +0100 @@ -40,6 +40,16 @@ config BLK_DEV_IO_TRACE git://brick.kernel.dk/data/git/blktrace.git +config IO_THROTTLE + tristate "Enable I/O throttling (EXPERIMENTAL)" + depends on EXPERIMENTAL + select CONFIGFS_FS + help + This allows to limit the maximum I/O rate for specific UID(s) or + GID(s). + + Say N if unsure. 
+ config LSF bool "Support for Large Single Files" depends on !64BIT diff -urpN linux-2.6.24-rc7/block/ll_rw_blk.c linux-2.6.24-rc7-io-throttle/block/ll_rw_blk.c --- linux-2.6.24-rc7/block/ll_rw_blk.c 2008-01-06 22:45:38.000000000 +0100 +++ linux-2.6.24-rc7-io-throttle/block/ll_rw_blk.c 2008-01-15 13:59:21.000000000 +0100 @@ -31,6 +31,7 @@ #include <linux/blktrace_api.h> #include <linux/fault-inject.h> #include <linux/scatterlist.h> +#include <linux/io-throttle.h> /* * for max sense size @@ -3221,6 +3222,8 @@ static inline void __generic_make_reques if (bio_check_eod(bio, nr_sectors)) goto end_io; + io_throttle(nr_sectors); + /* * Resolve the mapping until finished. (drivers are * still free to implement/resolve their own stacking diff -urpN linux-2.6.24-rc7/block/Makefile linux-2.6.24-rc7-io-throttle/block/Makefile --- linux-2.6.24-rc7/block/Makefile 2008-01-06 22:45:38.000000000 +0100 +++ linux-2.6.24-rc7-io-throttle/block/Makefile 2008-01-15 13:59:21.000000000 +0100 @@ -12,3 +12,5 @@ obj-$(CONFIG_IOSCHED_CFQ) += cfq-iosched obj-$(CONFIG_BLK_DEV_IO_TRACE) += blktrace.o obj-$(CONFIG_BLOCK_COMPAT) += compat_ioctl.o + +obj-$(CONFIG_IO_THROTTLE) += io-throttle.o diff -urpN linux-2.6.24-rc7/include/linux/io-throttle.h linux-2.6.24-rc7-io-throttle/include/linux/io-throttle.h --- linux-2.6.24-rc7/include/linux/io-throttle.h 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.24-rc7-io-throttle/include/linux/io-throttle.h 2008-01-15 13:59:21.000000000 +0100 @@ -0,0 +1,10 @@ +#ifndef IO_THROTTLE_H +#define IO_THROTTLE_H + +#ifdef CONFIG_IO_THROTTLE +extern void io_throttle(int nr_sectors); +#else +static inline void io_throttle(int nr_sectors) { } +#endif /* CONFIG_IO_THROTTLE */ + +#endif ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [RFC][PATCH] per-uid/gid I/O throttling (was Re: [RFC][PATCH] per-task I/O throttling) 2008-01-15 16:49 ` [RFC][PATCH] per-uid/gid I/O throttling (was Re: [RFC][PATCH] per-task I/O throttling) Andrea Righi @ 2008-01-11 17:58 ` Pavel Machek 2008-01-23 15:41 ` Andrea Righi 2008-01-16 10:45 ` Balbir Singh 1 sibling, 1 reply; 24+ messages in thread From: Pavel Machek @ 2008-01-11 17:58 UTC (permalink / raw) To: Andrea Righi; +Cc: Balbir Singh, Peter Zijlstra, Valdis.Kletnieks, LKML On Tue 2008-01-15 17:49:36, Andrea Righi wrote: > Allow to limit the I/O bandwidth for specific uid(s) or gid(s) imposing > additional delays on those processes that exceed the limits defined in a > configfs tree. > > Examples: > > Limit the I/O bandwidth for user www-data (UID 33) to 4MB/s: > > root@linux:/config/io-throttle# mkdir uid:33 > root@linux:/config/io-throttle# cd uid:33/ > root@linux:/config/io-throttle/uid:33# cat io-rate > io-rate: 0 KiB/sec > requested: 0 KiB > last_request: 0 jiffies > delta: 388202 jiffies > root@linux:/config/io-throttle/uid:33# echo 4096 > io-rate > root@linux:/config/io-throttle/uid:33# cat io-rate > io-rate: 4096 KiB/sec > requested: 0 KiB > last_request: 389271 jiffies > delta: 91 jiffies > > Limit the I/O bandwidth of group backup (GID 34) to 512KB/s: Maybe ionice from cfq should be improved, instead? -- (english) http://www.livejournal.com/~pavelmachek (cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [RFC][PATCH] per-uid/gid I/O throttling (was Re: [RFC][PATCH] per-task I/O throttling) 2008-01-11 17:58 ` Pavel Machek @ 2008-01-23 15:41 ` Andrea Righi 0 siblings, 0 replies; 24+ messages in thread From: Andrea Righi @ 2008-01-23 15:41 UTC (permalink / raw) To: Pavel Machek; +Cc: Balbir Singh, Peter Zijlstra, Valdis.Kletnieks, LKML Pavel Machek wrote: > On Tue 2008-01-15 17:49:36, Andrea Righi wrote: >> Allow to limit the I/O bandwidth for specific uid(s) or gid(s) imposing >> additional delays on those processes that exceed the limits defined in a >> configfs tree. >> >> Examples: >> >> Limit the I/O bandwidth for user www-data (UID 33) to 4MB/s: >> >> root@linux:/config/io-throttle# mkdir uid:33 >> root@linux:/config/io-throttle# cd uid:33/ >> root@linux:/config/io-throttle/uid:33# cat io-rate >> io-rate: 0 KiB/sec >> requested: 0 KiB >> last_request: 0 jiffies >> delta: 388202 jiffies >> root@linux:/config/io-throttle/uid:33# echo 4096 > io-rate >> root@linux:/config/io-throttle/uid:33# cat io-rate >> io-rate: 4096 KiB/sec >> requested: 0 KiB >> last_request: 389271 jiffies >> delta: 91 jiffies >> >> Limit the I/O bandwidth of group backup (GID 34) to 512KB/s: > > Maybe ionice from cfq should be improved, instead? IMHO it would be interesting to have also a way to use the limiting approach, instead of i/o priority-based only (i.e. checks to ensure that servicing the requests will not cause the associated user's maximum quality of service to be exceeded). see also http://lkml.org/lkml/2008/1/20/157 -Andrea ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [RFC][PATCH] per-uid/gid I/O throttling (was Re: [RFC][PATCH] per-task I/O throttling) 2008-01-15 16:49 ` [RFC][PATCH] per-uid/gid I/O throttling (was Re: [RFC][PATCH] per-task I/O throttling) Andrea Righi 2008-01-11 17:58 ` Pavel Machek @ 2008-01-16 10:45 ` Balbir Singh 2008-01-16 11:30 ` Valdis.Kletnieks 2008-01-16 12:58 ` Andrea Righi 1 sibling, 2 replies; 24+ messages in thread From: Balbir Singh @ 2008-01-16 10:45 UTC (permalink / raw) To: Andrea Righi; +Cc: Peter Zijlstra, Valdis.Kletnieks, LKML * Andrea Righi <righiandr@users.sourceforge.net> [2008-01-15 17:49:36]: > Allow to limit the I/O bandwidth for specific uid(s) or gid(s) imposing > additional delays on those processes that exceed the limits defined in a > configfs tree. > > Examples: > > Limit the I/O bandwidth for user www-data (UID 33) to 4MB/s: > > root@linux:/config/io-throttle# mkdir uid:33 > root@linux:/config/io-throttle# cd uid:33/ > root@linux:/config/io-throttle/uid:33# cat io-rate > io-rate: 0 KiB/sec > requested: 0 KiB > last_request: 0 jiffies > delta: 388202 jiffies > root@linux:/config/io-throttle/uid:33# echo 4096 > io-rate > root@linux:/config/io-throttle/uid:33# cat io-rate > io-rate: 4096 KiB/sec > requested: 0 KiB > last_request: 389271 jiffies > delta: 91 jiffies > > Limit the I/O bandwidth of group backup (GID 34) to 512KB/s: > > root@linux:/config/io-throttle# mkdir gid:34 > root@linux:/config/io-throttle# cd gid:34/ > root@linux:/config/io-throttle/gid:34# cat io-rate > io-rate: 0 KiB/sec > requested: 0 KiB > last_request: 0 jiffies > delta: 403160 jiffies > root@linux:/config/io-throttle/gid:34# echo 512 > io-rate > root@linux:/config/io-throttle/gid:34# cat io-rate > io-rate: 512 KiB/sec > requested: 0 KiB > last_request: 403618 jiffies > delta: 80 jiffies > > Remove the I/O limit for user www-data: > > root@linux:/config/io-throttle# echo 0 > uid:33/io-rate > root@linux:/config/io-throttle# cat uid:33/io-rate > io-rate: 0 KiB/sec > requested: 0 KiB > last_request: 
419009 jiffies > delta: 568 jiffies > > or: > > root@linux:/config/io-throttle# rmdir uid:33 > > Future improvements: > * allow to limit also I/O operations per second (instead of KB/s only) > * extend grouping criteria (allow to define rules based on process containers, > process command, etc.) > > Signed-off-by: Andrea Righi <a.righi@cineca.it> Hi, Andrea, Thanks for doing this. I am going to review the patches in greater detail and also test them. Why do you use configfs when we have a control group filesystem available for grouping tasks and providing a file system based interface for control and accounting? -- Warm Regards, Balbir Singh Linux Technology Center IBM, ISTL ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [RFC][PATCH] per-uid/gid I/O throttling (was Re: [RFC][PATCH] per-task I/O throttling) 2008-01-16 10:45 ` Balbir Singh @ 2008-01-16 11:30 ` Valdis.Kletnieks 2008-01-16 12:05 ` Balbir Singh 2008-01-16 12:58 ` Andrea Righi 1 sibling, 1 reply; 24+ messages in thread From: Valdis.Kletnieks @ 2008-01-16 11:30 UTC (permalink / raw) To: balbir; +Cc: Andrea Righi, Peter Zijlstra, LKML [-- Attachment #1: Type: text/plain, Size: 640 bytes --] On Wed, 16 Jan 2008 16:15:41 +0530, Balbir Singh said: > Thanks for doing this. I am going to review the patches in greater > detail and also test them. Why do you use configfs when we have a > control group filesystem available for grouping tasks and providing a > file system based interface for control and accounting? And here I thought "There's more than one way to do it" was the Perl slogan. :) An equally valid question would be: "Why are we carrying around a control group filesystem when we have configfs?" (Honestly, I didn't know we *were* carrying around such a filesystem - and quite likely Andrea Righi didn't either...) [-- Attachment #2: Type: application/pgp-signature, Size: 226 bytes --] ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [RFC][PATCH] per-uid/gid I/O throttling (was Re: [RFC][PATCH] per-task I/O throttling) 2008-01-16 11:30 ` Valdis.Kletnieks @ 2008-01-16 12:05 ` Balbir Singh 2008-01-16 12:24 ` Valdis.Kletnieks 0 siblings, 1 reply; 24+ messages in thread From: Balbir Singh @ 2008-01-16 12:05 UTC (permalink / raw) To: Valdis.Kletnieks; +Cc: Andrea Righi, Peter Zijlstra, LKML * Valdis.Kletnieks@vt.edu <Valdis.Kletnieks@vt.edu> [2008-01-16 06:30:31]: > On Wed, 16 Jan 2008 16:15:41 +0530, Balbir Singh said: > > > Thanks for doing this. I am going to review the patches in greater > > detail and also test them. Why do you use configfs when we have a > > control group filesystem available for grouping tasks and providing a > > file system based interface for control and accounting? > > And here I thought "There's more than one way to do it" was the Perl slogan. :) > Yes, there are several ways to do it, but the discussion over the last year or so has been centered around control groups. We've discussed all approaches on lkml and there was consensus on using control groups. Please read the lkml archives for the discussion details. > An equally valid question would be: "Why are we carrying around a control > group filesystem when we have configfs?" (Honestly, I didn't know we *were* > carrying around such a filesystem - and quite likely Andrea Righi didn't > either...) Control groups is derived from cpusets and for those interested in grouping tasks for control, is the preferred method of providing control. -- Warm Regards, Balbir Singh Linux Technology Center IBM, ISTL ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [RFC][PATCH] per-uid/gid I/O throttling (was Re: [RFC][PATCH] per-task I/O throttling) 2008-01-16 12:05 ` Balbir Singh @ 2008-01-16 12:24 ` Valdis.Kletnieks 0 siblings, 0 replies; 24+ messages in thread From: Valdis.Kletnieks @ 2008-01-16 12:24 UTC (permalink / raw) To: balbir; +Cc: Andrea Righi, Peter Zijlstra, LKML [-- Attachment #1: Type: text/plain, Size: 541 bytes --] On Wed, 16 Jan 2008 17:35:33 +0530, Balbir Singh said: > Control groups is derived from cpusets and for those interested in > grouping tasks for control, is the preferred method of providing > control. Ahh, that's why I didn't notice it - "cpusets" didn't seem to do much for the 1 and 2 CPU systems I usually deal with so I filtered it out (including the parts where it started becoming relevant to things I do). Often, the right tool for a job is something you've never heard of because it originated in some other specialized area... [-- Attachment #2: Type: application/pgp-signature, Size: 226 bytes --] ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [RFC][PATCH] per-uid/gid I/O throttling (was Re: [RFC][PATCH] per-task I/O throttling) 2008-01-16 10:45 ` Balbir Singh 2008-01-16 11:30 ` Valdis.Kletnieks @ 2008-01-16 12:58 ` Andrea Righi 1 sibling, 0 replies; 24+ messages in thread From: Andrea Righi @ 2008-01-16 12:58 UTC (permalink / raw) To: Balbir Singh; +Cc: Peter Zijlstra, Valdis.Kletnieks, LKML Balbir Singh wrote: > * Andrea Righi <righiandr@users.sourceforge.net> [2008-01-15 17:49:36]: > >> Allow to limit the I/O bandwidth for specific uid(s) or gid(s) imposing >> additional delays on those processes that exceed the limits defined in a >> configfs tree. >> >> Examples: >> >> Limit the I/O bandwidth for user www-data (UID 33) to 4MB/s: >> >> root@linux:/config/io-throttle# mkdir uid:33 >> root@linux:/config/io-throttle# cd uid:33/ >> root@linux:/config/io-throttle/uid:33# cat io-rate >> io-rate: 0 KiB/sec >> requested: 0 KiB >> last_request: 0 jiffies >> delta: 388202 jiffies >> root@linux:/config/io-throttle/uid:33# echo 4096 > io-rate >> root@linux:/config/io-throttle/uid:33# cat io-rate >> io-rate: 4096 KiB/sec >> requested: 0 KiB >> last_request: 389271 jiffies >> delta: 91 jiffies >> >> Limit the I/O bandwidth of group backup (GID 34) to 512KB/s: >> >> root@linux:/config/io-throttle# mkdir gid:34 >> root@linux:/config/io-throttle# cd gid:34/ >> root@linux:/config/io-throttle/gid:34# cat io-rate >> io-rate: 0 KiB/sec >> requested: 0 KiB >> last_request: 0 jiffies >> delta: 403160 jiffies >> root@linux:/config/io-throttle/gid:34# echo 512 > io-rate >> root@linux:/config/io-throttle/gid:34# cat io-rate >> io-rate: 512 KiB/sec >> requested: 0 KiB >> last_request: 403618 jiffies >> delta: 80 jiffies >> >> Remove the I/O limit for user www-data: >> >> root@linux:/config/io-throttle# echo 0 > uid:33/io-rate >> root@linux:/config/io-throttle# cat uid:33/io-rate >> io-rate: 0 KiB/sec >> requested: 0 KiB >> last_request: 419009 jiffies >> delta: 568 jiffies >> >> or: >> >> root@linux:/config/io-throttle# 
rmdir uid:33 >> >> Future improvements: >> * allow to limit also I/O operations per second (instead of KB/s only) >> * extend grouping criteria (allow to define rules based on process containers, >> process command, etc.) >> >> Signed-off-by: Andrea Righi <a.righi@cineca.it> > > Hi, Andrea, > > Thanks for doing this. I am going to review the patches in greater > detail and also test them. Why do you use configfs when we have a > control group filesystem available for grouping tasks and providing a > file system based interface for control and accounting? > Well... I didn't choose configfs for a technical reason, but simply because I'm more familiar with it, respect to the other equivalent ways to implement this. But I'll try to look also at the control group approach, I don't know in details all the advantages/disadvantages, but it seems interesting anyway. -Andrea ^ permalink raw reply [flat|nested] 24+ messages in thread
end of thread, other threads:[~2008-01-23 15:42 UTC | newest] Thread overview: 24+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2008-01-10 22:45 [RFC][PATCH] per-task I/O throttling Andrea Righi 2008-01-11 1:50 ` Bill Davidsen 2008-01-11 10:28 ` Andrea Righi 2008-01-11 14:20 ` Peter Zijlstra 2008-01-11 15:29 ` Andrea Righi 2008-01-11 14:05 ` David Newall 2008-01-11 15:44 ` Andrea Righi 2008-01-16 19:21 ` David Newall 2008-01-11 15:59 ` Balbir Singh 2008-01-11 16:32 ` Andrea Righi 2008-01-12 4:57 ` Valdis.Kletnieks 2008-01-12 9:46 ` Peter Zijlstra 2008-01-12 10:57 ` Balbir Singh 2008-01-12 11:10 ` Peter Zijlstra 2008-01-12 18:01 ` Andrea Righi 2008-01-13 4:46 ` Balbir Singh 2008-01-15 16:49 ` [RFC][PATCH] per-uid/gid I/O throttling (was Re: [RFC][PATCH] per-task I/O throttling) Andrea Righi 2008-01-11 17:58 ` Pavel Machek 2008-01-23 15:41 ` Andrea Righi 2008-01-16 10:45 ` Balbir Singh 2008-01-16 11:30 ` Valdis.Kletnieks 2008-01-16 12:05 ` Balbir Singh 2008-01-16 12:24 ` Valdis.Kletnieks 2008-01-16 12:58 ` Andrea Righi
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox