[RFC] cfq: adapt slice to number of processes doing I/O (v2.1)

All of lore.kernel.org
 help / color / mirror / Atom feed

From: Corrado Zoccolo <czoccolo@gmail.com>
To: "Linux-Kernel" <linux-kernel@vger.kernel.org>,
	Jens Axboe <jens.axboe@oracle.com>,
	Jeff Moyer <jmoyer@redhat.com>
Subject: [RFC] cfq: adapt slice to number of processes doing I/O (v2.1)
Date: Tue, 15 Sep 2009 22:12:53 +0200	[thread overview]
Message-ID: <200909152212.53883.czoccolo@gmail.com> (raw)

When the number of processes performing I/O concurrently increases,
a fixed time slice per process will cause large latencies.

This (v2.1) patch will scale the time slice assigned to each process,
according to a target latency (tunable from sysfs, default 300ms).

In order to keep fairness among processes, we adopt two new mechanisms compared to v1:

* The number of active processes is computed using a special form of
running average that quickly follows sudden increases (to keep latency low)
and decreases slowly (to preserve fairness in spite of rapid decreases of
this value).

* The idle time is computed using the remaining slice time as an upper bound.

To safeguard sequential bandwidth, we impose a minimum time slice
(computed using 2*cfq_slice_idle as base, adjusted according to priority
and async-ness).

Signed-off-by: Corrado Zoccolo <czoccolo@gmail.com>

---
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 0e3814b..ca90d42 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -27,6 +27,8 @@ static const int cfq_slice_sync = HZ / 10;
 static int cfq_slice_async = HZ / 25;
 static const int cfq_slice_async_rq = 2;
 static int cfq_slice_idle = HZ / 125;
+static int cfq_target_latency = HZ * 3/10; /* 300 ms */
+static int cfq_hist_divisor = 4;
 
 /*
  * offset from end of service tree
@@ -134,6 +136,9 @@ struct cfq_data {
 	struct rb_root prio_trees[CFQ_PRIO_LISTS];
 
 	unsigned int busy_queues;
+	unsigned int busy_queues_avg;
+	unsigned int busy_rt_queues;
+	unsigned int busy_rt_queues_avg;
 
 	int rq_in_driver[2];
 	int sync_flight;
@@ -173,6 +178,8 @@ struct cfq_data {
 	unsigned int cfq_slice[2];
 	unsigned int cfq_slice_async_rq;
 	unsigned int cfq_slice_idle;
+	unsigned int cfq_target_latency;
+	unsigned int cfq_hist_divisor;
 
 	struct list_head cic_list;
 
@@ -301,10 +308,40 @@ cfq_prio_to_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 	return cfq_prio_slice(cfqd, cfq_cfqq_sync(cfqq), cfqq->ioprio);
 }
 
+static inline unsigned
+cfq_get_interested_queues(struct cfq_data *cfqd, bool rt) {
+	unsigned min_q, max_q;
+	unsigned mult = cfqd->cfq_hist_divisor - 1;
+	unsigned round = cfqd->cfq_hist_divisor / 2;
+	if (rt) {
+		min_q = min(cfqd->busy_rt_queues_avg, cfqd->busy_rt_queues);
+		max_q = max(cfqd->busy_rt_queues_avg, cfqd->busy_rt_queues);
+		cfqd->busy_rt_queues_avg = (mult * max_q + min_q + round) /
+			cfqd->cfq_hist_divisor;
+		return cfqd->busy_rt_queues_avg;
+	} else {
+		min_q = min(cfqd->busy_queues_avg, cfqd->busy_queues);
+		max_q = max(cfqd->busy_queues_avg, cfqd->busy_queues);
+		cfqd->busy_queues_avg = (mult * max_q + min_q + round) /
+			cfqd->cfq_hist_divisor;
+		return cfqd->busy_queues_avg;
+	}
+}
+
 static inline void
 cfq_set_prio_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 {
-	cfqq->slice_end = cfq_prio_to_slice(cfqd, cfqq) + jiffies;
+	unsigned process_thr = cfqd->cfq_target_latency / cfqd->cfq_slice[1];
+	unsigned iq = cfq_get_interested_queues(cfqd, cfq_class_rt(cfqq));
+	unsigned slice = cfq_prio_to_slice(cfqd, cfqq);
+
+	if (iq > process_thr) {
+		unsigned low_slice = 2 * slice * cfqd->cfq_slice_idle
+			/ cfqd->cfq_slice[1];
+		slice = max(slice * process_thr / iq, min(slice, low_slice));
+	}
+
+	cfqq->slice_end = jiffies + slice;
 	cfq_log_cfqq(cfqd, cfqq, "set_slice=%lu", cfqq->slice_end - jiffies);
 }
 
@@ -646,6 +683,8 @@ static void cfq_add_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 	BUG_ON(cfq_cfqq_on_rr(cfqq));
 	cfq_mark_cfqq_on_rr(cfqq);
 	cfqd->busy_queues++;
+	if (cfq_class_rt(cfqq))
+		cfqd->busy_rt_queues++;
 
 	cfq_resort_rr_list(cfqd, cfqq);
 }
@@ -669,6 +708,8 @@ static void cfq_del_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 
 	BUG_ON(!cfqd->busy_queues);
 	cfqd->busy_queues--;
+	if (cfq_class_rt(cfqq))
+		cfqd->busy_rt_queues--;
 }
 
 /*
@@ -1092,7 +1133,7 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd)
 	 * fair distribution of slice time for a process doing back-to-back
 	 * seeks. so allow a little bit of time for him to submit a new rq
 	 */
-	sl = cfqd->cfq_slice_idle;
+	sl = min_t(unsigned, cfqd->cfq_slice_idle, cfqq->slice_end - jiffies);
 	if (sample_valid(cic->seek_samples) && CIC_SEEKY(cic))
 		sl = min(sl, msecs_to_jiffies(CFQ_MIN_TT));
 
@@ -2480,6 +2521,8 @@ static void *cfq_init_queue(struct request_queue *q)
 	cfqd->cfq_slice[1] = cfq_slice_sync;
 	cfqd->cfq_slice_async_rq = cfq_slice_async_rq;
 	cfqd->cfq_slice_idle = cfq_slice_idle;
+	cfqd->cfq_target_latency = cfq_target_latency;
+	cfqd->cfq_hist_divisor = cfq_hist_divisor;
 	cfqd->hw_tag = 1;
 
 	return cfqd;
@@ -2549,6 +2592,8 @@ SHOW_FUNCTION(cfq_slice_idle_show, cfqd->cfq_slice_idle, 1);
 SHOW_FUNCTION(cfq_slice_sync_show, cfqd->cfq_slice[1], 1);
 SHOW_FUNCTION(cfq_slice_async_show, cfqd->cfq_slice[0], 1);
 SHOW_FUNCTION(cfq_slice_async_rq_show, cfqd->cfq_slice_async_rq, 0);
+SHOW_FUNCTION(cfq_target_latency_show, cfqd->cfq_target_latency, 1);
+SHOW_FUNCTION(cfq_hist_divisor_show, cfqd->cfq_hist_divisor, 0);
 #undef SHOW_FUNCTION
 
 #define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV)			\
@@ -2580,6 +2625,10 @@ STORE_FUNCTION(cfq_slice_sync_store, &cfqd->cfq_slice[1], 1, UINT_MAX, 1);
 STORE_FUNCTION(cfq_slice_async_store, &cfqd->cfq_slice[0], 1, UINT_MAX, 1);
 STORE_FUNCTION(cfq_slice_async_rq_store, &cfqd->cfq_slice_async_rq, 1,
 		UINT_MAX, 0);
+
+STORE_FUNCTION(cfq_target_latency_store, &cfqd->cfq_target_latency, 1, 1000, 1);
+STORE_FUNCTION(cfq_hist_divisor_store, &cfqd->cfq_hist_divisor, 1, 100, 0);
+
 #undef STORE_FUNCTION
 
 #define CFQ_ATTR(name) \
@@ -2595,6 +2644,8 @@ static struct elv_fs_entry cfq_attrs[] = {
 	CFQ_ATTR(slice_async),
 	CFQ_ATTR(slice_async_rq),
 	CFQ_ATTR(slice_idle),
+	CFQ_ATTR(target_latency),
+	CFQ_ATTR(hist_divisor),
 	__ATTR_NULL
 };

                 reply	other threads:[~2009-09-15 20:12 UTC|newest]

Thread overview: [no followups] expand[flat|nested]  mbox.gz  Atom feed

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:0e3814b dfblob:ca90d42 )
 OR (
bs:"[RFC] cfq: adapt slice to number of processes doing I/O (v2.1)" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=200909152212.53883.czoccolo@gmail.com \
    --to=czoccolo@gmail.com \
    --cc=jens.axboe@oracle.com \
    --cc=jmoyer@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.