Re: Status of the IO scheduler fixes for 2.4

All of lore.kernel.org
 help / color / mirror / Atom feed

From: Chris Mason <mason@suse.com>
To: Marc-Christian Petersen <m.c.p@wolk-project.de>
Cc: lkml <linux-kernel@vger.kernel.org>,
	Andrea Arcangeli <andrea@suse.de>,
	Nick Piggin <piggin@cyberone.com.au>
Subject: Re: Status of the IO scheduler fixes for 2.4
Date: 06 Jul 2003 14:51:38 -0400	[thread overview]
Message-ID: <1057517497.20904.1322.camel@tiny.suse.com> (raw)
In-Reply-To: <200307060958.36642.m.c.p@wolk-project.de>

[-- Attachment #1: Type: text/plain, Size: 831 bytes --]

On Sun, 2003-07-06 at 03:58, Marc-Christian Petersen wrote:
> On Friday 04 July 2003 23:37, Chris Mason wrote:
> 
> Hi Chris,
> 
> > > If the IO fairness still doesnt
> > > get somewhat better for general use (well get isolated user reports and
> > > benchmarks for both the two patches), then I might consider the q->full
> > > patch (it has throughtput drawbacks and I prefer avoiding a tunable
> > > there).
> now there is io-stalls-10 in .22-pre3 (lowlat elev. + fixpausing). Could you 
> please send "q->full patch" as ontop of -pre3? :-)

Attached, this defaults to q->full off and keeps the elvtune changes. 
So to turn on the q->full low latency fixes, you need to:

elvtune -b 1 /dev/xxxx

Note that for lvm and md, you need to elvtune each underlying device. 
Running it on an lvm/md device doesn't do anything.

-chris


[-- Attachment #2: q_full.diff --]
[-- Type: text/plain, Size: 7259 bytes --]

--- 1.9/drivers/block/blkpg.c	Sat Mar 30 06:58:05 2002
+++ edited/drivers/block/blkpg.c	Sun Jul  6 14:34:31 2003
@@ -261,10 +261,10 @@
 			return blkpg_ioctl(dev, (struct blkpg_ioctl_arg *) arg);
 			
 		case BLKELVGET:
-			return blkelvget_ioctl(&blk_get_queue(dev)->elevator,
+			return blkelvget_ioctl(blk_get_queue(dev),
 					       (blkelv_ioctl_arg_t *) arg);
 		case BLKELVSET:
-			return blkelvset_ioctl(&blk_get_queue(dev)->elevator,
+			return blkelvset_ioctl(blk_get_queue(dev),
 					       (blkelv_ioctl_arg_t *) arg);
 
 		case BLKBSZGET:
--- 1.8/drivers/block/elevator.c	Tue Oct 22 00:29:25 2002
+++ edited/drivers/block/elevator.c	Sun Jul  6 14:34:31 2003
@@ -180,23 +180,28 @@
 
 void elevator_noop_merge_req(struct request *req, struct request *next) {}
 
-int blkelvget_ioctl(elevator_t * elevator, blkelv_ioctl_arg_t * arg)
+int blkelvget_ioctl(request_queue_t *q, blkelv_ioctl_arg_t * arg)
 {
+	elevator_t *elevator = &q->elevator;
 	blkelv_ioctl_arg_t output;
 
 	output.queue_ID			= elevator->queue_ID;
 	output.read_latency		= elevator->read_latency;
 	output.write_latency		= elevator->write_latency;
-	output.max_bomb_segments	= 0;
-
+	output.max_bomb_segments	= q->max_queue_sectors;
+	if (q->low_latency)
+		output.max_bomb_segments |= MAX_BOMB_LATENCY_MASK;	
+	else
+		output.max_bomb_segments &= ~MAX_BOMB_LATENCY_MASK;	
 	if (copy_to_user(arg, &output, sizeof(blkelv_ioctl_arg_t)))
 		return -EFAULT;
 
 	return 0;
 }
 
-int blkelvset_ioctl(elevator_t * elevator, const blkelv_ioctl_arg_t * arg)
+int blkelvset_ioctl(request_queue_t *q, const blkelv_ioctl_arg_t * arg)
 {
+	elevator_t *elevator = &q->elevator;
 	blkelv_ioctl_arg_t input;
 
 	if (copy_from_user(&input, arg, sizeof(blkelv_ioctl_arg_t)))
@@ -206,9 +211,19 @@
 		return -EINVAL;
 	if (input.write_latency < 0)
 		return -EINVAL;
+	if (input.max_bomb_segments < 0)
+		return -EINVAL;
 
 	elevator->read_latency		= input.read_latency;
 	elevator->write_latency		= input.write_latency;
+	q->low_latency = input.max_bomb_segments & MAX_BOMB_LATENCY_MASK ? 1:0;
+	printk("queue %d: low latency mode is now %s\n", elevator->queue_ID, 
+		q->low_latency ? "on" : "off");
+	input.max_bomb_segments &= ~MAX_BOMB_LATENCY_MASK;
+	if (input.max_bomb_segments)
+		q->max_queue_sectors = input.max_bomb_segments;
+	printk("queue %d: max queue sectors is now %d\n", elevator->queue_ID, 
+		q->max_queue_sectors);
 	return 0;
 }
 
--- 1.46/drivers/block/ll_rw_blk.c	Fri Jul  4 13:35:08 2003
+++ edited/drivers/block/ll_rw_blk.c	Sun Jul  6 14:34:30 2003
@@ -379,8 +379,22 @@
 {
 	if (q->plugged) {
 		q->plugged = 0;
-		if (!list_empty(&q->queue_head))
+		if (!list_empty(&q->queue_head)) {
+
+			/* we don't want merges later on to come in 
+			 * and significantly increase the amount of
+			 * work during an unplug, it can lead to high
+			 * latencies while some poor waiter tries to
+			 * run an ever increasing chunk of io.
+			 * This does lower throughput some though.
+			 */
+			if (q->low_latency) {
+				struct request *rq;
+				rq = blkdev_entry_prev_request(&q->queue_head),
+				rq->elevator_sequence = 0;
+			}
 			q->request_fn(q);
+		}
 	}
 }
 
@@ -527,7 +541,9 @@
 	q->plug_tq.routine	= &generic_unplug_device;
 	q->plug_tq.data		= q;
 	q->plugged        	= 0;
+	q->full			= 0;
 	q->can_throttle		= 0;
+	q->low_latency		= 0;
 
 	/*
 	 * These booleans describe the queue properties.  We set the
@@ -546,7 +562,7 @@
  * Get a free request. io_request_lock must be held and interrupts
  * disabled on the way in.  Returns NULL if there are no free requests.
  */
-static struct request *get_request(request_queue_t *q, int rw)
+static struct request *__get_request(request_queue_t *q, int rw)
 {
 	struct request *rq = NULL;
 	struct request_list *rl;
@@ -560,11 +576,34 @@
 		rq->cmd = rw;
 		rq->special = NULL;
 		rq->q = q;
-	}
+	} else if (q->low_latency)
+		q->full = 1;
 	return rq;
 }
 
 /*
+ * get a free request, honoring the queue_full condition
+ */
+static inline struct request *get_request(request_queue_t *q, int rw)
+{
+	if (q->full)
+		return NULL;
+	return __get_request(q, rw);
+}
+
+/* 
+ * helper func to do memory barriers and wakeups when we finally decide
+ * to clear the queue full condition
+ */
+static inline void clear_full_and_wake(request_queue_t *q)
+{
+	q->full = 0;
+	mb();
+	if (waitqueue_active(&q->wait_for_requests))
+		wake_up(&q->wait_for_requests);
+}
+
+/*
  * Here's the request allocation design, low latency version:
  *
  * 1: Blocking on request exhaustion is a key part of I/O throttling.
@@ -612,24 +651,30 @@
 {
 	register struct request *rq;
 	DECLARE_WAITQUEUE(wait, current);
+	int oversized;
 
 	add_wait_queue_exclusive(&q->wait_for_requests, &wait);
 
 	do {
 		set_current_state(TASK_UNINTERRUPTIBLE);
 		spin_lock_irq(&io_request_lock);
-		if (blk_oversized_queue(q)) {
-			__generic_unplug_device(q);
+		oversized = blk_oversized_queue(q);
+		if (q->full || oversized) {
+			if (oversized)
+				__generic_unplug_device(q);
 			spin_unlock_irq(&io_request_lock);
 			schedule();
 			spin_lock_irq(&io_request_lock);
 		}
-		rq = get_request(q, rw);
+		rq = __get_request(q, rw);
 		spin_unlock_irq(&io_request_lock);
 	} while (rq == NULL);
 	remove_wait_queue(&q->wait_for_requests, &wait);
 	current->state = TASK_RUNNING;
 
+	if (!waitqueue_active(&q->wait_for_requests))
+		clear_full_and_wake(q);
+
 	return rq;
 }
 
@@ -876,6 +921,8 @@
 			smp_mb();
 			if (waitqueue_active(&q->wait_for_requests))
 				wake_up(&q->wait_for_requests);
+			else
+				clear_full_and_wake(q);
 		}
 	}
 }
--- 1.24/include/linux/blkdev.h	Fri Jul  4 13:18:12 2003
+++ edited/include/linux/blkdev.h	Sun Jul  6 14:34:38 2003
@@ -136,12 +136,25 @@
 	int			head_active:1;
 
 	/*
+	 * Booleans that indicate whether the queue's free requests have
+	 * been exhausted and is waiting to drop below the batch_requests
+	 * threshold
+	 */
+	int			full:1;
+	
+	/*
 	 * Boolean that indicates you will use blk_started_sectors
 	 * and blk_finished_sectors in addition to blk_started_io
 	 * and blk_finished_io.  It enables the throttling code to 
 	 * help keep the sectors in flight to a reasonable value
 	 */
 	int			can_throttle:1;
+
+	/*
+	 * Boolean that indicates the queue should prefer low
+	 * latency over throughput.  This enables the q->full checks
+	 */
+	int			low_latency:1;
 
 	unsigned long		bounce_pfn;
 
--- 1.6/include/linux/elevator.h	Fri Jul  4 13:12:45 2003
+++ edited/include/linux/elevator.h	Sun Jul  6 14:34:31 2003
@@ -35,14 +35,19 @@
 	int queue_ID;
 	int read_latency;
 	int write_latency;
+/*
+ * (max_bomb_segments & MAX_BOMB_LATENCY_MASK) == 1 indicates low latency
+ * mode.  We're using any odd number to indicate low latency is on.
+ */
+#define MAX_BOMB_LATENCY_MASK 1
 	int max_bomb_segments;
 } blkelv_ioctl_arg_t;
 
 #define BLKELVGET   _IOR(0x12,106,sizeof(blkelv_ioctl_arg_t))
 #define BLKELVSET   _IOW(0x12,107,sizeof(blkelv_ioctl_arg_t))
 
-extern int blkelvget_ioctl(elevator_t *, blkelv_ioctl_arg_t *);
-extern int blkelvset_ioctl(elevator_t *, const blkelv_ioctl_arg_t *);
+extern int blkelvget_ioctl(request_queue_t *, blkelv_ioctl_arg_t *);
+extern int blkelvset_ioctl(request_queue_t *, const blkelv_ioctl_arg_t *);
 
 extern void elevator_init(elevator_t *, elevator_t);

next prev parent reply	other threads:[~2003-07-06 18:38 UTC|newest]

Thread overview: 21+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2003-07-02 22:26 Status of the IO scheduler fixes for 2.4 Marcelo Tosatti
2003-07-02 22:28 ` Marcelo Tosatti
2003-07-03  2:02   ` Chris Mason
2003-07-03 10:58     ` Stephan von Krawczynski
2003-07-03 12:01       ` Chris Mason
2003-07-03 12:07       ` Marc-Christian Petersen
2003-07-03 12:28         ` Stephan von Krawczynski
2003-07-03 12:31     ` Marc-Christian Petersen
2003-07-03 13:11       ` Chris Mason
2003-07-04 20:01     ` Marcelo Tosatti
2003-07-04 21:37       ` Chris Mason
2003-07-05  0:05         ` Andrea Arcangeli
2003-07-05  0:47           ` Chris Mason
2003-07-05  1:04             ` Andrea Arcangeli
2003-07-06  7:58         ` Marc-Christian Petersen
2003-07-06 18:51           ` Chris Mason [this message]
2003-07-07  4:06             ` Nick Piggin
2003-07-05  0:00       ` Andrea Arcangeli
2003-07-05 15:59         ` Marcelo Tosatti
2003-07-05 16:36           ` Andrea Arcangeli
2003-07-03 18:07   ` Chris Mason

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1057517497.20904.1322.camel@tiny.suse.com \
    --to=mason@suse.com \
    --cc=andrea@suse.de \
    --cc=linux-kernel@vger.kernel.org \
    --cc=m.c.p@wolk-project.de \
    --cc=piggin@cyberone.com.au \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.