All of lore.kernel.org
 help / color / mirror / Atom feed
From: Jens Axboe <axboe@suse.de>
To: Linux Kernel <linux-kernel@vger.kernel.org>
Cc: Andrew Morton <akpm@osdl.org>, kenneth.w.chen@intel.com
Subject: [PATCH] per-backing dev unplugging #2
Date: Thu, 11 Mar 2004 09:36:19 +0100	[thread overview]
Message-ID: <20040311083619.GH6955@suse.de> (raw)

Hi,

Final version, unless something stupid pops up. Changes:

- Adapt to 2.6.4-mm1
- Cleaned up the dm bits, much nicer with the lockless unplugging
  (thanks Joe)
- md and loop unplugging, stacked devices should unplug their targets.
  Otherwise they'll end up waiting for the unplug timer, which sucks.
- XFS fixed up, I hope. XFS folks still encouraged to look at this,
  looks better this time around though (and works, I tested).
- blk_run_* inlined in blkdev.h

Against 2.6.4-mm1 (note you need other attached patch to boot it).

diff -ur -X /home/axboe/cdrom/exclude /opt/kernel/linux-2.6.4-mm1/drivers/block/ll_rw_blk.c linux-2.6.4-mm1/drivers/block/ll_rw_blk.c
--- /opt/kernel/linux-2.6.4-mm1/drivers/block/ll_rw_blk.c	2004-03-11 09:10:00.000000000 +0100
+++ linux-2.6.4-mm1/drivers/block/ll_rw_blk.c	2004-03-11 09:07:12.000000000 +0100
@@ -42,12 +42,6 @@
  */
 static kmem_cache_t *request_cachep;
 
-/*
- * plug management
- */
-static LIST_HEAD(blk_plug_list);
-static spinlock_t blk_plug_lock __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED;
-
 static wait_queue_head_t congestion_wqh[2];
 
 /*
@@ -248,8 +242,6 @@
 	 */
 	blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH);
 
-	INIT_LIST_HEAD(&q->plug_list);
-
 	blk_queue_activity_fn(q, NULL, NULL);
 }
 
@@ -1101,13 +1093,11 @@
 	 * don't plug a stopped queue, it must be paired with blk_start_queue()
 	 * which will restart the queueing
 	 */
-	if (!blk_queue_plugged(q)
-	    && !test_bit(QUEUE_FLAG_STOPPED, &q->queue_flags)) {
-		spin_lock(&blk_plug_lock);
-		list_add_tail(&q->plug_list, &blk_plug_list);
+	if (test_bit(QUEUE_FLAG_STOPPED, &q->queue_flags))
+		return;
+
+	if (!test_and_set_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags))
 		mod_timer(&q->unplug_timer, jiffies + q->unplug_delay);
-		spin_unlock(&blk_plug_lock);
-	}
 }
 
 EXPORT_SYMBOL(blk_plug_device);
@@ -1119,15 +1109,12 @@
 int blk_remove_plug(request_queue_t *q)
 {
 	WARN_ON(!irqs_disabled());
-	if (blk_queue_plugged(q)) {
-		spin_lock(&blk_plug_lock);
-		list_del_init(&q->plug_list);
-		del_timer(&q->unplug_timer);
-		spin_unlock(&blk_plug_lock);
-		return 1;
-	}
 
-	return 0;
+	if (!test_and_clear_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags))
+		return 0;
+
+	del_timer(&q->unplug_timer);
+	return 1;
 }
 
 EXPORT_SYMBOL(blk_remove_plug);
@@ -1158,14 +1145,11 @@
  *   Linux uses plugging to build bigger requests queues before letting
  *   the device have at them. If a queue is plugged, the I/O scheduler
  *   is still adding and merging requests on the queue. Once the queue
- *   gets unplugged (either by manually calling this function, or by
- *   calling blk_run_queues()), the request_fn defined for the
- *   queue is invoked and transfers started.
+ *   gets unplugged, the request_fn defined for the queue is invoked and
+ *   transfers started.
  **/
-void generic_unplug_device(void *data)
+void generic_unplug_device(request_queue_t *q)
 {
-	request_queue_t *q = data;
-
 	spin_lock_irq(q->queue_lock);
 	__generic_unplug_device(q);
 	spin_unlock_irq(q->queue_lock);
@@ -1173,9 +1157,23 @@
 
 EXPORT_SYMBOL(generic_unplug_device);
 
+static inline void blk_backing_dev_unplug(struct backing_dev_info *bdi)
+{
+	request_queue_t *q = bdi->unplug_io_data;
+
+	/*
+	 * devices don't necessarily have an ->unplug_fn defined
+	 */
+	if (q->unplug_fn)
+		q->unplug_fn(q);
+}
+
+EXPORT_SYMBOL(blk_backing_dev_unplug);
+
 static void blk_unplug_work(void *data)
 {
 	request_queue_t *q = data;
+
 	q->unplug_fn(q);
 }
 
@@ -1253,42 +1251,6 @@
 EXPORT_SYMBOL(blk_run_queue);
 
 /**
- * blk_run_queues - fire all plugged queues
- *
- * Description:
- *   Start I/O on all plugged queues known to the block layer. Queues that
- *   are currently stopped are ignored. This is equivalent to the older
- *   tq_disk task queue run.
- **/
-#define blk_plug_entry(entry) list_entry((entry), request_queue_t, plug_list)
-void blk_run_queues(void)
-{
-	LIST_HEAD(local_plug_list);
-
-	spin_lock_irq(&blk_plug_lock);
-
-	/*
-	 * this will happen fairly often
-	 */
-	if (list_empty(&blk_plug_list))
-		goto out;
-
-	list_splice_init(&blk_plug_list, &local_plug_list);
-	
-	while (!list_empty(&local_plug_list)) {
-		request_queue_t *q = blk_plug_entry(local_plug_list.next);
-
-		spin_unlock_irq(&blk_plug_lock);
-		q->unplug_fn(q);
-		spin_lock_irq(&blk_plug_lock);
-	}
-out:
-	spin_unlock_irq(&blk_plug_lock);
-}
-
-EXPORT_SYMBOL(blk_run_queues);
-
-/**
  * blk_cleanup_queue: - release a &request_queue_t when it is no longer needed
  * @q:    the request queue to be released
  *
@@ -1393,6 +1355,10 @@
 	memset(q, 0, sizeof(*q));
 	init_timer(&q->unplug_timer);
 	atomic_set(&q->refcnt, 1);
+
+	q->backing_dev_info.unplug_io_fn = blk_backing_dev_unplug;
+	q->backing_dev_info.unplug_io_data = q;
+
 	return q;
 }
 
@@ -2046,7 +2012,6 @@
 	DEFINE_WAIT(wait);
 	wait_queue_head_t *wqh = &congestion_wqh[rw];
 
-	blk_run_queues();
 	prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE);
 	ret = io_schedule_timeout(timeout);
 	finish_wait(wqh, &wait);
@@ -2302,7 +2267,7 @@
 	if (blk_queue_plugged(q)) {
 		int nr_queued = q->rq.count[READ] + q->rq.count[WRITE];
 
-		if (nr_queued == q->unplug_thresh)
+		if (nr_queued == q->unplug_thresh || bio_sync(bio))
 			__generic_unplug_device(q);
 	}
 	spin_unlock_irq(q->queue_lock);
diff -ur -X /home/axboe/cdrom/exclude /opt/kernel/linux-2.6.4-mm1/drivers/block/loop.c linux-2.6.4-mm1/drivers/block/loop.c
--- /opt/kernel/linux-2.6.4-mm1/drivers/block/loop.c	2004-03-11 09:10:00.000000000 +0100
+++ linux-2.6.4-mm1/drivers/block/loop.c	2004-03-11 09:07:12.000000000 +0100
@@ -434,6 +434,17 @@
 	goto out;
 }
 
+/*
+ * kick off io on the underlying address space
+ */
+static void loop_unplug(request_queue_t *q)
+{
+	struct loop_device *lo = q->queuedata;
+
+	clear_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags);
+	blk_run_address_space(lo->lo_backing_file->f_mapping);
+}
+
 struct switch_request {
 	struct file *file;
 	struct completion wait;
@@ -614,7 +625,6 @@
 {
 	struct file	*file;
 	struct inode	*inode;
-	struct block_device *lo_device = NULL;
 	struct address_space *mapping;
 	unsigned lo_blocksize;
 	int		lo_flags = 0;
@@ -671,7 +681,7 @@
 	set_device_ro(bdev, (lo_flags & LO_FLAGS_READ_ONLY) != 0);
 
 	lo->lo_blocksize = lo_blocksize;
-	lo->lo_device = lo_device;
+	lo->lo_device = bdev;
 	lo->lo_flags = lo_flags;
 	lo->lo_backing_file = file;
 	lo->transfer = NULL;
@@ -689,6 +699,7 @@
 	 */
 	blk_queue_make_request(lo->lo_queue, loop_make_request);
 	lo->lo_queue->queuedata = lo;
+	lo->lo_queue->unplug_fn = loop_unplug;
 
 	set_capacity(disks[lo->lo_number], size);
 
diff -ur -X /home/axboe/cdrom/exclude /opt/kernel/linux-2.6.4-mm1/drivers/md/dm.c linux-2.6.4-mm1/drivers/md/dm.c
--- /opt/kernel/linux-2.6.4-mm1/drivers/md/dm.c	2004-03-11 09:10:01.000000000 +0100
+++ linux-2.6.4-mm1/drivers/md/dm.c	2004-03-11 09:07:12.000000000 +0100
@@ -575,6 +575,14 @@
 	return 0;
 }
 
+static void dm_unplug_all(request_queue_t *q)
+{
+	struct mapped_device *md = q->queuedata;
+
+	clear_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags);
+	dm_table_unplug_all(md->map);
+}
+
 static int dm_any_congested(void *congested_data, int bdi_bits)
 {
 	int r;
@@ -672,6 +680,7 @@
 	md->queue->backing_dev_info.congested_fn = dm_any_congested;
 	md->queue->backing_dev_info.congested_data = md;
 	blk_queue_make_request(md->queue, dm_request);
+	md->queue->unplug_fn = dm_unplug_all;
 
 	md->io_pool = mempool_create(MIN_IOS, mempool_alloc_slab,
 				     mempool_free_slab, _io_cache);
@@ -900,7 +909,7 @@
 	 * Then we wait for the already mapped ios to
 	 * complete.
 	 */
-	blk_run_queues();
+	dm_table_unplug_all(md->map);
 	while (1) {
 		set_current_state(TASK_INTERRUPTIBLE);
 
@@ -947,7 +956,7 @@
 	up_write(&md->lock);
 	dm_table_put(map);
 
-	blk_run_queues();
+	dm_table_unplug_all(md->map);
 
 	return 0;
 }
diff -ur -X /home/axboe/cdrom/exclude /opt/kernel/linux-2.6.4-mm1/drivers/md/dm-crypt.c linux-2.6.4-mm1/drivers/md/dm-crypt.c
--- /opt/kernel/linux-2.6.4-mm1/drivers/md/dm-crypt.c	2004-03-11 09:10:01.000000000 +0100
+++ linux-2.6.4-mm1/drivers/md/dm-crypt.c	2004-03-11 09:07:12.000000000 +0100
@@ -668,7 +668,7 @@
 
 		/* out of memory -> run queues */
 		if (remaining)
-			blk_run_queues();
+			blk_congestion_wait(bio_data_dir(clone), HZ/100);
 	}
 
 	/* drop reference, clones could have returned before we reach this */
diff -ur -X /home/axboe/cdrom/exclude /opt/kernel/linux-2.6.4-mm1/drivers/md/dm.h linux-2.6.4-mm1/drivers/md/dm.h
--- /opt/kernel/linux-2.6.4-mm1/drivers/md/dm.h	2004-03-11 09:10:01.000000000 +0100
+++ linux-2.6.4-mm1/drivers/md/dm.h	2004-03-11 09:07:12.000000000 +0100
@@ -116,6 +116,7 @@
 void dm_table_suspend_targets(struct dm_table *t);
 void dm_table_resume_targets(struct dm_table *t);
 int dm_table_any_congested(struct dm_table *t, int bdi_bits);
+void dm_table_unplug_all(struct dm_table *t);
 
 /*-----------------------------------------------------------------
  * A registry of target types.
diff -ur -X /home/axboe/cdrom/exclude /opt/kernel/linux-2.6.4-mm1/drivers/md/dm-table.c linux-2.6.4-mm1/drivers/md/dm-table.c
--- /opt/kernel/linux-2.6.4-mm1/drivers/md/dm-table.c	2004-03-11 09:10:01.000000000 +0100
+++ linux-2.6.4-mm1/drivers/md/dm-table.c	2004-03-11 09:07:12.000000000 +0100
@@ -885,6 +885,21 @@
 	return r;
 }
 
+void dm_table_unplug_all(struct dm_table *t)
+{
+	struct list_head *d, *devices = dm_table_get_devices(t);
+
+	for (d = devices->next; d != devices; d = d->next) {
+		struct dm_dev *dd = list_entry(d, struct dm_dev, list);
+		request_queue_t *q = bdev_get_queue(dd->bdev);
+
+		if (q->unplug_fn) {
+			set_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags);
+			q->unplug_fn(q);
+		}
+	}
+}
+
 EXPORT_SYMBOL(dm_vcalloc);
 EXPORT_SYMBOL(dm_get_device);
 EXPORT_SYMBOL(dm_put_device);
diff -ur -X /home/axboe/cdrom/exclude /opt/kernel/linux-2.6.4-mm1/drivers/md/md.c linux-2.6.4-mm1/drivers/md/md.c
--- /opt/kernel/linux-2.6.4-mm1/drivers/md/md.c	2004-03-11 09:10:01.000000000 +0100
+++ linux-2.6.4-mm1/drivers/md/md.c	2004-03-11 09:07:12.000000000 +0100
@@ -160,6 +160,28 @@
 	return 0;
 }
 
+static void md_unplug_all(request_queue_t *q)
+{
+	mddev_t *mddev = q->queuedata;
+	struct list_head *tmp;
+	mdk_rdev_t *rdev;
+
+	clear_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags);
+
+	/*
+	 * this list iteration is done without any locking in md?!
+	 */
+	ITERATE_RDEV(mddev, rdev, tmp) {
+		request_queue_t *r_queue = bdev_get_queue(rdev->bdev);
+
+		if (r_queue->unplug_fn) {
+			set_bit(QUEUE_FLAG_PLUGGED, &r_queue->queue_flags);
+			r_queue->unplug_fn(r_queue);
+		}
+	}
+
+}
+
 static inline mddev_t *mddev_get(mddev_t *mddev)
 {
 	atomic_inc(&mddev->active);
@@ -335,6 +357,8 @@
 	struct bio_vec vec;
 	struct completion event;
 
+	rw |= (1 << BIO_RW_SYNC);
+
 	bio_init(&bio);
 	bio.bi_io_vec = &vec;
 	vec.bv_page = page;
@@ -349,7 +373,6 @@
 	bio.bi_private = &event;
 	bio.bi_end_io = bi_complete;
 	submit_bio(rw, &bio);
-	blk_run_queues();
 	wait_for_completion(&event);
 
 	return test_bit(BIO_UPTODATE, &bio.bi_flags);
@@ -1644,6 +1667,7 @@
 	 */
 	mddev->queue->queuedata = mddev;
 	mddev->queue->make_request_fn = mddev->pers->make_request;
+	mddev->queue->unplug_fn = md_unplug_all;
 
 	mddev->changed = 1;
 	return 0;
@@ -2718,7 +2742,7 @@
 		run = thread->run;
 		if (run) {
 			run(thread->mddev);
-			blk_run_queues();
+			blk_run_queue(thread->mddev->queue);
 		}
 		if (signal_pending(current))
 			flush_signals(current);
@@ -3286,7 +3310,7 @@
 		    test_bit(MD_RECOVERY_ERR, &mddev->recovery))
 			break;
 
-		blk_run_queues();
+		blk_run_queue(mddev->queue);
 
 	repeat:
 		if (jiffies >= mark[last_mark] + SYNC_MARK_STEP ) {
diff -ur -X /home/axboe/cdrom/exclude /opt/kernel/linux-2.6.4-mm1/drivers/md/raid1.c linux-2.6.4-mm1/drivers/md/raid1.c
--- /opt/kernel/linux-2.6.4-mm1/drivers/md/raid1.c	2004-03-11 03:55:28.000000000 +0100
+++ linux-2.6.4-mm1/drivers/md/raid1.c	2004-03-11 09:07:12.000000000 +0100
@@ -451,6 +451,7 @@
 
 static void device_barrier(conf_t *conf, sector_t sect)
 {
+	blk_run_queue(conf->mddev->queue);
 	spin_lock_irq(&conf->resync_lock);
 	wait_event_lock_irq(conf->wait_idle, !waitqueue_active(&conf->wait_resume), conf->resync_lock);
 	
@@ -478,6 +479,7 @@
 	 * thread has put up a bar for new requests.
 	 * Continue immediately if no resync is active currently.
 	 */
+	blk_run_queue(conf->mddev->queue);
 	spin_lock_irq(&conf->resync_lock);
 	wait_event_lock_irq(conf->wait_resume, !conf->barrier, conf->resync_lock);
 	conf->nr_pending++;
@@ -644,6 +646,7 @@
 
 static void close_sync(conf_t *conf)
 {
+	blk_run_queue(conf->mddev->queue);
 	spin_lock_irq(&conf->resync_lock);
 	wait_event_lock_irq(conf->wait_resume, !conf->barrier, conf->resync_lock);
 	spin_unlock_irq(&conf->resync_lock);
diff -ur -X /home/axboe/cdrom/exclude /opt/kernel/linux-2.6.4-mm1/drivers/md/raid5.c linux-2.6.4-mm1/drivers/md/raid5.c
--- /opt/kernel/linux-2.6.4-mm1/drivers/md/raid5.c	2004-03-11 09:10:01.000000000 +0100
+++ linux-2.6.4-mm1/drivers/md/raid5.c	2004-03-11 09:07:12.000000000 +0100
@@ -249,6 +249,7 @@
 				break;
 			if (!sh) {
 				conf->inactive_blocked = 1;
+				blk_run_queue(conf->mddev->queue);
 				wait_event_lock_irq(conf->wait_for_stripe,
 						    !list_empty(&conf->inactive_list) &&
 						    (atomic_read(&conf->active_stripes) < (NR_STRIPES *3/4)
@@ -1292,9 +1293,8 @@
 		}
 	}
 }
-static void raid5_unplug_device(void *data)
+static void raid5_unplug_device(request_queue_t *q)
 {
-	request_queue_t *q = data;
 	mddev_t *mddev = q->queuedata;
 	raid5_conf_t *conf = mddev_to_conf(mddev);
 	unsigned long flags;
diff -ur -X /home/axboe/cdrom/exclude /opt/kernel/linux-2.6.4-mm1/drivers/md/raid6main.c linux-2.6.4-mm1/drivers/md/raid6main.c
--- /opt/kernel/linux-2.6.4-mm1/drivers/md/raid6main.c	2004-03-11 09:10:01.000000000 +0100
+++ linux-2.6.4-mm1/drivers/md/raid6main.c	2004-03-11 09:07:12.000000000 +0100
@@ -1454,9 +1454,8 @@
 		}
 	}
 }
-static void raid6_unplug_device(void *data)
+static void raid6_unplug_device(request_queue_t *q)
 {
-	request_queue_t *q = data;
 	mddev_t *mddev = q->queuedata;
 	raid6_conf_t *conf = mddev_to_conf(mddev);
 	unsigned long flags;
diff -ur -X /home/axboe/cdrom/exclude /opt/kernel/linux-2.6.4-mm1/drivers/mtd/devices/blkmtd.c linux-2.6.4-mm1/drivers/mtd/devices/blkmtd.c
--- /opt/kernel/linux-2.6.4-mm1/drivers/mtd/devices/blkmtd.c	2004-03-11 03:55:21.000000000 +0100
+++ linux-2.6.4-mm1/drivers/mtd/devices/blkmtd.c	2004-03-11 09:07:12.000000000 +0100
@@ -147,8 +147,7 @@
 		bio->bi_private = &event;
 		bio->bi_end_io = bi_read_complete;
 		if(bio_add_page(bio, page, PAGE_SIZE, 0) == PAGE_SIZE) {
-			submit_bio(READ, bio);
-			blk_run_queues();
+			submit_bio(READ_SYNC, bio);
 			wait_for_completion(&event);
 			err = test_bit(BIO_UPTODATE, &bio->bi_flags) ? 0 : -EIO;
 			bio_put(bio);
@@ -179,8 +178,7 @@
 	init_completion(&event);
 	bio->bi_private = &event;
 	bio->bi_end_io = bi_write_complete;
-	submit_bio(WRITE, bio);
-	blk_run_queues();
+	submit_bio(WRITE_SYNC, bio);
 	wait_for_completion(&event);
 	DEBUG(3, "submit_bio completed, bi_vcnt = %d\n", bio->bi_vcnt);
 	err = test_bit(BIO_UPTODATE, &bio->bi_flags) ? 0 : -EIO;
diff -ur -X /home/axboe/cdrom/exclude /opt/kernel/linux-2.6.4-mm1/fs/buffer.c linux-2.6.4-mm1/fs/buffer.c
--- /opt/kernel/linux-2.6.4-mm1/fs/buffer.c	2004-03-11 09:10:01.948448091 +0100
+++ linux-2.6.4-mm1/fs/buffer.c	2004-03-11 09:07:12.000000000 +0100
@@ -132,7 +132,7 @@
 	do {
 		prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE);
 		if (buffer_locked(bh)) {
-			blk_run_queues();
+			blk_run_address_space(bh->b_bdev->bd_inode->i_mapping);
 			io_schedule();
 		}
 	} while (buffer_locked(bh));
@@ -491,7 +491,6 @@
 	pg_data_t *pgdat;
 
 	wakeup_bdflush(1024);
-	blk_run_queues();
 	yield();
 
 	for_each_pgdat(pgdat) {
@@ -2924,7 +2923,7 @@
 
 int block_sync_page(struct page *page)
 {
-	blk_run_queues();
+	blk_run_address_space(page->mapping);
 	return 0;
 }
 
diff -ur -X /home/axboe/cdrom/exclude /opt/kernel/linux-2.6.4-mm1/fs/direct-io.c linux-2.6.4-mm1/fs/direct-io.c
--- /opt/kernel/linux-2.6.4-mm1/fs/direct-io.c	2004-03-11 09:10:01.987443867 +0100
+++ linux-2.6.4-mm1/fs/direct-io.c	2004-03-11 09:07:12.000000000 +0100
@@ -364,7 +364,7 @@
 		if (dio->bio_list == NULL) {
 			dio->waiter = current;
 			spin_unlock_irqrestore(&dio->bio_lock, flags);
-			blk_run_queues();
+			blk_run_address_space(dio->inode->i_mapping);
 			io_schedule();
 			spin_lock_irqsave(&dio->bio_lock, flags);
 			dio->waiter = NULL;
@@ -1035,7 +1035,7 @@
 		if (ret == 0)
 			ret = dio->result;
 		finished_one_bio(dio);		/* This can free the dio */
-		blk_run_queues();
+		blk_run_address_space(inode->i_mapping);
 		if (should_wait) {
 			unsigned long flags;
 			/*
diff -ur -X /home/axboe/cdrom/exclude /opt/kernel/linux-2.6.4-mm1/fs/jfs/jfs_logmgr.c linux-2.6.4-mm1/fs/jfs/jfs_logmgr.c
--- /opt/kernel/linux-2.6.4-mm1/fs/jfs/jfs_logmgr.c	2004-03-11 03:55:27.000000000 +0100
+++ linux-2.6.4-mm1/fs/jfs/jfs_logmgr.c	2004-03-11 09:07:12.000000000 +0100
@@ -1972,8 +1972,7 @@
 
 	bio->bi_end_io = lbmIODone;
 	bio->bi_private = bp;
-	submit_bio(READ, bio);
-	blk_run_queues();
+	submit_bio(READ_SYNC, bio);
 
 	wait_event(bp->l_ioevent, (bp->l_flag != lbmREAD));
 
@@ -2117,9 +2116,8 @@
 
 	/* check if journaling to disk has been disabled */
 	if (!log->no_integrity) {
-		submit_bio(WRITE, bio);
+		submit_bio(WRITE_SYNC, bio);
 		INCREMENT(lmStat.submitted);
-		blk_run_queues();
 	}
 	else {
 		bio->bi_size = 0;
diff -ur -X /home/axboe/cdrom/exclude /opt/kernel/linux-2.6.4-mm1/fs/ntfs/compress.c linux-2.6.4-mm1/fs/ntfs/compress.c
--- /opt/kernel/linux-2.6.4-mm1/fs/ntfs/compress.c	2004-03-11 03:55:27.000000000 +0100
+++ linux-2.6.4-mm1/fs/ntfs/compress.c	2004-03-11 09:07:12.000000000 +0100
@@ -668,7 +668,7 @@
 					"uptodate! Unplugging the disk queue "
 					"and rescheduling.");
 			get_bh(tbh);
-			blk_run_queues();
+			blk_run_address_space(mapping);
 			schedule();
 			put_bh(tbh);
 			if (unlikely(!buffer_uptodate(tbh)))
diff -ur -X /home/axboe/cdrom/exclude /opt/kernel/linux-2.6.4-mm1/fs/ufs/truncate.c linux-2.6.4-mm1/fs/ufs/truncate.c
--- /opt/kernel/linux-2.6.4-mm1/fs/ufs/truncate.c	2004-03-11 03:55:27.000000000 +0100
+++ linux-2.6.4-mm1/fs/ufs/truncate.c	2004-03-11 09:07:12.000000000 +0100
@@ -456,7 +456,7 @@
 			break;
 		if (IS_SYNC(inode) && (inode->i_state & I_DIRTY))
 			ufs_sync_inode (inode);
-		blk_run_queues();
+		blk_run_address_space(inode->i_mapping);
 		yield();
 	}
 	offset = inode->i_size & uspi->s_fshift;
diff -ur -X /home/axboe/cdrom/exclude /opt/kernel/linux-2.6.4-mm1/fs/xfs/linux/xfs_buf.c linux-2.6.4-mm1/fs/xfs/linux/xfs_buf.c
--- /opt/kernel/linux-2.6.4-mm1/fs/xfs/linux/xfs_buf.c	2004-03-11 03:55:21.000000000 +0100
+++ linux-2.6.4-mm1/fs/xfs/linux/xfs_buf.c	2004-03-11 09:07:12.706793571 +0100
@@ -1013,7 +1013,7 @@
 {
 	PB_TRACE(pb, "lock", 0);
 	if (atomic_read(&pb->pb_io_remaining))
-		blk_run_queues();
+		blk_run_address_space(pb->pb_target->pbr_mapping);
 	down(&pb->pb_sema);
 	PB_SET_OWNER(pb);
 	PB_TRACE(pb, "locked", 0);
@@ -1109,7 +1109,7 @@
 		if (atomic_read(&pb->pb_pin_count) == 0)
 			break;
 		if (atomic_read(&pb->pb_io_remaining))
-			blk_run_queues();
+			blk_run_address_space(pb->pb_target->pbr_mapping);
 		schedule();
 	}
 	remove_wait_queue(&pb->pb_waiters, &wait);
@@ -1407,7 +1407,7 @@
 	if (pb->pb_flags & PBF_RUN_QUEUES) {
 		pb->pb_flags &= ~PBF_RUN_QUEUES;
 		if (atomic_read(&pb->pb_io_remaining) > 1)
-			blk_run_queues();
+			blk_run_address_space(pb->pb_target->pbr_mapping);
 	}
 }
 
@@ -1471,7 +1471,7 @@
 {
 	PB_TRACE(pb, "iowait", 0);
 	if (atomic_read(&pb->pb_io_remaining))
-		blk_run_queues();
+		blk_run_address_space(pb->pb_target->pbr_mapping);
 	down(&pb->pb_iodonesema);
 	PB_TRACE(pb, "iowaited", (long)pb->pb_error);
 	return pb->pb_error;
@@ -1617,7 +1617,6 @@
 pagebuf_daemon(
 	void			*data)
 {
-	int			count;
 	page_buf_t		*pb;
 	struct list_head	*curr, *next, tmp;
 
@@ -1640,7 +1639,6 @@
 
 		spin_lock(&pbd_delwrite_lock);
 
-		count = 0;
 		list_for_each_safe(curr, next, &pbd_delwrite_queue) {
 			pb = list_entry(curr, page_buf_t, pb_list);
 
@@ -1657,7 +1655,6 @@
 				pb->pb_flags &= ~PBF_DELWRI;
 				pb->pb_flags |= PBF_WRITE;
 				list_move(&pb->pb_list, &tmp);
-				count++;
 			}
 		}
 
@@ -1667,12 +1664,11 @@
 			list_del_init(&pb->pb_list);
 
 			pagebuf_iostrategy(pb);
+			blk_run_address_space(pb->pb_target->pbr_mapping);
 		}
 
 		if (as_list_len > 0)
 			purge_addresses();
-		if (count)
-			blk_run_queues();
 
 		force_flush = 0;
 	} while (pagebuf_daemon_active);
@@ -1689,7 +1685,6 @@
 	page_buf_t		*pb;
 	struct list_head	*curr, *next, tmp;
 	int			pincount = 0;
-	int			flush_cnt = 0;
 
 	pagebuf_runall_queues(pagebuf_dataio_workqueue);
 	pagebuf_runall_queues(pagebuf_logio_workqueue);
@@ -1733,14 +1728,8 @@
 
 		pagebuf_lock(pb);
 		pagebuf_iostrategy(pb);
-		if (++flush_cnt > 32) {
-			blk_run_queues();
-			flush_cnt = 0;
-		}
 	}
 
-	blk_run_queues();
-
 	while (!list_empty(&tmp)) {
 		pb = list_entry(tmp.next, page_buf_t, pb_list);
 
diff -ur -X /home/axboe/cdrom/exclude /opt/kernel/linux-2.6.4-mm1/include/linux/backing-dev.h linux-2.6.4-mm1/include/linux/backing-dev.h
--- /opt/kernel/linux-2.6.4-mm1/include/linux/backing-dev.h	2004-03-11 09:10:02.000000000 +0100
+++ linux-2.6.4-mm1/include/linux/backing-dev.h	2004-03-11 09:07:12.706793571 +0100
@@ -28,6 +28,8 @@
 	int memory_backed;	/* Cannot clean pages with writepage */
 	congested_fn *congested_fn; /* Function pointer if device is md/dm */
 	void *congested_data;	/* Pointer to aux data for congested func */
+	void (*unplug_io_fn)(struct backing_dev_info *);
+	void *unplug_io_data;
 };
 
 extern struct backing_dev_info default_backing_dev_info;
diff -ur -X /home/axboe/cdrom/exclude /opt/kernel/linux-2.6.4-mm1/include/linux/bio.h linux-2.6.4-mm1/include/linux/bio.h
--- /opt/kernel/linux-2.6.4-mm1/include/linux/bio.h	2004-03-11 03:55:37.000000000 +0100
+++ linux-2.6.4-mm1/include/linux/bio.h	2004-03-11 09:07:12.707793462 +0100
@@ -124,6 +124,7 @@
 #define BIO_RW_AHEAD	1
 #define BIO_RW_BARRIER	2
 #define BIO_RW_FAILFAST	3
+#define BIO_RW_SYNC	4
 
 /*
  * various member access, note that bio_data should of course not be used
@@ -138,6 +139,7 @@
 #define bio_cur_sectors(bio)	(bio_iovec(bio)->bv_len >> 9)
 #define bio_data(bio)		(page_address(bio_page((bio))) + bio_offset((bio)))
 #define bio_barrier(bio)	((bio)->bi_rw & (1 << BIO_RW_BARRIER))
+#define bio_sync(bio)		((bio)->bi_rw & (1 << BIO_RW_SYNC))
 
 /*
  * will die
diff -ur -X /home/axboe/cdrom/exclude /opt/kernel/linux-2.6.4-mm1/include/linux/blkdev.h linux-2.6.4-mm1/include/linux/blkdev.h
--- /opt/kernel/linux-2.6.4-mm1/include/linux/blkdev.h	2004-03-11 09:10:02.000000000 +0100
+++ linux-2.6.4-mm1/include/linux/blkdev.h	2004-03-11 09:07:12.708793354 +0100
@@ -243,7 +243,7 @@
 typedef void (request_fn_proc) (request_queue_t *q);
 typedef int (make_request_fn) (request_queue_t *q, struct bio *bio);
 typedef int (prep_rq_fn) (request_queue_t *, struct request *);
-typedef void (unplug_fn) (void *q);
+typedef void (unplug_fn) (request_queue_t *);
 
 struct bio_vec;
 typedef int (merge_bvec_fn) (request_queue_t *, struct bio *, struct bio_vec *);
@@ -315,8 +315,6 @@
 	unsigned long		bounce_pfn;
 	int			bounce_gfp;
 
-	struct list_head	plug_list;
-
 	/*
 	 * various queue flags, see QUEUE_* below
 	 */
@@ -370,8 +368,9 @@
 #define QUEUE_FLAG_WRITEFULL	4	/* read queue has been filled */
 #define QUEUE_FLAG_DEAD		5	/* queue being torn down */
 #define QUEUE_FLAG_REENTER	6	/* Re-entrancy avoidance */
+#define QUEUE_FLAG_PLUGGED	7	/* queue is plugged */
 
-#define blk_queue_plugged(q)	!list_empty(&(q)->plug_list)
+#define blk_queue_plugged(q)	test_bit(QUEUE_FLAG_PLUGGED, &(q)->queue_flags)
 #define blk_queue_tagged(q)	test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags)
 #define blk_queue_stopped(q)	test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags)
 
@@ -515,7 +514,7 @@
 extern void blk_start_queue(request_queue_t *q);
 extern void blk_stop_queue(request_queue_t *q);
 extern void __blk_stop_queue(request_queue_t *q);
-extern void blk_run_queue(request_queue_t *q);
+extern void blk_run_queue(request_queue_t *);
 extern void blk_queue_activity_fn(request_queue_t *, activity_fn *, void *);
 extern struct request *blk_rq_map_user(request_queue_t *, int, void __user *, unsigned int);
 extern int blk_rq_unmap_user(struct request *, void __user *, unsigned int, int);
@@ -526,6 +525,18 @@
 	return bdev->bd_disk->queue;
 }
 
+static inline void blk_run_backing_dev(struct backing_dev_info *bdi)
+{
+	if (bdi)
+		bdi->unplug_io_fn(bdi);
+}
+
+static inline void blk_run_address_space(struct address_space *mapping)
+{
+	if (mapping)
+		blk_run_backing_dev(mapping->backing_dev_info);
+}
+
 /*
  * end_request() and friends. Must be called with the request queue spinlock
  * acquired. All functions called within end_request() _must_be_ atomic.
@@ -572,7 +583,7 @@
 
 extern int blk_rq_map_sg(request_queue_t *, struct request *, struct scatterlist *);
 extern void blk_dump_rq_flags(struct request *, char *);
-extern void generic_unplug_device(void *);
+extern void generic_unplug_device(request_queue_t *);
 extern long nr_blockdev_pages(void);
 
 int blk_get_queue(request_queue_t *);
diff -ur -X /home/axboe/cdrom/exclude /opt/kernel/linux-2.6.4-mm1/include/linux/fs.h linux-2.6.4-mm1/include/linux/fs.h
--- /opt/kernel/linux-2.6.4-mm1/include/linux/fs.h	2004-03-11 09:10:02.309408999 +0100
+++ linux-2.6.4-mm1/include/linux/fs.h	2004-03-11 09:07:12.000000000 +0100
@@ -82,6 +82,8 @@
 #define WRITE 1
 #define READA 2		/* read-ahead  - don't block if no resources */
 #define SPECIAL 4	/* For non-blockdevice requests in request queue */
+#define READ_SYNC	(READ | BIO_RW_SYNC)
+#define WRITE_SYNC	(WRITE | BIO_RW_SYNC)
 
 #define SEL_IN		1
 #define SEL_OUT		2
diff -ur -X /home/axboe/cdrom/exclude /opt/kernel/linux-2.6.4-mm1/include/linux/raid/md_k.h linux-2.6.4-mm1/include/linux/raid/md_k.h
--- /opt/kernel/linux-2.6.4-mm1/include/linux/raid/md_k.h	2004-03-11 03:55:21.000000000 +0100
+++ linux-2.6.4-mm1/include/linux/raid/md_k.h	2004-03-11 09:07:12.000000000 +0100
@@ -326,7 +326,6 @@
 		if (condition)						\
 			break;						\
 		spin_unlock_irq(&lock);					\
-		blk_run_queues();					\
 		schedule();						\
 		spin_lock_irq(&lock);					\
 	}								\
@@ -341,30 +340,5 @@
 	__wait_event_lock_irq(wq, condition, lock);			\
 } while (0)
 
-
-#define __wait_disk_event(wq, condition) 				\
-do {									\
-	wait_queue_t __wait;						\
-	init_waitqueue_entry(&__wait, current);				\
-									\
-	add_wait_queue(&wq, &__wait);					\
-	for (;;) {							\
-		set_current_state(TASK_UNINTERRUPTIBLE);		\
-		if (condition)						\
-			break;						\
-		blk_run_queues();					\
-		schedule();						\
-	}								\
-	current->state = TASK_RUNNING;					\
-	remove_wait_queue(&wq, &__wait);				\
-} while (0)
-
-#define wait_disk_event(wq, condition) 					\
-do {									\
-	if (condition)	 						\
-		break;							\
-	__wait_disk_event(wq, condition);				\
-} while (0)
-
 #endif
 
diff -ur -X /home/axboe/cdrom/exclude /opt/kernel/linux-2.6.4-mm1/kernel/power/disk.c linux-2.6.4-mm1/kernel/power/disk.c
--- /opt/kernel/linux-2.6.4-mm1/kernel/power/disk.c	2004-03-11 03:55:21.000000000 +0100
+++ linux-2.6.4-mm1/kernel/power/disk.c	2004-03-11 09:07:12.000000000 +0100
@@ -84,7 +84,6 @@
 	while (shrink_all_memory(10000))
 		printk(".");
 	printk("|\n");
-	blk_run_queues();
 }
 
 
diff -ur -X /home/axboe/cdrom/exclude /opt/kernel/linux-2.6.4-mm1/kernel/power/pmdisk.c linux-2.6.4-mm1/kernel/power/pmdisk.c
--- /opt/kernel/linux-2.6.4-mm1/kernel/power/pmdisk.c	2004-03-11 03:55:28.000000000 +0100
+++ linux-2.6.4-mm1/kernel/power/pmdisk.c	2004-03-11 09:07:12.000000000 +0100
@@ -859,7 +859,6 @@
 
 static void wait_io(void)
 {
-	blk_run_queues();
 	while(atomic_read(&io_done))
 		io_schedule();
 }
@@ -895,6 +894,7 @@
 		goto Done;
 	}
 
+	rw |= BIO_RW_SYNC;
 	if (rw == WRITE)
 		bio_set_pages_dirty(bio);
 	start_io();
diff -ur -X /home/axboe/cdrom/exclude /opt/kernel/linux-2.6.4-mm1/kernel/power/swsusp.c linux-2.6.4-mm1/kernel/power/swsusp.c
--- /opt/kernel/linux-2.6.4-mm1/kernel/power/swsusp.c	2004-03-11 03:55:24.000000000 +0100
+++ linux-2.6.4-mm1/kernel/power/swsusp.c	2004-03-11 09:07:12.000000000 +0100
@@ -707,11 +707,6 @@
 
 		free_some_memory();
 		
-		/* No need to invalidate any vfsmnt list -- 
-		 * they will be valid after resume, anyway.
-		 */
-		blk_run_queues();
-
 		/* Save state of all device drivers, and stop them. */		   
 		if ((res = device_suspend(4))==0)
 			/* If stopping device drivers worked, we proceed basically into
diff -ur -X /home/axboe/cdrom/exclude /opt/kernel/linux-2.6.4-mm1/mm/mempool.c linux-2.6.4-mm1/mm/mempool.c
--- /opt/kernel/linux-2.6.4-mm1/mm/mempool.c	2004-03-11 03:55:34.000000000 +0100
+++ linux-2.6.4-mm1/mm/mempool.c	2004-03-11 09:07:12.000000000 +0100
@@ -233,8 +233,6 @@
 	if (!(gfp_mask & __GFP_WAIT))
 		return NULL;
 
-	blk_run_queues();
-
 	prepare_to_wait(&pool->wait, &wait, TASK_UNINTERRUPTIBLE);
 	if (!pool->curr_nr)
 		io_schedule();
diff -ur -X /home/axboe/cdrom/exclude /opt/kernel/linux-2.6.4-mm1/mm/readahead.c linux-2.6.4-mm1/mm/readahead.c
--- /opt/kernel/linux-2.6.4-mm1/mm/readahead.c	2004-03-11 09:10:02.452393513 +0100
+++ linux-2.6.4-mm1/mm/readahead.c	2004-03-11 09:07:12.000000000 +0100
@@ -15,9 +15,14 @@
 #include <linux/backing-dev.h>
 #include <linux/pagevec.h>
 
+static void default_unplug_io_fn(struct backing_dev_info *bdi)
+{
+}
+
 struct backing_dev_info default_backing_dev_info = {
 	.ra_pages	= (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE,
 	.state		= 0,
+	.unplug_io_fn	= default_unplug_io_fn,
 };
 
 EXPORT_SYMBOL_GPL(default_backing_dev_info);

-- 
Jens Axboe


             reply	other threads:[~2004-03-11  8:37 UTC|newest]

Thread overview: 27+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2004-03-11  8:36 Jens Axboe [this message]
2004-03-11 22:44 ` [PATCH] per-backing dev unplugging #2 Nathan Scott
2004-03-12  1:23 ` William Lee Irwin III
2004-03-12  6:41 ` Ingo Oeser
2004-03-12  8:04   ` Jens Axboe
2004-03-12 19:51 ` Chris Mason
2004-03-12 20:03   ` Andrew Morton
2004-03-12 20:02     ` Jens Axboe
2004-03-12 20:34       ` Chris Mason
2004-03-12 20:34         ` Jens Axboe
2004-03-12 20:41           ` Chris Mason
2004-03-12 20:51             ` Jens Axboe
2004-03-14 20:43               ` Chris Mason
2004-03-14 20:47                 ` Jens Axboe
2004-03-14 20:57                   ` Chris Mason
2004-03-14 21:04                   ` Andrew Morton
2004-03-14 21:09                     ` Jens Axboe
2004-03-15 21:58                       ` Chris Mason
2004-03-12 20:06     ` Chris Mason
  -- strict thread matches above, loose matches on Subject: below --
2004-03-16  5:22 Jeremy Higdon
2004-03-16  6:36 ` Nick Piggin
2004-03-16  7:20   ` Jeremy Higdon
2004-03-16  7:38     ` Jens Axboe
2004-03-17  0:23       ` Jeremy Higdon
2004-03-16  7:46     ` Nick Piggin
2004-03-16  7:29 ` Jens Axboe
2004-03-16 17:20 ` Jesse Barnes

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20040311083619.GH6955@suse.de \
    --to=axboe@suse.de \
    --cc=akpm@osdl.org \
    --cc=kenneth.w.chen@intel.com \
    --cc=linux-kernel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.