Re: [patch] CFQ for xen domains

All of lore.kernel.org
 help / color / mirror / Atom feed

From: Gerd Knorr <kraxel@suse.de>
To: Gerd Knorr <kraxel@suse.de>
Cc: Ian Pratt <m+Ian.Pratt@cl.cam.ac.uk>,
	xen-devel <xen-devel@lists.xensource.com>,
	Chris Mason <mason@suse.com>, Kurt Garloff <garloff@suse.de>,
	Jens Axboe <axboe@suse.de>
Subject: Re: [patch] CFQ for xen domains
Date: Tue, 15 Nov 2005 18:51:15 +0100	[thread overview]
Message-ID: <437A2013.9020601@suse.de> (raw)
In-Reply-To: <4371F9A5.4000205@suse.de>

[-- Attachment #1: Type: text/plain, Size: 1063 bytes --]

> I've resynced the blkback threading patch with the latest sparse tree, 
> here we are.  Changes:
> 
>   * One thread per blkif.  The I/O scheduler can do a better job that
>     way; you can also use ionice on the blkback threads to adjust the
>     block I/O priorities for the domain.
>   * Various stuff has been moved from global variables into blkif_t.
>   * The scary allocation ring for pending_req's is gone and has been
>     replaced by a free list.
>   * made dispatch_rw_block_io() reentrant.
>   * general linux coding style cleanup, at least for the code I've
>     touched anyway.
>   * number of outstanding requests is runtime-configurable now.
>   * made the ia64 #ifdefs smaller and dropped one.  It should still
>     work on ia64 in theory, but it would be great if the ia64 folks
>     could have a look ...

Next version of that patch, with these additional changes:

   * re-added the xen_init() which got lost by mistake (pointed
     out by the ia64 guys).
   * adapted to the driver architecture changes merged recently.

cheers,

   Gerd

[-- Attachment #2: blkback-7793-11.diff --]
[-- Type: text/x-patch, Size: 23348 bytes --]

diff -r 090e44133d40 linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c	Mon Nov 14 17:13:38 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c	Tue Nov 15 18:17:26 2005
@@ -12,6 +12,8 @@
  */
 
 #include <linux/spinlock.h>
+#include <linux/kthread.h>
+#include <linux/list.h>
 #include <asm-xen/balloon.h>
 #include <asm/hypervisor.h>
 #include "common.h"
@@ -21,26 +23,21 @@
  * pulled from a communication ring are quite likely to end up being part of
  * the same scatter/gather request at the disc.
  * 
- * ** TRY INCREASING 'MAX_PENDING_REQS' IF WRITE SPEEDS SEEM TOO LOW **
+ * ** TRY INCREASING 'blkif_reqs' IF WRITE SPEEDS SEEM TOO LOW **
+ * 
  * This will increase the chances of being able to write whole tracks.
  * 64 should be enough to keep us competitive with Linux.
  */
-#define MAX_PENDING_REQS 64
-#define BATCH_PER_DOMAIN 16
-
-static unsigned long mmap_vstart;
-#define MMAP_PAGES						\
-	(MAX_PENDING_REQS * BLKIF_MAX_SEGMENTS_PER_REQUEST)
-#ifdef __ia64__
-static void *pending_vaddrs[MMAP_PAGES];
-#define MMAP_VADDR(_idx, _i) \
-	(unsigned long)(pending_vaddrs[((_idx) * BLKIF_MAX_SEGMENTS_PER_REQUEST) + (_i)])
-#else
-#define MMAP_VADDR(_req,_seg)						\
-	(mmap_vstart +							\
-	 ((_req) * BLKIF_MAX_SEGMENTS_PER_REQUEST * PAGE_SIZE) +	\
-	 ((_seg) * PAGE_SIZE))
-#endif
+static int blkif_reqs = 64;
+static int mmap_pages;
+
+static int __init set_blkif_reqs(char *str)
+{
+	get_option(&str, &blkif_reqs);
+	return 1;
+}
+__setup("blkif_reqs=", set_blkif_reqs);
+
 
 /*
  * Each outstanding request that we've passed to the lower device layers has a 
@@ -55,43 +52,38 @@
 	atomic_t       pendcnt;
 	unsigned short operation;
 	int            status;
+	struct list_head free_list;
 } pending_req_t;
 
-/*
- * We can't allocate pending_req's in order, since they may complete out of 
- * order. We therefore maintain an allocation ring. This ring also indicates 
- * when enough work has been passed down -- at that point the allocation ring 
- * will be empty.
- */
-static pending_req_t pending_reqs[MAX_PENDING_REQS];
-static unsigned char pending_ring[MAX_PENDING_REQS];
-static spinlock_t pend_prod_lock = SPIN_LOCK_UNLOCKED;
-/* NB. We use a different index type to differentiate from shared blk rings. */
-typedef unsigned int PEND_RING_IDX;
-#define MASK_PEND_IDX(_i) ((_i)&(MAX_PENDING_REQS-1))
-static PEND_RING_IDX pending_prod, pending_cons;
-#define NR_PENDING_REQS (MAX_PENDING_REQS - pending_prod + pending_cons)
-
-static request_queue_t *plugged_queue;
-static inline void flush_plugged_queue(void)
-{
-	request_queue_t *q = plugged_queue;
-	if (q != NULL) {
-		if ( q->unplug_fn != NULL )
-			q->unplug_fn(q);
-		blk_put_queue(q);
-		plugged_queue = NULL;
-	}
-}
+static pending_req_t *pending_reqs;
+static struct list_head pending_free;
+static spinlock_t pending_free_lock = SPIN_LOCK_UNLOCKED;
+static DECLARE_WAIT_QUEUE_HEAD(pending_free_wq);
+
+#define BLKBACK_INVALID_HANDLE (0xFFFF)
+
+static unsigned long mmap_vstart;
+static unsigned long *pending_vaddrs;
+static u16 *pending_grant_handles;
+
+static inline int vaddr_pagenr(pending_req_t *req, int seg)
+{
+	return (req - pending_reqs) * BLKIF_MAX_SEGMENTS_PER_REQUEST + seg;
+}
+
+static inline unsigned long vaddr(pending_req_t *req, int seg)
+{
+	return pending_vaddrs[vaddr_pagenr(req, seg)];
+}
+
+#define pending_handle(_req, _seg) \
+	(pending_grant_handles[vaddr_pagenr(_req, _seg)])
+
 
 /* When using grant tables to map a frame for device access then the
  * handle returned must be used to unmap the frame. This is needed to
  * drop the ref count on the frame.
  */
-static u16 pending_grant_handles[MMAP_PAGES];
-#define pending_handle(_idx, _i) \
-    (pending_grant_handles[((_idx) * BLKIF_MAX_SEGMENTS_PER_REQUEST) + (_i)])
-#define BLKBACK_INVALID_HANDLE (0xFFFF)
 
 #ifdef CONFIG_XEN_BLKDEV_TAP_BE
 /*
@@ -105,26 +97,79 @@
 static inline domid_t ID_TO_DOM(unsigned long id) { return (id >> 16); }
 #endif
 
-static int do_block_io_op(blkif_t *blkif, int max_to_do);
-static void dispatch_rw_block_io(blkif_t *blkif, blkif_request_t *req);
+static int do_block_io_op(blkif_t *blkif);
+static void dispatch_rw_block_io(blkif_t *blkif,
+				 blkif_request_t *req,
+				 pending_req_t *pending_req);
 static void make_response(blkif_t *blkif, unsigned long id, 
                           unsigned short op, int st);
 
-static void fast_flush_area(int idx, int nr_pages)
+/******************************************************************
+ * misc small helpers
+ */
+static pending_req_t* alloc_req(void)
+{
+	pending_req_t *req = NULL;
+	unsigned long flags;
+
+	spin_lock_irqsave(&pending_free_lock, flags);
+	if (!list_empty(&pending_free)) {
+		req = list_entry(pending_free.next, pending_req_t, free_list);
+		list_del(&req->free_list);
+	}
+	spin_unlock_irqrestore(&pending_free_lock, flags);
+	return req;
+}
+
+static void free_req(pending_req_t *req)
+{
+	unsigned long flags;
+	int was_empty;
+
+	spin_lock_irqsave(&pending_free_lock, flags);
+	was_empty = list_empty(&pending_free);
+	list_add(&req->free_list, &pending_free);
+	spin_unlock_irqrestore(&pending_free_lock, flags);
+	if (was_empty)
+		wake_up(&pending_free_wq);
+}
+
+static void unplug_queue(blkif_t *blkif)
+{
+	if (NULL == blkif->plug)
+		return;
+	if (blkif->plug->unplug_fn)
+		blkif->plug->unplug_fn(blkif->plug);
+	blk_put_queue(blkif->plug);
+	blkif->plug = NULL;
+}
+
+static void plug_queue(blkif_t *blkif, struct bio *bio)
+{
+	request_queue_t *q = bdev_get_queue(bio->bi_bdev);
+
+	if (q == blkif->plug)
+		return;
+	unplug_queue(blkif);
+	blk_get_queue(q);
+	blkif->plug = q;
+}
+
+static void fast_flush_area(pending_req_t *req)
 {
 	struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST];
 	unsigned int i, invcount = 0;
 	u16 handle;
 	int ret;
 
-	for (i = 0; i < nr_pages; i++) {
-		handle = pending_handle(idx, i);
+	for (i = 0; i < req->nr_pages; i++) {
+		handle = pending_handle(req, i);
 		if (handle == BLKBACK_INVALID_HANDLE)
 			continue;
-		unmap[invcount].host_addr    = MMAP_VADDR(idx, i);
+		unmap[invcount].host_addr    = vaddr(req, i);
 		unmap[invcount].dev_bus_addr = 0;
 		unmap[invcount].handle       = handle;
-		pending_handle(idx, i) = BLKBACK_INVALID_HANDLE;
+		pending_handle(req, i) = BLKBACK_INVALID_HANDLE;
 		invcount++;
 	}
 
@@ -133,109 +178,72 @@
 	BUG_ON(ret);
 }
 
-
-/******************************************************************
- * BLOCK-DEVICE SCHEDULER LIST MAINTENANCE
- */
-
-static struct list_head blkio_schedule_list;
-static spinlock_t blkio_schedule_list_lock;
-
-static int __on_blkdev_list(blkif_t *blkif)
-{
-	return blkif->blkdev_list.next != NULL;
-}
-
-static void remove_from_blkdev_list(blkif_t *blkif)
-{
-	unsigned long flags;
-
-	if (!__on_blkdev_list(blkif))
-		return;
-
-	spin_lock_irqsave(&blkio_schedule_list_lock, flags);
-	if (__on_blkdev_list(blkif)) {
-		list_del(&blkif->blkdev_list);
-		blkif->blkdev_list.next = NULL;
-		blkif_put(blkif);
-	}
-	spin_unlock_irqrestore(&blkio_schedule_list_lock, flags);
-}
-
-static void add_to_blkdev_list_tail(blkif_t *blkif)
-{
-	unsigned long flags;
-
-	if (__on_blkdev_list(blkif))
-		return;
-
-	spin_lock_irqsave(&blkio_schedule_list_lock, flags);
-	if (!__on_blkdev_list(blkif) && (blkif->status == CONNECTED)) {
-		list_add_tail(&blkif->blkdev_list, &blkio_schedule_list);
-		blkif_get(blkif);
-	}
-	spin_unlock_irqrestore(&blkio_schedule_list_lock, flags);
-}
-
-
 /******************************************************************
  * SCHEDULER FUNCTIONS
  */
 
-static DECLARE_WAIT_QUEUE_HEAD(blkio_schedule_wait);
-
-static int blkio_schedule(void *arg)
-{
-	DECLARE_WAITQUEUE(wq, current);
-
-	blkif_t          *blkif;
-	struct list_head *ent;
-
-	daemonize("xenblkd");
-
+int blkif_schedule(void *arg)
+{
+	blkif_t          *blkif = arg;
+
+	blkif_get(blkif);
+	printk(KERN_DEBUG "%s: started\n", current->comm);
 	for (;;) {
-		/* Wait for work to do. */
-		add_wait_queue(&blkio_schedule_wait, &wq);
-		set_current_state(TASK_INTERRUPTIBLE);
-		if ( (NR_PENDING_REQS == MAX_PENDING_REQS) || 
-		     list_empty(&blkio_schedule_list) )
-			schedule();
-		__set_current_state(TASK_RUNNING);
-		remove_wait_queue(&blkio_schedule_wait, &wq);
-
-		/* Queue up a batch of requests. */
-		while ((NR_PENDING_REQS < MAX_PENDING_REQS) &&
-		       !list_empty(&blkio_schedule_list)) {
-			ent = blkio_schedule_list.next;
-			blkif = list_entry(ent, blkif_t, blkdev_list);
-			blkif_get(blkif);
-			remove_from_blkdev_list(blkif);
-			if (do_block_io_op(blkif, BATCH_PER_DOMAIN))
-				add_to_blkdev_list_tail(blkif);
-			blkif_put(blkif);
-		}
-
-		/* Push the batch through to disc. */
-		flush_plugged_queue();
-	}
-}
-
-static void maybe_trigger_blkio_schedule(void)
-{
-	/*
-	 * Needed so that two processes, which together make the following
-	 * predicate true, don't both read stale values and evaluate the
-	 * predicate incorrectly. Incredibly unlikely to stall the scheduler
-	 * on x86, but...
-	 */
-	smp_mb();
-
-	if ((NR_PENDING_REQS < (MAX_PENDING_REQS/2)) &&
-	    !list_empty(&blkio_schedule_list))
-		wake_up(&blkio_schedule_wait);
-}
-
-
+		if (kthread_should_stop()) {
+			/* asked to quit? */
+			if (!atomic_read(&blkif->io_pending))
+				break;
+			printk(KERN_DEBUG "%s: I/O pending, delaying exit\n",
+			       current->comm);
+		}
+
+		if (!atomic_read(&blkif->io_pending)) {
+			/* Wait for work to do. */
+			wait_event_interruptible(blkif->wq,
+						 atomic_read(&blkif->io_pending) ||
+						 kthread_should_stop());
+		} else if (list_empty(&pending_free)) {
+			/* Wait for pending_req becoming available. */
+			wait_event_interruptible(pending_free_wq,
+						 !list_empty(&pending_free));
+		}
+
+		if (blkif->status != CONNECTED) {
+			/* make sure we are connected */
+			printk(KERN_DEBUG "%s: not connected (%d pending)\n",
+			       current->comm, atomic_read(&blkif->io_pending));
+			wait_event_interruptible(blkif->wq,
+						 blkif->status != CONNECTED ||
+						 kthread_should_stop());
+			continue;
+		}
+
+		/* Schedule I/O */
+		atomic_set(&blkif->io_pending, 0);
+		if (do_block_io_op(blkif))
+			atomic_inc(&blkif->io_pending);
+		unplug_queue(blkif);
+
+#if 0
+		/* Print stats for performance debugging. */
+		if (time_after(jiffies, blkif->st_print)) {
+			printk(KERN_DEBUG "%s: oo %3d  |  rd %4d  |  wr %4d\n",
+			       current->comm, blkif->st_oo_req,
+			       blkif->st_rd_req, blkif->st_wr_req);
+			blkif->st_print = jiffies + msecs_to_jiffies(10 * 1000);
+			blkif->st_rd_req = 0;
+			blkif->st_wr_req = 0;
+			blkif->st_oo_req = 0;
+		}
+#endif
+	}
+
+	/* bye folks, and thanks for all the fish ;) */
+	printk(KERN_DEBUG "%s: exiting\n", current->comm);
+	blkif->xenblkd = NULL;
+	blkif_put(blkif);
+	return 0;
+}
 
 /******************************************************************
  * COMPLETION CALLBACK -- Called as bh->b_end_io()
@@ -243,8 +251,6 @@
 
 static void __end_block_io_op(pending_req_t *pending_req, int uptodate)
 {
-	unsigned long flags;
-
 	/* An error fails the entire request. */
 	if (!uptodate) {
 		DPRINTK("Buffer not up-to-date at end of operation\n");
@@ -252,15 +258,11 @@
 	}
 
 	if (atomic_dec_and_test(&pending_req->pendcnt)) {
-		int pending_idx = pending_req - pending_reqs;
-		fast_flush_area(pending_idx, pending_req->nr_pages);
+		fast_flush_area(pending_req);
 		make_response(pending_req->blkif, pending_req->id,
 			      pending_req->operation, pending_req->status);
 		blkif_put(pending_req->blkif);
-		spin_lock_irqsave(&pend_prod_lock, flags);
-		pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
-		spin_unlock_irqrestore(&pend_prod_lock, flags);
-		maybe_trigger_blkio_schedule();
+		free_req(pending_req);
 	}
 }
 
@@ -281,8 +283,9 @@
 irqreturn_t blkif_be_int(int irq, void *dev_id, struct pt_regs *regs)
 {
 	blkif_t *blkif = dev_id;
-	add_to_blkdev_list_tail(blkif);
-	maybe_trigger_blkio_schedule();
+
+	atomic_inc(&blkif->io_pending);
+	wake_up(&blkif->wq);
 	return IRQ_HANDLED;
 }
 
@@ -292,10 +295,11 @@
  * DOWNWARD CALLS -- These interface with the block-device layer proper.
  */
 
-static int do_block_io_op(blkif_t *blkif, int max_to_do)
+static int do_block_io_op(blkif_t *blkif)
 {
 	blkif_back_ring_t *blk_ring = &blkif->blk_ring;
 	blkif_request_t *req;
+	pending_req_t *pending_req;
 	RING_IDX i, rp;
 	int more_to_do = 0;
 
@@ -305,24 +309,30 @@
 	for (i = blk_ring->req_cons; 
 	     (i != rp) && !RING_REQUEST_CONS_OVERFLOW(blk_ring, i);
 	     i++) {
-		if ((max_to_do-- == 0) ||
-		    (NR_PENDING_REQS == MAX_PENDING_REQS)) {
+
+		pending_req = alloc_req();
+		if (NULL == pending_req) {
+			blkif->st_oo_req++;
 			more_to_do = 1;
 			break;
 		}
-        
+
 		req = RING_GET_REQUEST(blk_ring, i);
 		switch (req->operation) {
 		case BLKIF_OP_READ:
+			blkif->st_rd_req++;
+			dispatch_rw_block_io(blkif, req, pending_req);
+			break;
 		case BLKIF_OP_WRITE:
-			dispatch_rw_block_io(blkif, req);
+			blkif->st_wr_req++;
+			dispatch_rw_block_io(blkif, req, pending_req);
 			break;
-
 		default:
 			DPRINTK("error: unknown block io operation [%d]\n",
 				req->operation);
 			make_response(blkif, req->id, req->operation,
 				      BLKIF_RSP_ERROR);
+			free_req(pending_req);
 			break;
 		}
 	}
@@ -331,13 +341,13 @@
 	return more_to_do;
 }
 
-static void dispatch_rw_block_io(blkif_t *blkif, blkif_request_t *req)
+static void dispatch_rw_block_io(blkif_t *blkif,
+				 blkif_request_t *req,
+				 pending_req_t *pending_req)
 {
 	extern void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]); 
 	int operation = (req->operation == BLKIF_OP_WRITE) ? WRITE : READ;
 	unsigned long fas = 0;
-	int i, pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)];
-	pending_req_t *pending_req;
 	struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST];
 	struct phys_req preq;
 	struct { 
@@ -345,31 +355,35 @@
 	} seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
 	unsigned int nseg;
 	struct bio *bio = NULL, *biolist[BLKIF_MAX_SEGMENTS_PER_REQUEST];
-	int nbio = 0;
-	request_queue_t *q;
-	int ret, errors = 0;
+	int ret, i, nbio = 0;
 
 	/* Check that number of segments is sane. */
 	nseg = req->nr_segments;
 	if (unlikely(nseg == 0) || 
 	    unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) {
 		DPRINTK("Bad number of segments in request (%d)\n", nseg);
-		goto bad_descriptor;
+		goto fail_response;
 	}
 
 	preq.dev           = req->handle;
 	preq.sector_number = req->sector_number;
 	preq.nr_sects      = 0;
 
+	pending_req->blkif     = blkif;
+	pending_req->id        = req->id;
+	pending_req->operation = operation;
+	pending_req->status    = BLKIF_RSP_OKAY;
+	pending_req->nr_pages  = nseg;
+
 	for (i = 0; i < nseg; i++) {
 		fas         = req->frame_and_sects[i];
 		seg[i].nsec = blkif_last_sect(fas) - blkif_first_sect(fas) + 1;
 
 		if (seg[i].nsec <= 0)
-			goto bad_descriptor;
+			goto fail_response;
 		preq.nr_sects += seg[i].nsec;
 
-		map[i].host_addr = MMAP_VADDR(pending_idx, i);
+		map[i].host_addr = vaddr(pending_req, i);
 		map[i].dom = blkif->domid;
 		map[i].ref = blkif_gref_from_fas(fas);
 		map[i].flags = GNTMAP_host_map;
@@ -381,27 +395,23 @@
 	BUG_ON(ret);
 
 	for (i = 0; i < nseg; i++) {
-		if (likely(map[i].handle >= 0)) {
-			pending_handle(pending_idx, i) = map[i].handle;
+		if (unlikely(map[i].handle < 0)) {
+			DPRINTK("invalid buffer -- could not remap it\n");
+			goto fail_flush;
+		}
+
+		pending_handle(pending_req, i) = map[i].handle;
 #ifdef __ia64__
-			MMAP_VADDR(pending_idx,i) = gnttab_map_vaddr(map[i]);
+		pending_vaddrs[vaddr_pagenr(req, seg)] =
+			= gnttab_map_vaddr(map[i]);
 #else
-			set_phys_to_machine(__pa(MMAP_VADDR(
-				pending_idx, i)) >> PAGE_SHIFT,
-				FOREIGN_FRAME(map[i].dev_bus_addr>>PAGE_SHIFT));
+		set_phys_to_machine(__pa(vaddr(
+			pending_req, i)) >> PAGE_SHIFT,
+			FOREIGN_FRAME(map[i].dev_bus_addr >> PAGE_SHIFT));
 #endif
-			fas        = req->frame_and_sects[i];
-			seg[i].buf = map[i].dev_bus_addr | 
-				(blkif_first_sect(fas) << 9);
-		} else {
-			errors++;
-		}
-	}
-
-	if (errors) {
-		DPRINTK("invalid buffer -- could not remap it\n");
-		fast_flush_area(pending_idx, nseg);
-		goto bad_descriptor;
+		fas         = req->frame_and_sects[i];
+		seg[i].buf  = map[i].dev_bus_addr | 
+			(blkif_first_sect(fas) << 9);
 	}
 
 	if (vbd_translate(&preq, blkif, operation) != 0) {
@@ -409,37 +419,25 @@
 			operation == READ ? "read" : "write",
 			preq.sector_number,
 			preq.sector_number + preq.nr_sects, preq.dev); 
-		goto bad_descriptor;
-	}
-
-	pending_req = &pending_reqs[pending_idx];
-	pending_req->blkif     = blkif;
-	pending_req->id        = req->id;
-	pending_req->operation = operation;
-	pending_req->status    = BLKIF_RSP_OKAY;
-	pending_req->nr_pages  = nseg;
+		goto fail_flush;
+	}
 
 	for (i = 0; i < nseg; i++) {
 		if (((int)preq.sector_number|(int)seg[i].nsec) &
 		    ((bdev_hardsect_size(preq.bdev) >> 9) - 1)) {
 			DPRINTK("Misaligned I/O request from domain %d",
 				blkif->domid);
-			goto cleanup_and_fail;
+			goto fail_put_bio;
 		}
 
 		while ((bio == NULL) ||
 		       (bio_add_page(bio,
-				     virt_to_page(MMAP_VADDR(pending_idx, i)),
+				     virt_to_page(vaddr(pending_req, i)),
 				     seg[i].nsec << 9,
 				     seg[i].buf & ~PAGE_MASK) == 0)) {
 			bio = biolist[nbio++] = bio_alloc(GFP_KERNEL, nseg-i);
-			if (unlikely(bio == NULL)) {
-			cleanup_and_fail:
-				for (i = 0; i < (nbio-1); i++)
-					bio_put(biolist[i]);
-				fast_flush_area(pending_idx, nseg);
-				goto bad_descriptor;
-			}
+			if (unlikely(bio == NULL))
+				goto fail_put_bio;
                 
 			bio->bi_bdev    = preq.bdev;
 			bio->bi_private = pending_req;
@@ -450,14 +448,8 @@
 		preq.sector_number += seg[i].nsec;
 	}
 
-	if ((q = bdev_get_queue(bio->bi_bdev)) != plugged_queue) {
-		flush_plugged_queue();
-		blk_get_queue(q);
-		plugged_queue = q;
-	}
-
+	plug_queue(blkif, bio);
 	atomic_set(&pending_req->pendcnt, nbio);
-	pending_cons++;
 	blkif_get(blkif);
 
 	for (i = 0; i < nbio; i++)
@@ -465,8 +457,14 @@
 
 	return;
 
- bad_descriptor:
+ fail_put_bio:
+	for (i = 0; i < (nbio-1); i++)
+		bio_put(biolist[i]);
+ fail_flush:
+	fast_flush_area(pending_req);
+ fail_response:
 	make_response(blkif, req->id, req->operation, BLKIF_RSP_ERROR);
+	free_req(pending_req);
 } 
 
 
@@ -498,56 +496,50 @@
 	notify_remote_via_irq(blkif->irq);
 }
 
-void blkif_deschedule(blkif_t *blkif)
-{
-	remove_from_blkdev_list(blkif);
-}
-
 static int __init blkif_init(void)
 {
+	struct page *page;
 	int i;
-	struct page *page;
-	int ret;
-
-	for (i = 0; i < MMAP_PAGES; i++)
-		pending_grant_handles[i] = BLKBACK_INVALID_HANDLE;
 
 	if (xen_init() < 0)
 		return -ENODEV;
 
+	mmap_pages            = blkif_reqs * BLKIF_MAX_SEGMENTS_PER_REQUEST;
+	pending_reqs          = kmalloc(sizeof(pending_reqs[0]) *
+					blkif_reqs, GFP_KERNEL);
+	pending_grant_handles = kmalloc(sizeof(pending_grant_handles[0]) *
+					mmap_pages, GFP_KERNEL);
+	pending_vaddrs        = kmalloc(sizeof(pending_vaddrs[0]) *
+					mmap_pages, GFP_KERNEL);
+	if (!pending_reqs || !pending_grant_handles || !pending_vaddrs) {
+		printk("%s: out of memory\n", __FUNCTION__);
+		return -1;
+	}
+
 	blkif_interface_init();
-
+	
 #ifdef __ia64__
-    {
 	extern unsigned long alloc_empty_foreign_map_page_range(unsigned long pages);
-	int i;
-
-	mmap_vstart =  alloc_empty_foreign_map_page_range(MMAP_PAGES);
-	printk("Allocated mmap_vstart: 0x%lx\n", mmap_vstart);
-	for(i = 0; i < MMAP_PAGES; i++)
-	    pending_vaddrs[i] = mmap_vstart + (i << PAGE_SHIFT);
-	BUG_ON(mmap_vstart == NULL);
-    }
-#else
-	page = balloon_alloc_empty_page_range(MMAP_PAGES);
+	mmap_vstart = (unsigned long)alloc_empty_foreign_map_page_range(mmap_pages);
+#else /* ! ia64 */
+	page = balloon_alloc_empty_page_range(mmap_pages);
 	BUG_ON(page == NULL);
 	mmap_vstart = (unsigned long)pfn_to_kaddr(page_to_pfn(page));
 #endif
-
-	pending_cons = 0;
-	pending_prod = MAX_PENDING_REQS;
+	printk("%s: reqs=%d, pages=%d, mmap_vstart=0x%lx\n",
+	       __FUNCTION__, blkif_reqs, mmap_pages, mmap_vstart);
+	BUG_ON(mmap_vstart == 0);
+	for (i = 0; i < mmap_pages; i++)
+		pending_vaddrs[i] = mmap_vstart + (i << PAGE_SHIFT);
+
+	memset(pending_grant_handles,  BLKBACK_INVALID_HANDLE, mmap_pages);
 	memset(pending_reqs, 0, sizeof(pending_reqs));
-	for (i = 0; i < MAX_PENDING_REQS; i++)
-		pending_ring[i] = i;
+	INIT_LIST_HEAD(&pending_free);
+
+	for (i = 0; i < blkif_reqs; i++)
+		list_add_tail(&pending_reqs[i].free_list, &pending_free);
     
-	spin_lock_init(&blkio_schedule_list_lock);
-	INIT_LIST_HEAD(&blkio_schedule_list);
-
-	ret = kernel_thread(blkio_schedule, 0, CLONE_FS | CLONE_FILES);
-	BUG_ON(ret < 0);
-
 	blkif_xenbus_init();
-
 	return 0;
 }
 
diff -r 090e44133d40 linux-2.6-xen-sparse/drivers/xen/blkback/common.h
--- a/linux-2.6-xen-sparse/drivers/xen/blkback/common.h	Mon Nov 14 17:13:38 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blkback/common.h	Tue Nov 15 18:17:26 2005
@@ -56,9 +56,19 @@
 	/* Is this a blktap frontend */
 	unsigned int     is_blktap;
 #endif
-	struct list_head blkdev_list;
 	spinlock_t       blk_ring_lock;
 	atomic_t         refcnt;
+
+	wait_queue_head_t   wq;
+	struct task_struct  *xenblkd;
+	atomic_t            io_pending;
+	request_queue_t     *plug;
+
+	/* statistics */
+	unsigned long       st_print;
+	int                 st_rd_req;
+	int                 st_wr_req;
+	int                 st_oo_req;
 
 	struct work_struct free_work;
 
@@ -97,11 +107,10 @@
 
 void blkif_interface_init(void);
 
-void blkif_deschedule(blkif_t *blkif);
-
 void blkif_xenbus_init(void);
 
 irqreturn_t blkif_be_int(int irq, void *dev_id, struct pt_regs *regs);
+int blkif_schedule(void *arg);
 
 #endif /* __BLKIF__BACKEND__COMMON_H__ */
 
diff -r 090e44133d40 linux-2.6-xen-sparse/drivers/xen/blkback/interface.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkback/interface.c	Mon Nov 14 17:13:38 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blkback/interface.c	Tue Nov 15 18:17:26 2005
@@ -24,6 +24,8 @@
 	blkif->status = DISCONNECTED;
 	spin_lock_init(&blkif->blk_ring_lock);
 	atomic_set(&blkif->refcnt, 1);
+	init_waitqueue_head(&blkif->wq);
+	blkif->st_print = jiffies;
 
 	return blkif;
 }
@@ -113,6 +115,7 @@
 	blkif->irq = bind_evtchn_to_irqhandler(
 		blkif->evtchn, blkif_be_int, 0, "blkif-backend", blkif);
 	blkif->status = CONNECTED;
+	wake_up(&blkif->wq);
 
 	return 0;
 }
diff -r 090e44133d40 linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c	Mon Nov 14 17:13:38 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c	Tue Nov 15 18:17:26 2005
@@ -20,6 +20,7 @@
 
 #include <stdarg.h>
 #include <linux/module.h>
+#include <linux/kthread.h>
 #include <asm-xen/xenbus.h>
 #include "common.h"
 
@@ -61,6 +62,8 @@
 		be->backend_watch.node = NULL;
 	}
 	if (be->blkif) {
+		if (be->blkif->xenblkd)
+			kthread_stop(be->blkif->xenblkd);
 		blkif_put(be->blkif);
 		be->blkif = NULL;
 	}
@@ -175,6 +178,16 @@
 			be->pdev = 0L;
 			xenbus_dev_fatal(dev, err,
 					 "creating vbd structure");
+			return;
+		}
+
+		be->blkif->xenblkd = kthread_run(blkif_schedule, be->blkif,
+						 "xenblkd %d/%04lx",
+						 be->blkif->domid, be->pdev);
+		if (IS_ERR(be->blkif->xenblkd)) {
+			err = PTR_ERR(be->blkif->xenblkd);
+			be->blkif->xenblkd = NULL;
+			xenbus_dev_error(dev, err, "start xenblkd");
 			return;
 		}
 

[-- Attachment #3: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

next prev parent reply	other threads:[~2005-11-15 17:51 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <20051019150719.GC10710@tpkurt.pmc.nue.novell.com>
     [not found] ` <20051020085107.GA2811@suse.de>
     [not found]   ` <435E56B6.2060707@suse.de>
     [not found]     ` <20051025171505.GG4774@suse.de>
     [not found]       ` <9f07b56a8f036cecd2a0687e040a417b@cl.cam.ac.uk>
     [not found]         ` <4360ABE5.1010705@suse.de>
     [not found]           ` <b943edf9af93016dc65d8090323ed520@cl.cam.ac.uk>
     [not found]             ` <4360CA79.40605@suse.de>
     [not found]               ` <efbd0c82d427f99326de12f394c2c90b@cl.cam.ac.uk>
2005-11-09 13:29                 ` [patch] CFQ for xen domains Gerd Knorr
2005-11-15 17:51                   ` Gerd Knorr [this message]
2005-11-24 11:31                     ` Gerd Knorr
2005-11-24 23:00                       ` Keir Fraser
2005-12-06 15:47                       ` Gerd Knorr

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=437A2013.9020601@suse.de \
    --to=kraxel@suse.de \
    --cc=axboe@suse.de \
    --cc=garloff@suse.de \
    --cc=m+Ian.Pratt@cl.cam.ac.uk \
    --cc=mason@suse.com \
    --cc=xen-devel@lists.xensource.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.