From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
To: Keith Swett <swettk@gmail.com>
Cc: xen-devel@lists.xensource.com
Subject: Re: Trying to track down odd behavior, errors in xen dmesg
Date: Thu, 14 Apr 2011 09:24:22 -0400
Message-ID: <20110414132422.GH5548@dumpdata.com>
In-Reply-To: <BANLkTikYWd4KU4dP2FBjFhZOLzkdgKjEDw@mail.gmail.com>

[-- Attachment #1: Type: text/plain, Size: 1028 bytes --]

On Tue, Apr 12, 2011 at 04:23:13PM -0500, Keith Swett wrote:
> Konrad,
> I found an error in my process.
> On line 225 of the patch, there's a single parenthesis.  What should
> the lines in this area (418-420 of blkback.c) look like after the
> patch?

I updated the patch. Try the attached one.
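
To make the deferral logic easier to follow before you dig into the diff,
here is a stand-alone user-space sketch of the pattern the patch applies in
blkback (plain C, compiles with gcc; every name in it is made up for
illustration and none of it is kernel API). Instead of unconditionally
unmapping the granted pages when the I/O completes, the patch counts how
many pages are still referenced elsewhere, parks the request on a delayed
list with a short deadline if any are, retries later, and once the deadline
has passed gives a still-busy page up and swaps in a replacement so the
request can finally complete:

/*
 * Stand-alone sketch of the deferral logic (user-space C, illustrative
 * names only -- this is not the kernel code from the patch).
 */
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

#define PAGES_PER_REQ 4

struct page_ref {
        int extra_refs;         /* references held by someone else (e.g. iSCSI) */
        int replaced;           /* page was given up and swapped for a fresh one */
};

struct io_request {
        struct page_ref pages[PAGES_PER_REQ];
        time_t deadline;        /* 0 until the request is first delayed */
        int completed;
};

/* Try to release every page; return how many are still busy. */
static int try_flush(struct io_request *req, int deadline_expired)
{
        int i, busy = 0;

        for (i = 0; i < PAGES_PER_REQ; i++) {
                struct page_ref *pg = &req->pages[i];

                if (pg->replaced || pg->extra_refs == 0)
                        continue;               /* releasable right now */

                if (deadline_expired)
                        pg->replaced = 1;       /* give the page up ("leak" it) */
                else
                        busy++;                 /* still referenced, retry later */
        }
        return busy;
}

/* Called when the I/O finishes, and again by a periodic retry sweep. */
static void complete_or_delay(struct io_request *req)
{
        int expired = req->deadline && time(NULL) >= req->deadline;

        if (try_flush(req, expired)) {
                if (!req->deadline)
                        req->deadline = time(NULL) + 1; /* coarse stand-in for HZ/64 */
                printf("request delayed, will retry\n");
                return;
        }
        req->completed = 1;
        printf("request completed\n");
}

int main(void)
{
        struct io_request req = { .pages = { { .extra_refs = 1 } } };

        complete_or_delay(&req);        /* page 0 still referenced -> delayed */
        req.pages[0].extra_refs = 0;    /* the external reference is dropped  */
        complete_or_delay(&req);        /* second attempt succeeds            */
        return req.completed ? EXIT_SUCCESS : EXIT_FAILURE;
}

In the real patch the retry is driven by delayed_reqs_timer, the deadline is
jiffies + HZ/64, and the give-up step is gnttab_copy_grant_page(); the
sketch above only models the control flow.
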
> 
> --Keith
> 
> On Tue, Apr 12, 2011 at 4:08 PM, Konrad Rzeszutek Wilk
> <konrad.wilk@oracle.com> wrote:
> > On Tue, Apr 12, 2011 at 03:59:48PM -0500, Keith Swett wrote:
> >> Konrad,
> >>
> >> I'm not seeing the kernel messages, so I'm wondering whether something
> >> else changed or whether I applied the patch correctly.
> >
> > Make sure you have 'loglevel=8 debug' on your Linux kernel command line.
> > Also, you might need to look in /var/log/kern at runtime (or maybe some
> > other place?)
> >
> >> Trying to repeat the process from this morning with a clean tree to confirm.
> >
> > OK.
> >
> 

[-- Attachment #2: patch --]
[-- Type: text/plain, Size: 9285 bytes --]

commit ff1902ee8462cf7d763a8fd42255a72f8e31bebe
Author: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date:   Mon Apr 11 22:37:52 2011 -0400

    blkback: Patch to fix unmapping pages while iSCSI still has references to them.
    
    Signed-off-by: Gary Grebus <ggrebus@virtualiron.com>
    Signed-off-by: Joshua Nicholas <jnicholas@virtualiron.com>
    [v1: Port from dom0-sources.4.5.16.11.tar.gz]
    [v2: Added a whole bunch of printks]
    [v3: Fixed merge error]
    Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>

diff --git a/drivers/xen/blkback/blkback.c b/drivers/xen/blkback/blkback.c
index 0bef445..ab8d12d 100644
--- a/drivers/xen/blkback/blkback.c
+++ b/drivers/xen/blkback/blkback.c
@@ -81,7 +81,11 @@ typedef struct {
 	atomic_t       pendcnt;
 	unsigned short operation;
 	int            status;
-	struct list_head free_list;
+	unsigned long delay_expire_time;
+	union {
+		struct list_head free_list;
+		struct list_head delayed_list;
+	};
 } pending_req_t;
 
 static pending_req_t *pending_reqs;
@@ -89,6 +93,10 @@ static struct list_head pending_free;
 static DEFINE_SPINLOCK(pending_free_lock);
 static DECLARE_WAIT_QUEUE_HEAD(pending_free_wq);
 
+static struct list_head delayed_reqs;
+static DEFINE_SPINLOCK(delayed_reqs_lock);
+static struct timer_list delayed_reqs_timer;
+
 #define BLKBACK_INVALID_HANDLE (~0)
 
 static struct page **pending_pages;
@@ -132,6 +140,8 @@ static pending_req_t* alloc_req(void)
 		list_del(&req->free_list);
 	}
 	spin_unlock_irqrestore(&pending_free_lock, flags);
+	if (req != NULL)
+		req->delay_expire_time = 0;
 	return req;
 }
 
@@ -169,29 +179,160 @@ static void plug_queue(blkif_t *blkif, struct block_device *bdev)
 	blkif->plug = q;
 }
 
-static void fast_flush_area(pending_req_t *req)
+static void delay_completion(pending_req_t *req)
+{
+	unsigned long flags;
+	pending_req_t *oldest;
+	unsigned long oldest_time;
+
+	req->delay_expire_time = jiffies + (HZ / 64);
+	spin_lock_irqsave(&delayed_reqs_lock, flags);
+	list_add_tail(&req->delayed_list, &delayed_reqs);
+
+	oldest = list_entry(delayed_reqs.next, pending_req_t, delayed_list);
+	oldest_time = oldest->delay_expire_time;
+	spin_unlock_irqrestore(&delayed_reqs_lock, flags);
+
+	mod_timer(&delayed_reqs_timer, oldest_time);
+	printk(KERN_INFO "%s: domid: %d (irq:%u) id: %llx added on delayed list\n",
+			__func__, (unsigned int)req->blkif->domid,
+			        req->blkif->irq,
+				req->id);
+}
+
+static int fast_flush_area(pending_req_t *req)
 {
 	struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST];
 	unsigned int i, invcount = 0;
 	grant_handle_t handle;
 	int ret;
 
+	struct page *pending_page;
+	struct page *new_page;
+	int pending_page_count;
+	int pending_page_mapcount;
+
+	unsigned int pgidx;
+	unsigned int busy_pages = 0;
+	int delay_expired;
+
+	/* Already waited for extra refs to clear on one or more pages? */
+	delay_expired = req->delay_expire_time &&
+		time_before(req->delay_expire_time, jiffies);
+
 	for (i = 0; i < req->nr_pages; i++) {
 		handle = pending_handle(req, i);
 		if (handle == BLKBACK_INVALID_HANDLE)
 			continue;
-		blkback_pagemap_clear(pending_page(req, i));
-		gnttab_set_unmap_op(&unmap[invcount], vaddr(req, i),
-				    GNTMAP_host_map, handle);
-		pending_handle(req, i) = BLKBACK_INVALID_HANDLE;
-		invcount++;
+
+		pgidx = vaddr_pagenr(req, i);
+		pending_page = pending_pages[pgidx];
+		pending_page_count = page_count(pending_page);
+		pending_page_mapcount = page_mapcount(pending_page);
+
+		if (pending_page_count <= 1) {
+			if (unlikely(pending_page_mapcount > 0)) {	/* used as a flag by netback lazy copying, */
+				reset_page_mapcount(pending_page);	/* but blocks gnttab_copy_grant_page() if set */
+			}
+			blkback_pagemap_clear(pending_page(req, i));
+			gnttab_set_unmap_op(&unmap[invcount++], vaddr(req, i),
+					    GNTMAP_host_map, handle);
+			pending_handle(req, i) = BLKBACK_INVALID_HANDLE;
+		} else {
+			printk(KERN_INFO "domid: %d (irq:%u) id: %llx count: %u, mapcount: %u, delay %s, busy_pages:%d\n",
+				(unsigned int)req->blkif->domid,
+			        req->blkif->irq,
+				req->id,
+				pending_page_count,
+				pending_page_mapcount,
+				delay_expired ? "expire" : "still ticking",
+				busy_pages);
+			if (!delay_expired) {
+				busy_pages++;
+			} else {
+				if (unlikely(pending_page_mapcount > 0)) {	/* used as a flag by netback lazy copying, */
+					reset_page_mapcount(pending_page);	/* but blocks gnttab_copy_grant_page() if set */
+					busy_pages++;
+				} else {
+					printk(KERN_INFO "domid: %d (irq:%u) id: %llx leaking page %d\n",
+			       			(unsigned int)req->blkif->domid,
+			       			req->blkif->irq, req->id,
+						pgidx);
+					new_page = pending_page;
+					ret = gnttab_copy_grant_page(handle, &new_page); /* a new dom0 page is returned */
+					if (!ret) {
+						pending_pages[pgidx] = new_page;
+						pending_handle(req, i) = BLKBACK_INVALID_HANDLE;
+					} else {
+						busy_pages++;
+					}
+				}
+			}
+		}
 	}
 
-	ret = HYPERVISOR_grant_table_op(
-		GNTTABOP_unmap_grant_ref, unmap, invcount);
-	BUG_ON(ret);
+	if (invcount > 0) {
+		ret = HYPERVISOR_grant_table_op(
+			GNTTABOP_unmap_grant_ref, unmap, invcount);
+		BUG_ON(ret);
+	}
+	return busy_pages;
 }
 
+
+/*
+ * Completion of a request is delayed if one or more of the mapped guest
+ * pages still has a reference.  This can happen if the block I/O gets
+ * turned into a network I/O for example with iSCSI.
+ */
+static void complete_delayed_reqs(unsigned long unused)
+{
+	pending_req_t *req;
+	pending_req_t *tmp;
+	unsigned long oldest_time = 0;
+	unsigned long flags;
+	unsigned long count = 0;
+	LIST_HEAD(work_list);
+
+	if (list_empty(&delayed_reqs))
+		return;
+
+	/* Grab the whole list */
+	spin_lock_irqsave(&delayed_reqs_lock, flags);
+	list_for_each_entry_safe(req, tmp, &delayed_reqs, delayed_list) {
+		count++;
+		list_move_tail(&req->delayed_list, &work_list);
+	}
+	spin_unlock_irqrestore(&delayed_reqs_lock, flags);
+	printk(KERN_INFO "%s: processing %ld requests.\n", __func__, count);
+	list_for_each_entry_safe(req, tmp, &work_list, delayed_list) {
+		printk(KERN_INFO "%s: domid: %d (irq:%u) id: %llx\n", __func__,
+			(unsigned int)req->blkif->domid,
+			req->blkif->irq, req->id);
+		if (unlikely(fast_flush_area(req))) {
+			spin_lock_irqsave(&delayed_reqs_lock, flags);
+			list_move_tail(&req->delayed_list, &delayed_reqs);
+			spin_unlock_irqrestore(&delayed_reqs_lock, flags);
+		} else {
+			list_del(&req->delayed_list);
+			make_response(req->blkif, req->id,
+				      req->operation, req->status);
+
+			blkif_put(req->blkif);
+			free_req(req);
+		}
+	}
+
+	spin_lock_irqsave(&delayed_reqs_lock, flags);
+	if (! list_empty(&delayed_reqs)) {
+		tmp = list_entry(delayed_reqs.next, pending_req_t,
+				 delayed_list);
+		oldest_time = tmp->delay_expire_time;
+	}
+	spin_unlock_irqrestore(&delayed_reqs_lock, flags);
+	if (oldest_time)
+		mod_timer(&delayed_reqs_timer, oldest_time);
+}
 /******************************************************************
  * SCHEDULER FUNCTIONS
  */
@@ -258,6 +399,10 @@ int blkif_schedule(void *arg)
 
 static void __end_block_io_op(pending_req_t *pending_req, int error)
 {
+
+
+	complete_delayed_reqs(0); /* Previously delayed reqs are usually done. */
+
 	/* An error fails the entire request. */
 	if ((pending_req->operation == BLKIF_OP_WRITE_BARRIER) &&
 	    (error == -EOPNOTSUPP)) {
@@ -271,9 +416,15 @@ static void __end_block_io_op(pending_req_t *pending_req, int error)
 	}
 
 	if (atomic_dec_and_test(&pending_req->pendcnt)) {
-		fast_flush_area(pending_req);
+		if (unlikely(fast_flush_area(pending_req))) {
+			/* Granted page(s) are still being referenced */
+			delay_completion(pending_req);
+			return;
+		}
+
 		make_response(pending_req->blkif, pending_req->id,
 			      pending_req->operation, pending_req->status);
+
 		blkif_put(pending_req->blkif);
 		free_req(pending_req);
 	}
@@ -316,6 +467,7 @@ static int do_block_io_op(blkif_t *blkif)
 	RING_IDX rc, rp;
 	int more_to_do = 0;
 
+ 
 	rc = blk_rings->common.req_cons;
 	rp = blk_rings->common.sring->req_prod;
 	rmb(); /* Ensure we see queued requests up to 'rp'. */
@@ -337,6 +489,7 @@ static int do_block_io_op(blkif_t *blkif)
 			break;
 		}
 
+
 		switch (blkif->blk_protocol) {
 		case BLKIF_PROTOCOL_NATIVE:
 			memcpy(&req, RING_GET_REQUEST(&blk_rings->native, rc), sizeof(req));
@@ -542,7 +695,13 @@ static void dispatch_rw_block_io(blkif_t *blkif,
 	return;
 
  fail_flush:
-	fast_flush_area(pending_req);
+	if (unlikely(fast_flush_area(pending_req))) {
+		/* Granted page(s) are being referenced by a previous I/O */
+		pending_req->status = BLKIF_RSP_ERROR;
+		delay_completion(pending_req);
+		msleep(1); /* back off a bit */
+		return;
+	}
  fail_response:
 	make_response(blkif, req->id, req->operation, BLKIF_RSP_ERROR);
 	free_req(pending_req);
@@ -652,6 +811,12 @@ static int __init blkif_init(void)
 	memset(pending_reqs, 0, sizeof(pending_reqs));
 	INIT_LIST_HEAD(&pending_free);
 
+	INIT_LIST_HEAD(&delayed_reqs);
+	spin_lock_init(&delayed_reqs_lock);
+	init_timer(&delayed_reqs_timer);
+	delayed_reqs_timer.data = 0;
+	delayed_reqs_timer.function = complete_delayed_reqs;
+
 	for (i = 0; i < blkif_reqs; i++)
 		list_add_tail(&pending_reqs[i].free_list, &pending_free);
 
