From: Valentin Priescu <vali.priescu@gmail.com>
To: xen-devel@lists.xensource.com
Cc: Valentin Priescu <priescuv@amazon.com>,
David Vrabel <david.vrabel@citrix.com>,
Matt Wilson <msw@amazon.com>
Subject: [PATCH v3] xen-blkback: defer freeing blkif to avoid blocking xenwatch
Date: Tue, 20 May 2014 22:28:50 +0200 [thread overview]
Message-ID: <1400617730-774-1-git-send-email-vali.priescu@gmail.com> (raw)
In-Reply-To: <1399922737-10741-1-git-send-email-vali.priescu@gmail.com>
From: Valentin Priescu <priescuv@amazon.com>
Currently xenwatch blocks in VBD disconnect, waiting for all pending I/O
requests to finish. If the VBD is attached to a hot-swappable disk, then
xenwatch can hang for a long period of time, stalling other watches.
INFO: task xenwatch:39 blocked for more than 120 seconds.
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
ffff880057f01bd0 0000000000000246 ffff880057f01ac0 ffffffff810b0782
ffff880057f01ad0 00000000000131c0 0000000000000004 ffff880057edb040
ffff8800344c6080 0000000000000000 ffff880058c00ba0 ffff880057edb040
Call Trace:
[<ffffffff810b0782>] ? irq_to_desc+0x12/0x20
[<ffffffff8128f761>] ? list_del+0x11/0x40
[<ffffffff8147a080>] ? wait_for_common+0x60/0x160
[<ffffffff8147bcef>] ? _raw_spin_lock_irqsave+0x2f/0x50
[<ffffffff8147bd49>] ? _raw_spin_unlock_irqrestore+0x19/0x20
[<ffffffff8147a26a>] schedule+0x3a/0x60
[<ffffffffa018fe6a>] xen_blkif_disconnect+0x8a/0x100 [xen_blkback]
[<ffffffff81079f70>] ? wake_up_bit+0x40/0x40
[<ffffffffa018ffce>] xen_blkbk_remove+0xae/0x1e0 [xen_blkback]
[<ffffffff8130b254>] xenbus_dev_remove+0x44/0x90
[<ffffffff81345cb7>] __device_release_driver+0x77/0xd0
[<ffffffff81346488>] device_release_driver+0x28/0x40
[<ffffffff813456e8>] bus_remove_device+0x78/0xe0
[<ffffffff81342c9f>] device_del+0x12f/0x1a0
[<ffffffff81342d2d>] device_unregister+0x1d/0x60
[<ffffffffa0190826>] frontend_changed+0xa6/0x4d0 [xen_blkback]
[<ffffffffa019c252>] ? frontend_changed+0x192/0x650 [xen_netback]
[<ffffffff8130ae50>] ? cmp_dev+0x60/0x60
[<ffffffff81344fe4>] ? bus_for_each_dev+0x94/0xa0
[<ffffffff8130b06e>] xenbus_otherend_changed+0xbe/0x120
[<ffffffff8130b4cb>] frontend_changed+0xb/0x10
[<ffffffff81309c82>] xenwatch_thread+0xf2/0x130
[<ffffffff81079f70>] ? wake_up_bit+0x40/0x40
[<ffffffff81309b90>] ? xenbus_directory+0x80/0x80
[<ffffffff810799d6>] kthread+0x96/0xa0
[<ffffffff81485934>] kernel_thread_helper+0x4/0x10
[<ffffffff814839f3>] ? int_ret_from_sys_call+0x7/0x1b
[<ffffffff8147c17c>] ? retint_restore_args+0x5/0x6
[<ffffffff81485930>] ? gs_change+0x13/0x13
With this patch, when there is still pending I/O, the actual disconnect
is done by the last reference holder (last pending I/O request). In this
case, xenwatch doesn't block indefinitely.
Signed-off-by: Valentin Priescu <priescuv@amazon.com>
Reviewed-by: Steven Kady <stevkady@amazon.com>
Reviewed-by: Steven Noonan <snoonan@amazon.com>
Reviewed-by: David Vrabel <david.vrabel@citrix.com>
---
v2: Reworked an 'if' statement in xen_blkif_disconnect(), as
suggested by David Vrabel <david.vrabel@citrix.com>
Handle the case when xen_blkif_disconnect() returns -EBUSY on
frontend_changed(), as noted by David Vrabel <david.vrabel@citrix.com>
v3: Update reviewers list.
drivers/block/xen-blkback/common.h | 4 ++--
drivers/block/xen-blkback/xenbus.c | 46 ++++++++++++++++++++++++++++----------
2 files changed, 36 insertions(+), 14 deletions(-)
diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h
index be05277..f65b807 100644
--- a/drivers/block/xen-blkback/common.h
+++ b/drivers/block/xen-blkback/common.h
@@ -314,7 +314,7 @@ struct xen_blkif {
unsigned long long st_rd_sect;
unsigned long long st_wr_sect;
- wait_queue_head_t waiting_to_free;
+ struct work_struct free_work;
/* Thread shutdown wait queue. */
wait_queue_head_t shutdown_wq;
};
@@ -361,7 +361,7 @@ struct pending_req {
#define xen_blkif_put(_b) \
do { \
if (atomic_dec_and_test(&(_b)->refcnt)) \
- wake_up(&(_b)->waiting_to_free);\
+ schedule_work(&(_b)->free_work);\
} while (0)
struct phys_req {
diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c
index 9a547e6..e911d28 100644
--- a/drivers/block/xen-blkback/xenbus.c
+++ b/drivers/block/xen-blkback/xenbus.c
@@ -35,12 +35,26 @@ static void connect(struct backend_info *);
static int connect_ring(struct backend_info *);
static void backend_changed(struct xenbus_watch *, const char **,
unsigned int);
+static void xen_blkif_free(struct xen_blkif *blkif);
+static void xen_vbd_free(struct xen_vbd *vbd);
struct xenbus_device *xen_blkbk_xenbus(struct backend_info *be)
{
return be->dev;
}
+/*
+ * The last request could free the device from softirq context and
+ * xen_blkif_free() can sleep.
+ */
+static void xen_blkif_deferred_free(struct work_struct *work)
+{
+ struct xen_blkif *blkif;
+
+ blkif = container_of(work, struct xen_blkif, free_work);
+ xen_blkif_free(blkif);
+}
+
static int blkback_name(struct xen_blkif *blkif, char *buf)
{
char *devpath, *devname;
@@ -121,7 +135,6 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid)
init_completion(&blkif->drain_complete);
atomic_set(&blkif->drain, 0);
blkif->st_print = jiffies;
- init_waitqueue_head(&blkif->waiting_to_free);
blkif->persistent_gnts.rb_node = NULL;
spin_lock_init(&blkif->free_pages_lock);
INIT_LIST_HEAD(&blkif->free_pages);
@@ -132,6 +145,7 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid)
INIT_WORK(&blkif->persistent_purge_work, xen_blkbk_unmap_purged_grants);
INIT_LIST_HEAD(&blkif->pending_free);
+ INIT_WORK(&blkif->free_work, xen_blkif_deferred_free);
for (i = 0; i < XEN_BLKIF_REQS; i++) {
req = kzalloc(sizeof(*req), GFP_KERNEL);
@@ -231,7 +245,7 @@ static int xen_blkif_map(struct xen_blkif *blkif, unsigned long shared_page,
return 0;
}
-static void xen_blkif_disconnect(struct xen_blkif *blkif)
+static int xen_blkif_disconnect(struct xen_blkif *blkif)
{
if (blkif->xenblkd) {
kthread_stop(blkif->xenblkd);
@@ -239,9 +253,12 @@ static void xen_blkif_disconnect(struct xen_blkif *blkif)
blkif->xenblkd = NULL;
}
- atomic_dec(&blkif->refcnt);
- wait_event(blkif->waiting_to_free, atomic_read(&blkif->refcnt) == 0);
- atomic_inc(&blkif->refcnt);
+ /* The above kthread_stop() guarantees that at this point we
+ * don't have any discard_io or other_io requests. So, checking
+ * for inflight IO is enough.
+ */
+ if (atomic_read(&blkif->inflight) > 0)
+ return -EBUSY;
if (blkif->irq) {
unbind_from_irqhandler(blkif->irq, blkif);
@@ -252,6 +269,8 @@ static void xen_blkif_disconnect(struct xen_blkif *blkif)
xenbus_unmap_ring_vfree(blkif->be->dev, blkif->blk_ring);
blkif->blk_rings.common.sring = NULL;
}
+
+ return 0;
}
static void xen_blkif_free(struct xen_blkif *blkif)
@@ -259,8 +278,8 @@ static void xen_blkif_free(struct xen_blkif *blkif)
struct pending_req *req, *n;
int i = 0, j;
- if (!atomic_dec_and_test(&blkif->refcnt))
- BUG();
+ xen_blkif_disconnect(blkif);
+ xen_vbd_free(&blkif->vbd);
/* Remove all persistent grants and the cache of ballooned pages. */
xen_blkbk_free_caches(blkif);
@@ -449,16 +468,15 @@ static int xen_blkbk_remove(struct xenbus_device *dev)
be->backend_watch.node = NULL;
}
+ dev_set_drvdata(&dev->dev, NULL);
+
if (be->blkif) {
xen_blkif_disconnect(be->blkif);
- xen_vbd_free(&be->blkif->vbd);
- xen_blkif_free(be->blkif);
- be->blkif = NULL;
+ xen_blkif_put(be->blkif);
}
kfree(be->mode);
kfree(be);
- dev_set_drvdata(&dev->dev, NULL);
return 0;
}
@@ -700,7 +718,11 @@ static void frontend_changed(struct xenbus_device *dev,
* Enforce precondition before potential leak point.
* xen_blkif_disconnect() is idempotent.
*/
- xen_blkif_disconnect(be->blkif);
+ err = xen_blkif_disconnect(be->blkif);
+ if (err) {
+ xenbus_dev_fatal(dev, err, "pending I/O");
+ break;
+ }
err = connect_ring(be);
if (err)
--
1.9.1
next prev parent reply other threads:[~2014-05-20 20:28 UTC|newest]
Thread overview: 27+ messages / expand[flat|nested] mbox.gz Atom feed top
2014-05-09 23:34 [PATCH] xen-blkback: defer freeing blkif to avoid blocking xenwatch Valentin Priescu
2014-05-09 23:56 ` Matt Wilson
2014-05-12 6:42 ` Vasiliy Tolstov
2014-05-12 8:59 ` Valentin Priescu
2014-05-12 9:12 ` Alexey
2014-05-12 10:50 ` Valentin Priescu
2014-05-12 9:39 ` David Vrabel
2014-05-12 10:41 ` Valentin Priescu
2014-05-12 10:43 ` David Vrabel
2014-05-12 10:47 ` Valentin Priescu
2014-05-12 16:40 ` Boris Ostrovsky
2014-05-12 17:04 ` Valentin Priescu
2014-05-12 17:50 ` Boris Ostrovsky
2014-05-12 19:25 ` [PATCH v2] " Valentin Priescu
2014-05-13 9:32 ` David Vrabel
2014-05-13 17:00 ` Vasiliy Tolstov
2014-05-13 17:32 ` Valentin Priescu
2014-05-13 17:39 ` Vasiliy Tolstov
2014-05-16 8:37 ` Valentin Priescu
2014-06-02 16:20 ` Matt Wilson
2014-05-20 20:28 ` Valentin Priescu [this message]
2014-05-22 9:11 ` [PATCH v3] " Vasiliy Tolstov
2014-05-22 10:16 ` Valentin Priescu
2014-06-03 8:07 ` Vasiliy Tolstov
2014-06-03 9:59 ` Valentin Priescu
2014-06-02 16:13 ` Valentin Priescu
2014-06-02 16:56 ` Konrad Rzeszutek Wilk
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1400617730-774-1-git-send-email-vali.priescu@gmail.com \
--to=vali.priescu@gmail.com \
--cc=david.vrabel@citrix.com \
--cc=msw@amazon.com \
--cc=priescuv@amazon.com \
--cc=xen-devel@lists.xensource.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).