From: Boris Ostrovsky <boris.ostrovsky@oracle.com>
To: Valentin Priescu <vali.priescu@gmail.com>
Cc: "Tülin İzer" <tulinizer@gmail.com>,
xen-devel@lists.xensource.com, "Matt Wilson" <msw@amazon.com>,
"Valentin Priescu" <priescuv@amazon.com>
Subject: Re: [PATCH] xen-blkback: defer freeing blkif to avoid blocking xenwatch
Date: Mon, 12 May 2014 12:40:44 -0400 [thread overview]
Message-ID: <5370F98C.9020601@oracle.com> (raw)
In-Reply-To: <1399678479-14758-1-git-send-email-vali.priescu@gmail.com>
On 05/09/2014 07:34 PM, Valentin Priescu wrote:
> From: Valentin Priescu <priescuv@amazon.com>
>
> Currently xenwatch blocks in VBD disconnect, waiting for all pending I/O
> requests to finish. If the VBD is attached to a hot-swappable disk, then
> xenwatch can hang for a long period of time, stalling other watches.
Note that we have a GSoC student (copied here) who is going to be
looking at xenwatch's single-threadness (?) over the summer.
-boris
>
> INFO: task xenwatch:39 blocked for more than 120 seconds.
> "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
> ffff880057f01bd0 0000000000000246 ffff880057f01ac0 ffffffff810b0782
> ffff880057f01ad0 00000000000131c0 0000000000000004 ffff880057edb040
> ffff8800344c6080 0000000000000000 ffff880058c00ba0 ffff880057edb040
> Call Trace:
> [<ffffffff810b0782>] ? irq_to_desc+0x12/0x20
> [<ffffffff8128f761>] ? list_del+0x11/0x40
> [<ffffffff8147a080>] ? wait_for_common+0x60/0x160
> [<ffffffff8147bcef>] ? _raw_spin_lock_irqsave+0x2f/0x50
> [<ffffffff8147bd49>] ? _raw_spin_unlock_irqrestore+0x19/0x20
> [<ffffffff8147a26a>] schedule+0x3a/0x60
> [<ffffffffa018fe6a>] xen_blkif_disconnect+0x8a/0x100 [xen_blkback]
> [<ffffffff81079f70>] ? wake_up_bit+0x40/0x40
> [<ffffffffa018ffce>] xen_blkbk_remove+0xae/0x1e0 [xen_blkback]
> [<ffffffff8130b254>] xenbus_dev_remove+0x44/0x90
> [<ffffffff81345cb7>] __device_release_driver+0x77/0xd0
> [<ffffffff81346488>] device_release_driver+0x28/0x40
> [<ffffffff813456e8>] bus_remove_device+0x78/0xe0
> [<ffffffff81342c9f>] device_del+0x12f/0x1a0
> [<ffffffff81342d2d>] device_unregister+0x1d/0x60
> [<ffffffffa0190826>] frontend_changed+0xa6/0x4d0 [xen_blkback]
> [<ffffffffa019c252>] ? frontend_changed+0x192/0x650 [xen_netback]
> [<ffffffff8130ae50>] ? cmp_dev+0x60/0x60
> [<ffffffff81344fe4>] ? bus_for_each_dev+0x94/0xa0
> [<ffffffff8130b06e>] xenbus_otherend_changed+0xbe/0x120
> [<ffffffff8130b4cb>] frontend_changed+0xb/0x10
> [<ffffffff81309c82>] xenwatch_thread+0xf2/0x130
> [<ffffffff81079f70>] ? wake_up_bit+0x40/0x40
> [<ffffffff81309b90>] ? xenbus_directory+0x80/0x80
> [<ffffffff810799d6>] kthread+0x96/0xa0
> [<ffffffff81485934>] kernel_thread_helper+0x4/0x10
> [<ffffffff814839f3>] ? int_ret_from_sys_call+0x7/0x1b
> [<ffffffff8147c17c>] ? retint_restore_args+0x5/0x6
> [<ffffffff81485930>] ? gs_change+0x13/0x13
>
> With this patch, when there is still pending I/O, the actual disconnect
> is done by the last reference holder (last pending I/O request). In this
> case, xenwatch doesn't block indefinitely.
>
> Signed-off-by: Valentin Priescu <priescuv@amazon.com>
> Reviewed-by: Steven Kady <stevkady@amazon.com>
> Reviewed-by: Steven Noonan <snoonan@amazon.com>
> Cc: Matt Wilson <msw@amazon.com>
> ---
> drivers/block/xen-blkback/common.h | 4 +--
> drivers/block/xen-blkback/xenbus.c | 60 ++++++++++++++++++++++++++------------
> 2 files changed, 43 insertions(+), 21 deletions(-)
>
> diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h
> index be05277..f65b807 100644
> --- a/drivers/block/xen-blkback/common.h
> +++ b/drivers/block/xen-blkback/common.h
> @@ -314,7 +314,7 @@ struct xen_blkif {
> unsigned long long st_rd_sect;
> unsigned long long st_wr_sect;
>
> - wait_queue_head_t waiting_to_free;
> + struct work_struct free_work;
> /* Thread shutdown wait queue. */
> wait_queue_head_t shutdown_wq;
> };
> @@ -361,7 +361,7 @@ struct pending_req {
> #define xen_blkif_put(_b) \
> do { \
> if (atomic_dec_and_test(&(_b)->refcnt)) \
> - wake_up(&(_b)->waiting_to_free);\
> + schedule_work(&(_b)->free_work);\
> } while (0)
>
> struct phys_req {
> diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c
> index 9a547e6..7b6124e 100644
> --- a/drivers/block/xen-blkback/xenbus.c
> +++ b/drivers/block/xen-blkback/xenbus.c
> @@ -35,12 +35,26 @@ static void connect(struct backend_info *);
> static int connect_ring(struct backend_info *);
> static void backend_changed(struct xenbus_watch *, const char **,
> unsigned int);
> +static void xen_blkif_free(struct xen_blkif *blkif);
> +static void xen_vbd_free(struct xen_vbd *vbd);
>
> struct xenbus_device *xen_blkbk_xenbus(struct backend_info *be)
> {
> return be->dev;
> }
>
> +/*
> + * The last request could free the device from softirq context and
> + * xen_blkif_free() can sleep.
> + */
> +static void xen_blkif_deferred_free(struct work_struct *work)
> +{
> + struct xen_blkif *blkif;
> +
> + blkif = container_of(work, struct xen_blkif, free_work);
> + xen_blkif_free(blkif);
> +}
> +
> static int blkback_name(struct xen_blkif *blkif, char *buf)
> {
> char *devpath, *devname;
> @@ -121,7 +135,6 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid)
> init_completion(&blkif->drain_complete);
> atomic_set(&blkif->drain, 0);
> blkif->st_print = jiffies;
> - init_waitqueue_head(&blkif->waiting_to_free);
> blkif->persistent_gnts.rb_node = NULL;
> spin_lock_init(&blkif->free_pages_lock);
> INIT_LIST_HEAD(&blkif->free_pages);
> @@ -132,6 +145,7 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid)
> INIT_WORK(&blkif->persistent_purge_work, xen_blkbk_unmap_purged_grants);
>
> INIT_LIST_HEAD(&blkif->pending_free);
> + INIT_WORK(&blkif->free_work, xen_blkif_deferred_free);
>
> for (i = 0; i < XEN_BLKIF_REQS; i++) {
> req = kzalloc(sizeof(*req), GFP_KERNEL);
> @@ -231,7 +245,7 @@ static int xen_blkif_map(struct xen_blkif *blkif, unsigned long shared_page,
> return 0;
> }
>
> -static void xen_blkif_disconnect(struct xen_blkif *blkif)
> +static int xen_blkif_disconnect(struct xen_blkif *blkif)
> {
> if (blkif->xenblkd) {
> kthread_stop(blkif->xenblkd);
> @@ -239,19 +253,26 @@ static void xen_blkif_disconnect(struct xen_blkif *blkif)
> blkif->xenblkd = NULL;
> }
>
> - atomic_dec(&blkif->refcnt);
> - wait_event(blkif->waiting_to_free, atomic_read(&blkif->refcnt) == 0);
> - atomic_inc(&blkif->refcnt);
> + /* The above kthread_stop() guarantees that at this point we
> + * don't have any discard_io or other_io requests. So, checking
> + * for inflight IO is enough.
> + */
> + if (!atomic_read(&blkif->inflight)) {
> + if (blkif->irq) {
> + unbind_from_irqhandler(blkif->irq, blkif);
> + blkif->irq = 0;
> + }
>
> - if (blkif->irq) {
> - unbind_from_irqhandler(blkif->irq, blkif);
> - blkif->irq = 0;
> - }
> + if (blkif->blk_rings.common.sring) {
> + xenbus_unmap_ring_vfree(blkif->be->dev,
> + blkif->blk_ring);
> + blkif->blk_rings.common.sring = NULL;
> + }
>
> - if (blkif->blk_rings.common.sring) {
> - xenbus_unmap_ring_vfree(blkif->be->dev, blkif->blk_ring);
> - blkif->blk_rings.common.sring = NULL;
> + return 0;
> }
> +
> + return -EBUSY;
> }
>
> static void xen_blkif_free(struct xen_blkif *blkif)
> @@ -259,8 +280,8 @@ static void xen_blkif_free(struct xen_blkif *blkif)
> struct pending_req *req, *n;
> int i = 0, j;
>
> - if (!atomic_dec_and_test(&blkif->refcnt))
> - BUG();
> + xen_blkif_disconnect(blkif);
> + xen_vbd_free(&blkif->vbd);
>
> /* Remove all persistent grants and the cache of ballooned pages. */
> xen_blkbk_free_caches(blkif);
> @@ -449,16 +470,15 @@ static int xen_blkbk_remove(struct xenbus_device *dev)
> be->backend_watch.node = NULL;
> }
>
> + dev_set_drvdata(&dev->dev, NULL);
> +
> if (be->blkif) {
> xen_blkif_disconnect(be->blkif);
> - xen_vbd_free(&be->blkif->vbd);
> - xen_blkif_free(be->blkif);
> - be->blkif = NULL;
> + xen_blkif_put(be->blkif);
> }
>
> kfree(be->mode);
> kfree(be);
> - dev_set_drvdata(&dev->dev, NULL);
> return 0;
> }
>
> @@ -700,7 +720,9 @@ static void frontend_changed(struct xenbus_device *dev,
> * Enforce precondition before potential leak point.
> * xen_blkif_disconnect() is idempotent.
> */
> - xen_blkif_disconnect(be->blkif);
> + err = xen_blkif_disconnect(be->blkif);
> + if (err)
> + break;
>
> err = connect_ring(be);
> if (err)
next prev parent reply other threads:[~2014-05-12 16:40 UTC|newest]
Thread overview: 27+ messages / expand[flat|nested] mbox.gz Atom feed top
2014-05-09 23:34 [PATCH] xen-blkback: defer freeing blkif to avoid blocking xenwatch Valentin Priescu
2014-05-09 23:56 ` Matt Wilson
2014-05-12 6:42 ` Vasiliy Tolstov
2014-05-12 8:59 ` Valentin Priescu
2014-05-12 9:12 ` Alexey
2014-05-12 10:50 ` Valentin Priescu
2014-05-12 9:39 ` David Vrabel
2014-05-12 10:41 ` Valentin Priescu
2014-05-12 10:43 ` David Vrabel
2014-05-12 10:47 ` Valentin Priescu
2014-05-12 16:40 ` Boris Ostrovsky [this message]
2014-05-12 17:04 ` Valentin Priescu
2014-05-12 17:50 ` Boris Ostrovsky
2014-05-12 19:25 ` [PATCH v2] " Valentin Priescu
2014-05-13 9:32 ` David Vrabel
2014-05-13 17:00 ` Vasiliy Tolstov
2014-05-13 17:32 ` Valentin Priescu
2014-05-13 17:39 ` Vasiliy Tolstov
2014-05-16 8:37 ` Valentin Priescu
2014-06-02 16:20 ` Matt Wilson
2014-05-20 20:28 ` [PATCH v3] " Valentin Priescu
2014-05-22 9:11 ` Vasiliy Tolstov
2014-05-22 10:16 ` Valentin Priescu
2014-06-03 8:07 ` Vasiliy Tolstov
2014-06-03 9:59 ` Valentin Priescu
2014-06-02 16:13 ` Valentin Priescu
2014-06-02 16:56 ` Konrad Rzeszutek Wilk
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=5370F98C.9020601@oracle.com \
--to=boris.ostrovsky@oracle.com \
--cc=msw@amazon.com \
--cc=priescuv@amazon.com \
--cc=tulinizer@gmail.com \
--cc=vali.priescu@gmail.com \
--cc=xen-devel@lists.xensource.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.