From mboxrd@z Thu Jan 1 00:00:00 1970 From: Valentin Priescu Subject: Re: [PATCH] xen-blkback: defer freeing blkif to avoid blocking xenwatch Date: Mon, 12 May 2014 10:59:03 +0200 Message-ID: <53708D57.9080509@gmail.com> References: <1399678479-14758-1-git-send-email-vali.priescu@gmail.com> <20140509235653.GA22316@u109add4315675089e695.ant.amazon.com> Mime-Version: 1.0 Content-Type: text/plain; charset="us-ascii"; Format="flowed" Content-Transfer-Encoding: 7bit Return-path: In-Reply-To: List-Unsubscribe: , List-Post: List-Help: List-Subscribe: , Sender: xen-devel-bounces@lists.xen.org Errors-To: xen-devel-bounces@lists.xen.org To: Vasiliy Tolstov , Matt Wilson Cc: xen-devel@lists.xensource.com, Matt Wilson , Valentin Priescu List-Id: xen-devel@lists.xenproject.org Yes, this patch applies to all kernels >= v3.14 (including the latest stable). On 05/12/2014 08:42 AM, Vasiliy Tolstov wrote: > As i understand this is needed for stable too? > > 2014-05-10 3:56 GMT+04:00 Matt Wilson : >> On Sat, May 10, 2014 at 01:34:39AM +0200, Valentin Priescu wrote: >>> From: Valentin Priescu >>> >>> Currently xenwatch blocks in VBD disconnect, waiting for all pending I/O >>> requests to finish. If the VBD is attached to a hot-swappable disk, then >>> xenwatch can hang for a long period of time, stalling other watches. >>> >>> INFO: task xenwatch:39 blocked for more than 120 seconds. >>> "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. >>> ffff880057f01bd0 0000000000000246 ffff880057f01ac0 ffffffff810b0782 >>> ffff880057f01ad0 00000000000131c0 0000000000000004 ffff880057edb040 >>> ffff8800344c6080 0000000000000000 ffff880058c00ba0 ffff880057edb040 >>> Call Trace: >>> [] ? irq_to_desc+0x12/0x20 >>> [] ? list_del+0x11/0x40 >>> [] ? wait_for_common+0x60/0x160 >>> [] ? _raw_spin_lock_irqsave+0x2f/0x50 >>> [] ? _raw_spin_unlock_irqrestore+0x19/0x20 >>> [] schedule+0x3a/0x60 >>> [] xen_blkif_disconnect+0x8a/0x100 [xen_blkback] >>> [] ? wake_up_bit+0x40/0x40 >>> [] xen_blkbk_remove+0xae/0x1e0 [xen_blkback] >>> [] xenbus_dev_remove+0x44/0x90 >>> [] __device_release_driver+0x77/0xd0 >>> [] device_release_driver+0x28/0x40 >>> [] bus_remove_device+0x78/0xe0 >>> [] device_del+0x12f/0x1a0 >>> [] device_unregister+0x1d/0x60 >>> [] frontend_changed+0xa6/0x4d0 [xen_blkback] >>> [] ? frontend_changed+0x192/0x650 [xen_netback] >>> [] ? cmp_dev+0x60/0x60 >>> [] ? bus_for_each_dev+0x94/0xa0 >>> [] xenbus_otherend_changed+0xbe/0x120 >>> [] frontend_changed+0xb/0x10 >>> [] xenwatch_thread+0xf2/0x130 >>> [] ? wake_up_bit+0x40/0x40 >>> [] ? xenbus_directory+0x80/0x80 >>> [] kthread+0x96/0xa0 >>> [] kernel_thread_helper+0x4/0x10 >>> [] ? int_ret_from_sys_call+0x7/0x1b >>> [] ? retint_restore_args+0x5/0x6 >>> [] ? gs_change+0x13/0x13 >>> >>> With this patch, when there is still pending I/O, the actual disconnect >>> is done by the last reference holder (last pending I/O request). In this >>> case, xenwatch doesn't block indefinitely. >>> >>> Signed-off-by: Valentin Priescu >>> Reviewed-by: Steven Kady >>> Reviewed-by: Steven Noonan >>> Cc: Matt Wilson >> >> Acked-by: Matt Wilson >> >>> --- >>> drivers/block/xen-blkback/common.h | 4 +-- >>> drivers/block/xen-blkback/xenbus.c | 60 ++++++++++++++++++++++++++------------ >>> 2 files changed, 43 insertions(+), 21 deletions(-) >>> >>> diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h >>> index be05277..f65b807 100644 >>> --- a/drivers/block/xen-blkback/common.h >>> +++ b/drivers/block/xen-blkback/common.h >>> @@ -314,7 +314,7 @@ struct xen_blkif { >>> unsigned long long st_rd_sect; >>> unsigned long long st_wr_sect; >>> >>> - wait_queue_head_t waiting_to_free; >>> + struct work_struct free_work; >>> /* Thread shutdown wait queue. */ >>> wait_queue_head_t shutdown_wq; >>> }; >>> @@ -361,7 +361,7 @@ struct pending_req { >>> #define xen_blkif_put(_b) \ >>> do { \ >>> if (atomic_dec_and_test(&(_b)->refcnt)) \ >>> - wake_up(&(_b)->waiting_to_free);\ >>> + schedule_work(&(_b)->free_work);\ >>> } while (0) >>> >>> struct phys_req { >>> diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c >>> index 9a547e6..7b6124e 100644 >>> --- a/drivers/block/xen-blkback/xenbus.c >>> +++ b/drivers/block/xen-blkback/xenbus.c >>> @@ -35,12 +35,26 @@ static void connect(struct backend_info *); >>> static int connect_ring(struct backend_info *); >>> static void backend_changed(struct xenbus_watch *, const char **, >>> unsigned int); >>> +static void xen_blkif_free(struct xen_blkif *blkif); >>> +static void xen_vbd_free(struct xen_vbd *vbd); >>> >>> struct xenbus_device *xen_blkbk_xenbus(struct backend_info *be) >>> { >>> return be->dev; >>> } >>> >>> +/* >>> + * The last request could free the device from softirq context and >>> + * xen_blkif_free() can sleep. >>> + */ >>> +static void xen_blkif_deferred_free(struct work_struct *work) >>> +{ >>> + struct xen_blkif *blkif; >>> + >>> + blkif = container_of(work, struct xen_blkif, free_work); >>> + xen_blkif_free(blkif); >>> +} >>> + >>> static int blkback_name(struct xen_blkif *blkif, char *buf) >>> { >>> char *devpath, *devname; >>> @@ -121,7 +135,6 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid) >>> init_completion(&blkif->drain_complete); >>> atomic_set(&blkif->drain, 0); >>> blkif->st_print = jiffies; >>> - init_waitqueue_head(&blkif->waiting_to_free); >>> blkif->persistent_gnts.rb_node = NULL; >>> spin_lock_init(&blkif->free_pages_lock); >>> INIT_LIST_HEAD(&blkif->free_pages); >>> @@ -132,6 +145,7 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid) >>> INIT_WORK(&blkif->persistent_purge_work, xen_blkbk_unmap_purged_grants); >>> >>> INIT_LIST_HEAD(&blkif->pending_free); >>> + INIT_WORK(&blkif->free_work, xen_blkif_deferred_free); >>> >>> for (i = 0; i < XEN_BLKIF_REQS; i++) { >>> req = kzalloc(sizeof(*req), GFP_KERNEL); >>> @@ -231,7 +245,7 @@ static int xen_blkif_map(struct xen_blkif *blkif, unsigned long shared_page, >>> return 0; >>> } >>> >>> -static void xen_blkif_disconnect(struct xen_blkif *blkif) >>> +static int xen_blkif_disconnect(struct xen_blkif *blkif) >>> { >>> if (blkif->xenblkd) { >>> kthread_stop(blkif->xenblkd); >>> @@ -239,19 +253,26 @@ static void xen_blkif_disconnect(struct xen_blkif *blkif) >>> blkif->xenblkd = NULL; >>> } >>> >>> - atomic_dec(&blkif->refcnt); >>> - wait_event(blkif->waiting_to_free, atomic_read(&blkif->refcnt) == 0); >>> - atomic_inc(&blkif->refcnt); >>> + /* The above kthread_stop() guarantees that at this point we >>> + * don't have any discard_io or other_io requests. So, checking >>> + * for inflight IO is enough. >>> + */ >>> + if (!atomic_read(&blkif->inflight)) { >>> + if (blkif->irq) { >>> + unbind_from_irqhandler(blkif->irq, blkif); >>> + blkif->irq = 0; >>> + } >>> >>> - if (blkif->irq) { >>> - unbind_from_irqhandler(blkif->irq, blkif); >>> - blkif->irq = 0; >>> - } >>> + if (blkif->blk_rings.common.sring) { >>> + xenbus_unmap_ring_vfree(blkif->be->dev, >>> + blkif->blk_ring); >>> + blkif->blk_rings.common.sring = NULL; >>> + } >>> >>> - if (blkif->blk_rings.common.sring) { >>> - xenbus_unmap_ring_vfree(blkif->be->dev, blkif->blk_ring); >>> - blkif->blk_rings.common.sring = NULL; >>> + return 0; >>> } >>> + >>> + return -EBUSY; >>> } >>> >>> static void xen_blkif_free(struct xen_blkif *blkif) >>> @@ -259,8 +280,8 @@ static void xen_blkif_free(struct xen_blkif *blkif) >>> struct pending_req *req, *n; >>> int i = 0, j; >>> >>> - if (!atomic_dec_and_test(&blkif->refcnt)) >>> - BUG(); >>> + xen_blkif_disconnect(blkif); >>> + xen_vbd_free(&blkif->vbd); >>> >>> /* Remove all persistent grants and the cache of ballooned pages. */ >>> xen_blkbk_free_caches(blkif); >>> @@ -449,16 +470,15 @@ static int xen_blkbk_remove(struct xenbus_device *dev) >>> be->backend_watch.node = NULL; >>> } >>> >>> + dev_set_drvdata(&dev->dev, NULL); >>> + >>> if (be->blkif) { >>> xen_blkif_disconnect(be->blkif); >>> - xen_vbd_free(&be->blkif->vbd); >>> - xen_blkif_free(be->blkif); >>> - be->blkif = NULL; >>> + xen_blkif_put(be->blkif); >>> } >>> >>> kfree(be->mode); >>> kfree(be); >>> - dev_set_drvdata(&dev->dev, NULL); >>> return 0; >>> } >>> >>> @@ -700,7 +720,9 @@ static void frontend_changed(struct xenbus_device *dev, >>> * Enforce precondition before potential leak point. >>> * xen_blkif_disconnect() is idempotent. >>> */ >>> - xen_blkif_disconnect(be->blkif); >>> + err = xen_blkif_disconnect(be->blkif); >>> + if (err) >>> + break; >>> >>> err = connect_ring(be); >>> if (err) >>