From mboxrd@z Thu Jan 1 00:00:00 1970 From: hch@lst.de (Christoph Hellwig) Date: Thu, 4 Jan 2018 11:04:59 +0100 Subject: [PATCH v2 rfc] nvme-core/loop/rdma: Host delete_work and reset_work on separate workqueues In-Reply-To: <20171231120636.30593-1-sagi@grimberg.me> References: <20171231120636.30593-1-sagi@grimberg.me> Message-ID: <20180104100459.GD4436@lst.de> Looks fine to me. On Sun, Dec 31, 2017@02:06:36PM +0200, Sagi Grimberg wrote: > From: Roy Shterman > > We need to ensure that delete_work will be hosted on a different > workqueue than all the works we flush or cancel from it. > Otherwise we may hit a circular dependency warning [1]. > > Also, given that delete_work flushes reset_work, host reset_work > on nvme_reset_wq and delete_work on nvme_delete_wq. In addition, > fix the flushing in the individual drivers to flush nvme_delete_wq > when draining queued deletes. > > [1]: > [ 178.491942] ============================================= > [ 178.492718] [ INFO: possible recursive locking detected ] > [ 178.493495] 4.9.0-rc4-c844263313a8-lb #3 Tainted: G OE > [ 178.494382] --------------------------------------------- > [ 178.495160] kworker/5:1/135 is trying to acquire lock: > [ 178.495894] ( > [ 178.496120] "nvme-wq" > [ 178.496471] ){++++.+} > [ 178.496599] , at: > [ 178.496921] [] flush_work+0x1a6/0x2d0 > [ 178.497670] > but task is already holding lock: > [ 178.498499] ( > [ 178.498724] "nvme-wq" > [ 178.499074] ){++++.+} > [ 178.499202] , at: > [ 178.499520] [] process_one_work+0x162/0x6a0 > [ 178.500343] > other info that might help us debug this: > [ 178.501269] Possible unsafe locking scenario: > > [ 178.502113] CPU0 > [ 178.502472] ---- > [ 178.502829] lock( > [ 178.503115] "nvme-wq" > [ 178.503467] ); > [ 178.503716] lock( > [ 178.504001] "nvme-wq" > [ 178.504353] ); > [ 178.504601] > *** DEADLOCK *** > > [ 178.505441] May be due to missing lock nesting notation > > [ 178.506453] 2 locks held by kworker/5:1/135: > [ 178.507068] #0: > [ 178.507330] ( > [ 178.507598] "nvme-wq" > [ 178.507726] ){++++.+} > [ 178.508079] , at: > [ 178.508173] [] process_one_work+0x162/0x6a0 > [ 178.509004] #1: > [ 178.509265] ( > [ 178.509532] (&ctrl->delete_work) > [ 178.509795] ){+.+.+.} > [ 178.510145] , at: > [ 178.510239] [] process_one_work+0x162/0x6a0 > [ 178.511070] > stack backtrace: > : > [ 178.511693] CPU: 5 PID: 135 Comm: kworker/5:1 Tainted: G OE 4.9.0-rc4-c844263313a8-lb #3 > [ 178.512974] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.1-1ubuntu1 04/01/2014 > [ 178.514247] Workqueue: nvme-wq nvme_del_ctrl_work [nvme_tcp] > [ 178.515071] ffffc2668175bae0 ffffffffa7450823 ffffffffa88abd80 ffffffffa88abd80 > [ 178.516195] ffffc2668175bb98 ffffffffa70eb012 ffffffffa8d8d90d ffff9c472e9ea700 > [ 178.517318] ffff9c472e9ea700 ffff9c4700000000 ffff9c4700007200 ab83be61bec0d50e > [ 178.518443] Call Trace: > [ 178.518807] [] dump_stack+0x85/0xc2 > [ 178.519542] [] __lock_acquire+0x17d2/0x18f0 > [ 178.520377] [] ? serial8250_console_putchar+0x27/0x30 > [ 178.521330] [] ? wait_for_xmitr+0xa0/0xa0 > [ 178.522174] [] ? flush_work+0x18b/0x2d0 > [ 178.522975] [] lock_acquire+0x11b/0x220 > [ 178.523753] [] ? flush_work+0x1a6/0x2d0 > [ 178.524535] [] flush_work+0x1c9/0x2d0 > [ 178.525291] [] ? flush_work+0x1a6/0x2d0 > [ 178.526077] [] ? flush_workqueue_prep_pwqs+0x220/0x220 > [ 178.527040] [] __cancel_work_timer+0x10f/0x1d0 > [ 178.527907] [] ? vprintk_default+0x29/0x40 > [ 178.528726] [] ? printk+0x48/0x50 > [ 178.529434] [] cancel_delayed_work_sync+0x13/0x20 > [ 178.530381] [] nvme_stop_ctrl+0x5b/0x70 [nvme_core] > [ 178.531314] [] nvme_del_ctrl_work+0x2c/0x50 [nvme_tcp] > [ 178.532271] [] process_one_work+0x1e1/0x6a0 > [ 178.533101] [] ? process_one_work+0x162/0x6a0 > [ 178.533954] [] worker_thread+0x4e/0x490 > [ 178.534735] [] ? process_one_work+0x6a0/0x6a0 > [ 178.535588] [] ? process_one_work+0x6a0/0x6a0 > [ 178.536441] [] kthread+0xff/0x120 > [ 178.537149] [] ? kthread_park+0x60/0x60 > [ 178.538094] [] ? kthread_park+0x60/0x60 > [ 178.538900] [] ret_from_fork+0x2a/0x40 > > Signed-off-by: Roy Shterman > Signed-off-by: Sagi Grimberg > --- > Changes from v1: > - Introduced nvme_reset_wq and nvme_delete_wq memreclaim enabled workqueues > - Added documentation on the roles of nvme private workqueues > > drivers/nvme/host/core.c | 44 +++++++++++++++++++++++++++++++++++++++----- > drivers/nvme/host/nvme.h | 2 ++ > drivers/nvme/host/rdma.c | 2 +- > drivers/nvme/target/loop.c | 2 +- > 4 files changed, 43 insertions(+), 7 deletions(-) > > diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c > index 1e46e60b8f10..e4b9960a5b07 100644 > --- a/drivers/nvme/host/core.c > +++ b/drivers/nvme/host/core.c > @@ -65,9 +65,26 @@ static bool streams; > module_param(streams, bool, 0644); > MODULE_PARM_DESC(streams, "turn on support for Streams write directives"); > > +/* > + * nvme_wq - hosts nvme related works that are not reset or delete > + * nvme_reset_wq - hosts nvme reset works > + * nvme_delete_wq - hosts nvme delete works > + * > + * nvme_wq will host works such are scan, aen handling, fw activation, > + * keep-alive error recovery, periodic reconnects etc. nvme_reset_wq > + * runs reset works which also flush works hosted on nvme_wq for > + * serialization purposes. nvme_delete_wq host controller deletion > + * works which flush reset works for serialization. > + */ > struct workqueue_struct *nvme_wq; > EXPORT_SYMBOL_GPL(nvme_wq); > > +struct workqueue_struct *nvme_reset_wq; > +EXPORT_SYMBOL_GPL(nvme_reset_wq); > + > +struct workqueue_struct *nvme_delete_wq; > +EXPORT_SYMBOL_GPL(nvme_delete_wq); > + > static DEFINE_IDA(nvme_subsystems_ida); > static LIST_HEAD(nvme_subsystems); > static DEFINE_MUTEX(nvme_subsystems_lock); > @@ -89,7 +106,7 @@ int nvme_reset_ctrl(struct nvme_ctrl *ctrl) > { > if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING)) > return -EBUSY; > - if (!queue_work(nvme_wq, &ctrl->reset_work)) > + if (!queue_work(nvme_reset_wq, &ctrl->reset_work)) > return -EBUSY; > return 0; > } > @@ -122,7 +139,7 @@ int nvme_delete_ctrl(struct nvme_ctrl *ctrl) > { > if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_DELETING)) > return -EBUSY; > - if (!queue_work(nvme_wq, &ctrl->delete_work)) > + if (!queue_work(nvme_delete_wq, &ctrl->delete_work)) > return -EBUSY; > return 0; > } > @@ -3472,16 +3489,26 @@ EXPORT_SYMBOL_GPL(nvme_reinit_tagset); > > int __init nvme_core_init(void) > { > - int result; > + int result = -ENOMEM; > > nvme_wq = alloc_workqueue("nvme-wq", > WQ_UNBOUND | WQ_MEM_RECLAIM | WQ_SYSFS, 0); > if (!nvme_wq) > - return -ENOMEM; > + goto out; > + > + nvme_reset_wq = alloc_workqueue("nvme-reset-wq", > + WQ_UNBOUND | WQ_MEM_RECLAIM | WQ_SYSFS, 0); > + if (!nvme_reset_wq) > + goto destroy_wq; > + > + nvme_delete_wq = alloc_workqueue("nvme-delete-wq", > + WQ_UNBOUND | WQ_MEM_RECLAIM | WQ_SYSFS, 0); > + if (!nvme_delete_wq) > + goto destroy_reset_wq; > > result = alloc_chrdev_region(&nvme_chr_devt, 0, NVME_MINORS, "nvme"); > if (result < 0) > - goto destroy_wq; > + goto destroy_delete_wq; > > nvme_class = class_create(THIS_MODULE, "nvme"); > if (IS_ERR(nvme_class)) { > @@ -3500,8 +3527,13 @@ int __init nvme_core_init(void) > class_destroy(nvme_class); > unregister_chrdev: > unregister_chrdev_region(nvme_chr_devt, NVME_MINORS); > +destroy_delete_wq: > + destroy_workqueue(nvme_delete_wq); > +destroy_reset_wq: > + destroy_workqueue(nvme_reset_wq); > destroy_wq: > destroy_workqueue(nvme_wq); > +out: > return result; > } > > @@ -3511,6 +3543,8 @@ void nvme_core_exit(void) > class_destroy(nvme_subsys_class); > class_destroy(nvme_class); > unregister_chrdev_region(nvme_chr_devt, NVME_MINORS); > + destroy_workqueue(nvme_delete_wq); > + destroy_workqueue(nvme_reset_wq); > destroy_workqueue(nvme_wq); > } > > diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h > index ea1aa5283e8e..3cddb73f563e 100644 > --- a/drivers/nvme/host/nvme.h > +++ b/drivers/nvme/host/nvme.h > @@ -32,6 +32,8 @@ extern unsigned int admin_timeout; > #define NVME_KATO_GRACE 10 > > extern struct workqueue_struct *nvme_wq; > +extern struct workqueue_struct *nvme_reset_wq; > +extern struct workqueue_struct *nvme_delete_wq; > > enum { > NVME_NS_LBA = 0, > diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c > index 37af56596be6..75623e783bcd 100644 > --- a/drivers/nvme/host/rdma.c > +++ b/drivers/nvme/host/rdma.c > @@ -2028,7 +2028,7 @@ static void nvme_rdma_remove_one(struct ib_device *ib_device, void *client_data) > } > mutex_unlock(&nvme_rdma_ctrl_mutex); > > - flush_workqueue(nvme_wq); > + flush_workqueue(nvme_delete_wq); > } > > static struct ib_client nvme_rdma_ib_client = { > diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c > index 1e21b286f299..5d8054f9a412 100644 > --- a/drivers/nvme/target/loop.c > +++ b/drivers/nvme/target/loop.c > @@ -716,7 +716,7 @@ static void __exit nvme_loop_cleanup_module(void) > nvme_delete_ctrl(&ctrl->ctrl); > mutex_unlock(&nvme_loop_ctrl_mutex); > > - flush_workqueue(nvme_wq); > + flush_workqueue(nvme_delete_wq); > } > > module_init(nvme_loop_init_module); > -- > 2.14.1 ---end quoted text---