* [PATCH v2 1/2] block: Add a third argument to blk_alloc_queue_node() [not found] <20180131235300.12773-1-bart.vanassche@wdc.com> @ 2018-01-31 23:52 ` Bart Van Assche 2018-01-31 23:53 ` [PATCH v2 2/2] block: Fix a race between the throttling code and request queue initialization Bart Van Assche 1 sibling, 0 replies; 8+ messages in thread From: Bart Van Assche @ 2018-01-31 23:52 UTC (permalink / raw) To: Jens Axboe Cc: linux-block, Christoph Hellwig, Bart Van Assche, Joseph Qi, Philipp Reisner, Ulf Hansson, Kees Cook, stable This patch does not change any functionality. Signed-off-by: Bart Van Assche <bart.vanassche@wdc.com> Cc: Christoph Hellwig <hch@lst.de> Cc: Joseph Qi <joseph.qi@linux.alibaba.com> Cc: Philipp Reisner <philipp.reisner@linbit.com> Cc: Ulf Hansson <ulf.hansson@linaro.org> Cc: Kees Cook <keescook@chromium.org> Cc: <stable@vger.kernel.org> --- block/blk-core.c | 7 ++++--- block/blk-mq.c | 2 +- drivers/block/null_blk.c | 3 ++- drivers/ide/ide-probe.c | 2 +- drivers/lightnvm/core.c | 2 +- drivers/md/dm.c | 2 +- drivers/nvdimm/pmem.c | 2 +- drivers/nvme/host/multipath.c | 2 +- drivers/scsi/scsi_lib.c | 2 +- include/linux/blkdev.h | 3 ++- 10 files changed, 15 insertions(+), 12 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index bd43bc50740a..860a039fd1a8 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -868,7 +868,7 @@ void blk_exit_rl(struct request_queue *q, struct request_list *rl) struct request_queue *blk_alloc_queue(gfp_t gfp_mask) { - return blk_alloc_queue_node(gfp_mask, NUMA_NO_NODE); + return blk_alloc_queue_node(gfp_mask, NUMA_NO_NODE, NULL); } EXPORT_SYMBOL(blk_alloc_queue); @@ -946,7 +946,8 @@ static void blk_rq_timed_out_timer(struct timer_list *t) kblockd_schedule_work(&q->timeout_work); } -struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id) +struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id, + spinlock_t *lock) { struct request_queue *q; @@ -1088,7 +1089,7 @@ 
blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id) { struct request_queue *q; - q = blk_alloc_queue_node(GFP_KERNEL, node_id); + q = blk_alloc_queue_node(GFP_KERNEL, node_id, NULL); if (!q) return NULL; diff --git a/block/blk-mq.c b/block/blk-mq.c index aacc5280e25f..8191391d1a1d 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2554,7 +2554,7 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set) { struct request_queue *uninit_q, *q; - uninit_q = blk_alloc_queue_node(GFP_KERNEL, set->numa_node); + uninit_q = blk_alloc_queue_node(GFP_KERNEL, set->numa_node, NULL); if (!uninit_q) return ERR_PTR(-ENOMEM); diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c index aa1c7d4bcac5..9fe8f2a3ec45 100644 --- a/drivers/block/null_blk.c +++ b/drivers/block/null_blk.c @@ -1717,7 +1717,8 @@ static int null_add_dev(struct nullb_device *dev) } null_init_queues(nullb); } else if (dev->queue_mode == NULL_Q_BIO) { - nullb->q = blk_alloc_queue_node(GFP_KERNEL, dev->home_node); + nullb->q = blk_alloc_queue_node(GFP_KERNEL, dev->home_node, + NULL); if (!nullb->q) { rv = -ENOMEM; goto out_cleanup_queues; diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c index 70d6d8ff0fd9..1303d0e31e80 100644 --- a/drivers/ide/ide-probe.c +++ b/drivers/ide/ide-probe.c @@ -766,7 +766,7 @@ static int ide_init_queue(ide_drive_t *drive) * limits and LBA48 we could raise it but as yet * do not. 
*/ - q = blk_alloc_queue_node(GFP_KERNEL, hwif_to_node(hwif)); + q = blk_alloc_queue_node(GFP_KERNEL, hwif_to_node(hwif), NULL); if (!q) return 1; diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c index dcc9e621e651..5f1988df1593 100644 --- a/drivers/lightnvm/core.c +++ b/drivers/lightnvm/core.c @@ -384,7 +384,7 @@ static int nvm_create_tgt(struct nvm_dev *dev, struct nvm_ioctl_create *create) goto err_dev; } - tqueue = blk_alloc_queue_node(GFP_KERNEL, dev->q->node); + tqueue = blk_alloc_queue_node(GFP_KERNEL, dev->q->node, NULL); if (!tqueue) { ret = -ENOMEM; goto err_disk; diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 5c478c185041..93f3ef15b4b2 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1731,7 +1731,7 @@ static struct mapped_device *alloc_dev(int minor) INIT_LIST_HEAD(&md->table_devices); spin_lock_init(&md->uevent_lock); - md->queue = blk_alloc_queue_node(GFP_KERNEL, numa_node_id); + md->queue = blk_alloc_queue_node(GFP_KERNEL, numa_node_id, NULL); if (!md->queue) goto bad; diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index 49285701fe48..118b4b13592d 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -342,7 +342,7 @@ static int pmem_attach_disk(struct device *dev, return -EBUSY; } - q = blk_alloc_queue_node(GFP_KERNEL, dev_to_node(dev)); + q = blk_alloc_queue_node(GFP_KERNEL, dev_to_node(dev), NULL); if (!q) return -ENOMEM; diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 605f53376e94..4e362949721a 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -162,7 +162,7 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head) if (!(ctrl->subsys->cmic & (1 << 1)) || !multipath) return 0; - q = blk_alloc_queue_node(GFP_KERNEL, NUMA_NO_NODE); + q = blk_alloc_queue_node(GFP_KERNEL, NUMA_NO_NODE, NULL); if (!q) goto out; q->queuedata = head; diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 
3fcf5c7c7917..799d02615e71 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -2206,7 +2206,7 @@ struct request_queue *scsi_old_alloc_queue(struct scsi_device *sdev) struct Scsi_Host *shost = sdev->host; struct request_queue *q; - q = blk_alloc_queue_node(GFP_KERNEL, NUMA_NO_NODE); + q = blk_alloc_queue_node(GFP_KERNEL, NUMA_NO_NODE, NULL); if (!q) return NULL; q->cmd_size = sizeof(struct scsi_cmnd) + shost->hostt->cmd_size; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 781992c4124e..f1f3ad6419f1 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1330,7 +1330,8 @@ extern long nr_blockdev_pages(void); bool __must_check blk_get_queue(struct request_queue *); struct request_queue *blk_alloc_queue(gfp_t); -struct request_queue *blk_alloc_queue_node(gfp_t, int); +struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id, + spinlock_t *lock); extern void blk_put_queue(struct request_queue *); extern void blk_set_queue_dying(struct request_queue *); -- 2.16.0 ^ permalink raw reply related [flat|nested] 8+ messages in thread
* [PATCH v2 2/2] block: Fix a race between the throttling code and request queue initialization [not found] <20180131235300.12773-1-bart.vanassche@wdc.com> 2018-01-31 23:52 ` [PATCH v2 1/2] block: Add a third argument to blk_alloc_queue_node() Bart Van Assche @ 2018-01-31 23:53 ` Bart Van Assche 2018-02-01 1:53 ` Joseph Qi 1 sibling, 1 reply; 8+ messages in thread From: Bart Van Assche @ 2018-01-31 23:53 UTC (permalink / raw) To: Jens Axboe Cc: linux-block, Christoph Hellwig, Bart Van Assche, Joseph Qi, Philipp Reisner, Ulf Hansson, Kees Cook, stable Initialize the request queue lock earlier such that the following race can no longer occur: blk_init_queue_node blkcg_print_blkgs blk_alloc_queue_node (1) q->queue_lock = &q->__queue_lock (2) blkcg_init_queue(q) (3) spin_lock_irq(blkg->q->queue_lock) (4) q->queue_lock = lock (5) spin_unlock_irq(blkg->q->queue_lock) (6) (1) allocate an uninitialized queue; (2) initialize queue_lock to its default internal lock; (3) initialize blkcg part of request queue, which will create blkg and then insert it to blkg_list; (4) traverse blkg_list and find the created blkg, and then take its queue lock, here it is the default *internal lock*; (5) *race window*, now queue_lock is overridden with *driver specified lock*; (6) now unlock *driver specified lock*, not the locked *internal lock*, unlock balance breaks. The changes in this patch are as follows: - Move the .queue_lock initialization from blk_init_queue_node() into blk_alloc_queue_node(). - For all block drivers that initialize .queue_lock explicitly, change the blk_alloc_queue() call in the driver into a blk_alloc_queue_node() call and remove the explicit .queue_lock initialization. Additionally, initialize the spin lock that will be used as queue lock earlier if necessary. 
Reported-by: Joseph Qi <joseph.qi@linux.alibaba.com> Signed-off-by: Bart Van Assche <bart.vanassche@wdc.com> Cc: Christoph Hellwig <hch@lst.de> Cc: Joseph Qi <joseph.qi@linux.alibaba.com> Cc: Philipp Reisner <philipp.reisner@linbit.com> Cc: Ulf Hansson <ulf.hansson@linaro.org> Cc: Kees Cook <keescook@chromium.org> Cc: <stable@vger.kernel.org> --- block/blk-core.c | 24 ++++++++++++++++-------- drivers/block/drbd/drbd_main.c | 3 +-- drivers/block/umem.c | 7 +++---- drivers/mmc/core/queue.c | 3 +-- 4 files changed, 21 insertions(+), 16 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index 860a039fd1a8..c2c81c5b7420 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -946,6 +946,20 @@ static void blk_rq_timed_out_timer(struct timer_list *t) kblockd_schedule_work(&q->timeout_work); } +/** + * blk_alloc_queue_node - allocate a request queue + * @gfp_mask: memory allocation flags + * @node_id: NUMA node to allocate memory from + * @lock: Pointer to a spinlock that will be used to e.g. serialize calls to + * the legacy .request_fn(). Only set this pointer for queues that use + * legacy mode and not for queues that use blk-mq. + * + * Note: pass the queue lock as the third argument to this function instead of + * setting the queue lock pointer explicitly to avoid triggering a crash in + * the blkcg throttling code. That code namely makes sysfs attributes visible + * in user space before this function returns and the show methods of these + * sysfs attributes use the queue lock. + */ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id, spinlock_t *lock) { @@ -998,11 +1012,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id, mutex_init(&q->sysfs_lock); spin_lock_init(&q->__queue_lock); - /* - * By default initialize queue_lock to internal lock and driver can - * override it later if need be. - */ - q->queue_lock = &q->__queue_lock; + q->queue_lock = lock ? 
: &q->__queue_lock; /* * A queue starts its life with bypass turned on to avoid @@ -1089,13 +1099,11 @@ blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id) { struct request_queue *q; - q = blk_alloc_queue_node(GFP_KERNEL, node_id, NULL); + q = blk_alloc_queue_node(GFP_KERNEL, node_id, lock); if (!q) return NULL; q->request_fn = rfn; - if (lock) - q->queue_lock = lock; if (blk_init_allocated_queue(q) < 0) { blk_cleanup_queue(q); return NULL; diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 4b4697a1f963..058247bc2f30 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2822,7 +2822,7 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig drbd_init_set_defaults(device); - q = blk_alloc_queue(GFP_KERNEL); + q = blk_alloc_queue_node(GFP_KERNEL, NUMA_NO_NODE, &resource->req_lock); if (!q) goto out_no_q; device->rq_queue = q; @@ -2854,7 +2854,6 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig /* Setting the max_hw_sectors to an odd value of 8kibyte here This triggers a max_bio_size message upon first attach or connect */ blk_queue_max_hw_sectors(q, DRBD_MAX_BIO_SIZE_SAFE >> 8); - q->queue_lock = &resource->req_lock; device->md_io.page = alloc_page(GFP_KERNEL); if (!device->md_io.page) diff --git a/drivers/block/umem.c b/drivers/block/umem.c index 8077123678ad..5c7fb8cc4149 100644 --- a/drivers/block/umem.c +++ b/drivers/block/umem.c @@ -888,13 +888,14 @@ static int mm_pci_probe(struct pci_dev *dev, const struct pci_device_id *id) card->Active = -1; /* no page is active */ card->bio = NULL; card->biotail = &card->bio; + spin_lock_init(&card->lock); - card->queue = blk_alloc_queue(GFP_KERNEL); + card->queue = blk_alloc_queue_node(GFP_KERNEL, NUMA_NO_NODE, + &card->lock); if (!card->queue) goto failed_alloc; blk_queue_make_request(card->queue, mm_make_request); - card->queue->queue_lock = &card->lock; card->queue->queuedata = 
card; tasklet_init(&card->tasklet, process_page, (unsigned long)card); @@ -968,8 +969,6 @@ static int mm_pci_probe(struct pci_dev *dev, const struct pci_device_id *id) dev_printk(KERN_INFO, &card->dev->dev, "Window size %d bytes, IRQ %d\n", data, dev->irq); - spin_lock_init(&card->lock); - pci_set_drvdata(dev, card); if (pci_write_cmd != 0x0F) /* If not Memory Write & Invalidate */ diff --git a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c index 5ecd54088988..bcf6ae03fa97 100644 --- a/drivers/mmc/core/queue.c +++ b/drivers/mmc/core/queue.c @@ -216,10 +216,9 @@ int mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card, int ret = -ENOMEM; mq->card = card; - mq->queue = blk_alloc_queue(GFP_KERNEL); + mq->queue = blk_alloc_queue_node(GFP_KERNEL, NUMA_NO_NODE, lock); if (!mq->queue) return -ENOMEM; - mq->queue->queue_lock = lock; mq->queue->request_fn = mmc_request_fn; mq->queue->init_rq_fn = mmc_init_request; mq->queue->exit_rq_fn = mmc_exit_request; -- 2.16.0 ^ permalink raw reply related [flat|nested] 8+ messages in thread
* Re: [PATCH v2 2/2] block: Fix a race between the throttling code and request queue initialization 2018-01-31 23:53 ` [PATCH v2 2/2] block: Fix a race between the throttling code and request queue initialization Bart Van Assche @ 2018-02-01 1:53 ` Joseph Qi 2018-02-01 16:16 ` Bart Van Assche 0 siblings, 1 reply; 8+ messages in thread From: Joseph Qi @ 2018-02-01 1:53 UTC (permalink / raw) To: Bart Van Assche, Jens Axboe Cc: linux-block, Christoph Hellwig, Philipp Reisner, Ulf Hansson, Kees Cook, stable Hi Bart, On 18/2/1 07:53, Bart Van Assche wrote: > Initialize the request queue lock earlier such that the following > race can no longer occur: > > blk_init_queue_node blkcg_print_blkgs > blk_alloc_queue_node (1) > q->queue_lock = &q->__queue_lock (2) > blkcg_init_queue(q) (3) > spin_lock_irq(blkg->q->queue_lock) (4) > q->queue_lock = lock (5) > spin_unlock_irq(blkg->q->queue_lock) (6) > > (1) allocate an uninitialized queue; > (2) initialize queue_lock to its default internal lock; > (3) initialize blkcg part of request queue, which will create blkg and > then insert it to blkg_list; > (4) traverse blkg_list and find the created blkg, and then take its > queue lock, here it is the default *internal lock*; > (5) *race window*, now queue_lock is overridden with *driver specified > lock*; > (6) now unlock *driver specified lock*, not the locked *internal lock*, > unlock balance breaks. > > The changes in this patch are as follows: > - Move the .queue_lock initialization from blk_init_queue_node() into > blk_alloc_queue_node(). > - For all all block drivers that initialize .queue_lock explicitly, > change the blk_alloc_queue() call in the driver into a > blk_alloc_queue_node() call and remove the explicit .queue_lock > initialization. Additionally, initialize the spin lock that will > be used as queue lock earlier if necessary. > I'm afraid the risk may also exist in blk_cleanup_queue, which will set queue_lock to to the default internal lock. 
spin_lock_irq(lock); if (q->queue_lock != &q->__queue_lock) q->queue_lock = &q->__queue_lock; spin_unlock_irq(lock); I'm thinking of copying blkg->q->queue_lock into a local variable first, but this will result in still using the driver lock even though the queue_lock has already been set to the default internal lock. Thanks, Joseph ^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH v2 2/2] block: Fix a race between the throttling code and request queue initialization 2018-02-01 1:53 ` Joseph Qi @ 2018-02-01 16:16 ` Bart Van Assche 2018-02-02 1:02 ` Joseph Qi 0 siblings, 1 reply; 8+ messages in thread From: Bart Van Assche @ 2018-02-01 16:16 UTC (permalink / raw) To: joseph.qi@linux.alibaba.com, axboe@kernel.dk Cc: hch@lst.de, ulf.hansson@linaro.org, linux-block@vger.kernel.org, philipp.reisner@linbit.com, stable@vger.kernel.org, keescook@chromium.org On Thu, 2018-02-01 at 09:53 +0800, Joseph Qi wrote: > I'm afraid the risk may also exist in blk_cleanup_queue, which will > set queue_lock to to the default internal lock. > > spin_lock_irq(lock); > if (q->queue_lock != &q->__queue_lock) > q->queue_lock = &q->__queue_lock; > spin_unlock_irq(lock); > > I'm thinking of getting blkg->q->queue_lock to local first, but this > will result in still using driver lock even the queue_lock has already > been set to the default internal lock. Hello Joseph, I think the race between the queue_lock assignment in blk_cleanup_queue() and the use of that pointer by cgroup attributes could be solved by removing the visibility of these attributes from blk_cleanup_queue() instead of __blk_release_queue(). However, last time I proposed to move code from __blk_release_queue() into blk_cleanup_queue() I received feedback from some kernel developers that they didn't like this. Is the block driver that triggered the race on the q->queue_lock assignment using legacy (single queue) or multiqueue (blk-mq) mode? If that driver is using legacy mode, are you aware that there are plans to remove legacy mode from the upstream kernel? 
And if your driver is using multiqueue mode, how about the following change instead of the two patches in this patch series: --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1093,7 +1093,7 @@ blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id) return NULL; q->request_fn = rfn; - if (lock) + if (!q->mq_ops && lock) q->queue_lock = lock; if (blk_init_allocated_queue(q) < 0) { blk_cleanup_queue(q); Thanks, Bart. ^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH v2 2/2] block: Fix a race between the throttling code and request queue initialization 2018-02-01 16:16 ` Bart Van Assche @ 2018-02-02 1:02 ` Joseph Qi 2018-02-02 14:52 ` Jens Axboe 2018-02-02 16:21 ` Bart Van Assche 0 siblings, 2 replies; 8+ messages in thread From: Joseph Qi @ 2018-02-02 1:02 UTC (permalink / raw) To: Bart Van Assche, joseph.qi@linux.alibaba.com, axboe@kernel.dk Cc: hch@lst.de, ulf.hansson@linaro.org, linux-block@vger.kernel.org, philipp.reisner@linbit.com, stable@vger.kernel.org, keescook@chromium.org Hi Bart, On 18/2/2 00:16, Bart Van Assche wrote: > On Thu, 2018-02-01 at 09:53 +0800, Joseph Qi wrote: >> I'm afraid the risk may also exist in blk_cleanup_queue, which will >> set queue_lock to to the default internal lock. >> >> spin_lock_irq(lock); >> if (q->queue_lock != &q->__queue_lock) >> q->queue_lock = &q->__queue_lock; >> spin_unlock_irq(lock); >> >> I'm thinking of getting blkg->q->queue_lock to local first, but this >> will result in still using driver lock even the queue_lock has already >> been set to the default internal lock. > > Hello Joseph, > > I think the race between the queue_lock assignment in blk_cleanup_queue() > and the use of that pointer by cgroup attributes could be solved by > removing the visibility of these attributes from blk_cleanup_queue() instead > of __blk_release_queue(). However, last time I proposed to move code from > __blk_release_queue() into blk_cleanup_queue() I received the feedback that > from some kernel developers that they didn't like this. > > Is the block driver that triggered the race on the q->queue_lock assignment > using legacy (single queue) or multiqueue (blk-mq) mode? If that driver is > using legacy mode, are you aware that there are plans to remove legacy mode > from the upstream kernel? And if your driver is using multiqueue mode, how > about the following change instead of the two patches in this patch series: > We triggered this race when using single queue. 
I'm not sure if it exists in multi-queue. Do you mean upstream won't fix bugs any more in single queue? Thanks, Joseph > --- a/block/blk-core.c > +++ b/block/blk-core.c > @@ -1093,7 +1093,7 @@ blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id) > return NULL; > > q->request_fn = rfn; > - if (lock) > + if (!q->mq_ops && lock) > q->queue_lock = lock; > if (blk_init_allocated_queue(q) < 0) { > blk_cleanup_queue(q); > > Thanks, > > Bart. > ^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH v2 2/2] block: Fix a race between the throttling code and request queue initialization 2018-02-02 1:02 ` Joseph Qi @ 2018-02-02 14:52 ` Jens Axboe 2018-02-02 16:21 ` Bart Van Assche 1 sibling, 0 replies; 8+ messages in thread From: Jens Axboe @ 2018-02-02 14:52 UTC (permalink / raw) To: Joseph Qi, Bart Van Assche, joseph.qi@linux.alibaba.com Cc: hch@lst.de, ulf.hansson@linaro.org, linux-block@vger.kernel.org, philipp.reisner@linbit.com, stable@vger.kernel.org, keescook@chromium.org On 2/1/18 6:02 PM, Joseph Qi wrote: > Hi Bart, > > On 18/2/2 00:16, Bart Van Assche wrote: >> On Thu, 2018-02-01 at 09:53 +0800, Joseph Qi wrote: >>> I'm afraid the risk may also exist in blk_cleanup_queue, which will >>> set queue_lock to to the default internal lock. >>> >>> spin_lock_irq(lock); >>> if (q->queue_lock != &q->__queue_lock) >>> q->queue_lock = &q->__queue_lock; >>> spin_unlock_irq(lock); >>> >>> I'm thinking of getting blkg->q->queue_lock to local first, but this >>> will result in still using driver lock even the queue_lock has already >>> been set to the default internal lock. >> >> Hello Joseph, >> >> I think the race between the queue_lock assignment in blk_cleanup_queue() >> and the use of that pointer by cgroup attributes could be solved by >> removing the visibility of these attributes from blk_cleanup_queue() instead >> of __blk_release_queue(). However, last time I proposed to move code from >> __blk_release_queue() into blk_cleanup_queue() I received the feedback that >> from some kernel developers that they didn't like this. >> >> Is the block driver that triggered the race on the q->queue_lock assignment >> using legacy (single queue) or multiqueue (blk-mq) mode? If that driver is >> using legacy mode, are you aware that there are plans to remove legacy mode >> from the upstream kernel? 
And if your driver is using multiqueue mode, how >> about the following change instead of the two patches in this patch series: >> > We triggered this race when using single queue. I'm not sure if it > exists in multi-queue. > Do you mean upstream won't fix bugs any more in single queue? No, we'll still fix bugs in the legacy path, we just won't introduce any new features or accept any new drivers that use that path. Ultimately that path will go away once there are no more users, but until then it is maintained. -- Jens Axboe ^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH v2 2/2] block: Fix a race between the throttling code and request queue initialization 2018-02-02 1:02 ` Joseph Qi 2018-02-02 14:52 ` Jens Axboe @ 2018-02-02 16:21 ` Bart Van Assche 2018-02-03 2:51 ` Joseph Qi 1 sibling, 1 reply; 8+ messages in thread From: Bart Van Assche @ 2018-02-02 16:21 UTC (permalink / raw) To: jiangqi903@gmail.com, joseph.qi@linux.alibaba.com, axboe@kernel.dk Cc: hch@lst.de, ulf.hansson@linaro.org, linux-block@vger.kernel.org, philipp.reisner@linbit.com, stable@vger.kernel.org, keescook@chromium.org On Fri, 2018-02-02 at 09:02 +0800, Joseph Qi wrote: > We triggered this race when using single queue. I'm not sure if it > exists in multi-queue. Regarding the races between modifying the queue_lock pointer and the code that uses that pointer, I think the following construct in blk_cleanup_queue() is sufficient to avoid races between the queue_lock pointer assignment and the code that executes concurrently with blk_cleanup_queue(): spin_lock_irq(lock); if (q->queue_lock != &q->__queue_lock) q->queue_lock = &q->__queue_lock; spin_unlock_irq(lock); In other words, I think that this patch series should be sufficient to address all races between .queue_lock assignments and the code that uses that pointer. Thanks, Bart. ^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH v2 2/2] block: Fix a race between the throttling code and request queue initialization 2018-02-02 16:21 ` Bart Van Assche @ 2018-02-03 2:51 ` Joseph Qi 0 siblings, 0 replies; 8+ messages in thread From: Joseph Qi @ 2018-02-03 2:51 UTC (permalink / raw) To: Bart Van Assche, axboe@kernel.dk Cc: hch@lst.de, ulf.hansson@linaro.org, linux-block@vger.kernel.org, philipp.reisner@linbit.com, stable@vger.kernel.org, keescook@chromium.org Hi Bart, On 18/2/3 00:21, Bart Van Assche wrote: > On Fri, 2018-02-02 at 09:02 +0800, Joseph Qi wrote: >> We triggered this race when using single queue. I'm not sure if it >> exists in multi-queue. > > Regarding the races between modifying the queue_lock pointer and the code that > uses that pointer, I think the following construct in blk_cleanup_queue() is > sufficient to avoid races between the queue_lock pointer assignment and the code > that executes concurrently with blk_cleanup_queue(): > > spin_lock_irq(lock); > if (q->queue_lock != &q->__queue_lock) > q->queue_lock = &q->__queue_lock; > spin_unlock_irq(lock); > IMO, the race also exists. blk_cleanup_queue blkcg_print_blkgs spin_lock_irq(lock) (1) spin_lock_irq(blkg->q->queue_lock) (2,5) q->queue_lock = &q->__queue_lock (3) spin_unlock_irq(lock) (4) spin_unlock_irq(blkg->q->queue_lock) (6) (1) take driver lock; (2) busy loop for driver lock; (3) override driver lock with internal lock; (4) unlock driver lock; (5) can take driver lock now; (6) but unlock internal lock. If we get blkg->q->queue_lock to local first like blk_cleanup_queue, it indeed can fix the different lock use in lock/unlock. But since blk_cleanup_queue has overridden queue lock to internal lock now, I'm afraid we couldn't still use driver lock in blkcg_print_blkgs. Thanks, Joseph > In other words, I think that this patch series should be sufficient to address > all races between .queue_lock assignments and the code that uses that pointer. > > Thanks, > > Bart. 
> ^ permalink raw reply [flat|nested] 8+ messages in thread
end of thread, other threads:[~2018-02-03 2:51 UTC | newest]
Thread overview: 8+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
[not found] <20180131235300.12773-1-bart.vanassche@wdc.com>
2018-01-31 23:52 ` [PATCH v2 1/2] block: Add a third argument to blk_alloc_queue_node() Bart Van Assche
2018-01-31 23:53 ` [PATCH v2 2/2] block: Fix a race between the throttling code and request queue initialization Bart Van Assche
2018-02-01 1:53 ` Joseph Qi
2018-02-01 16:16 ` Bart Van Assche
2018-02-02 1:02 ` Joseph Qi
2018-02-02 14:52 ` Jens Axboe
2018-02-02 16:21 ` Bart Van Assche
2018-02-03 2:51 ` Joseph Qi
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox