* [PATCH v2 0/2] block: Fix a race between the throttling code and request queue initialization
@ 2018-01-31 23:52 Bart Van Assche
2018-01-31 23:52 ` [PATCH v2 1/2] block: Add a third argument to blk_alloc_queue_node() Bart Van Assche
2018-01-31 23:53 ` [PATCH v2 2/2] block: Fix a race between the throttling code and request queue initialization Bart Van Assche
0 siblings, 2 replies; 11+ messages in thread
From: Bart Van Assche @ 2018-01-31 23:52 UTC (permalink / raw)
To: Jens Axboe; +Cc: linux-block, Christoph Hellwig, Bart Van Assche
Hello Jens,
The two patches in this series fix a recently reported race between the
throttling code and request queue initialization. It would be appreciated
if you could have a look at this patch series.
Thanks,
Bart.
Changes between v1 and v2:
- Split a single patch into two patches.
- Dropped blk_alloc_queue_node2() and modified all block drivers that call
blk_alloc_queue_node().
Bart Van Assche (2):
block: Add a third argument to blk_alloc_queue_node()
block: Fix a race between the throttling code and request queue
initialization
block/blk-core.c | 29 +++++++++++++++++++----------
block/blk-mq.c | 2 +-
drivers/block/drbd/drbd_main.c | 3 +--
drivers/block/null_blk.c | 3 ++-
drivers/block/umem.c | 7 +++----
drivers/ide/ide-probe.c | 2 +-
drivers/lightnvm/core.c | 2 +-
drivers/md/dm.c | 2 +-
drivers/mmc/core/queue.c | 3 +--
drivers/nvdimm/pmem.c | 2 +-
drivers/nvme/host/multipath.c | 2 +-
drivers/scsi/scsi_lib.c | 2 +-
include/linux/blkdev.h | 3 ++-
13 files changed, 35 insertions(+), 27 deletions(-)
--
2.16.0
^ permalink raw reply [flat|nested] 11+ messages in thread
* [PATCH v2 1/2] block: Add a third argument to blk_alloc_queue_node()
2018-01-31 23:52 [PATCH v2 0/2] block: Fix a race between the throttling code and request queue initialization Bart Van Assche
@ 2018-01-31 23:52 ` Bart Van Assche
2018-01-31 23:53 ` [PATCH v2 2/2] block: Fix a race between the throttling code and request queue initialization Bart Van Assche
1 sibling, 0 replies; 11+ messages in thread
From: Bart Van Assche @ 2018-01-31 23:52 UTC (permalink / raw)
To: Jens Axboe
Cc: linux-block, Christoph Hellwig, Bart Van Assche, Joseph Qi,
Philipp Reisner, Ulf Hansson, Kees Cook, stable
Make it possible to set the queue lock pointer at queue allocation time.
This patch does not change any functionality.
Signed-off-by: Bart Van Assche <bart.vanassche@wdc.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Joseph Qi <joseph.qi@linux.alibaba.com>
Cc: Philipp Reisner <philipp.reisner@linbit.com>
Cc: Ulf Hansson <ulf.hansson@linaro.org>
Cc: Kees Cook <keescook@chromium.org>
Cc: <stable@vger.kernel.org>
---
block/blk-core.c | 7 ++++---
block/blk-mq.c | 2 +-
drivers/block/null_blk.c | 3 ++-
drivers/ide/ide-probe.c | 2 +-
drivers/lightnvm/core.c | 2 +-
drivers/md/dm.c | 2 +-
drivers/nvdimm/pmem.c | 2 +-
drivers/nvme/host/multipath.c | 2 +-
drivers/scsi/scsi_lib.c | 2 +-
include/linux/blkdev.h | 3 ++-
10 files changed, 15 insertions(+), 12 deletions(-)
diff --git a/block/blk-core.c b/block/blk-core.c
index bd43bc50740a..860a039fd1a8 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -868,7 +868,7 @@ void blk_exit_rl(struct request_queue *q, struct request_list *rl)
struct request_queue *blk_alloc_queue(gfp_t gfp_mask)
{
- return blk_alloc_queue_node(gfp_mask, NUMA_NO_NODE);
+ return blk_alloc_queue_node(gfp_mask, NUMA_NO_NODE, NULL);
}
EXPORT_SYMBOL(blk_alloc_queue);
@@ -946,7 +946,8 @@ static void blk_rq_timed_out_timer(struct timer_list *t)
kblockd_schedule_work(&q->timeout_work);
}
-struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
+struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id,
+ spinlock_t *lock)
{
struct request_queue *q;
@@ -1088,7 +1089,7 @@ blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id)
{
struct request_queue *q;
- q = blk_alloc_queue_node(GFP_KERNEL, node_id);
+ q = blk_alloc_queue_node(GFP_KERNEL, node_id, NULL);
if (!q)
return NULL;
diff --git a/block/blk-mq.c b/block/blk-mq.c
index aacc5280e25f..8191391d1a1d 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2554,7 +2554,7 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
{
struct request_queue *uninit_q, *q;
- uninit_q = blk_alloc_queue_node(GFP_KERNEL, set->numa_node);
+ uninit_q = blk_alloc_queue_node(GFP_KERNEL, set->numa_node, NULL);
if (!uninit_q)
return ERR_PTR(-ENOMEM);
diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c
index aa1c7d4bcac5..9fe8f2a3ec45 100644
--- a/drivers/block/null_blk.c
+++ b/drivers/block/null_blk.c
@@ -1717,7 +1717,8 @@ static int null_add_dev(struct nullb_device *dev)
}
null_init_queues(nullb);
} else if (dev->queue_mode == NULL_Q_BIO) {
- nullb->q = blk_alloc_queue_node(GFP_KERNEL, dev->home_node);
+ nullb->q = blk_alloc_queue_node(GFP_KERNEL, dev->home_node,
+ NULL);
if (!nullb->q) {
rv = -ENOMEM;
goto out_cleanup_queues;
diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c
index 70d6d8ff0fd9..1303d0e31e80 100644
--- a/drivers/ide/ide-probe.c
+++ b/drivers/ide/ide-probe.c
@@ -766,7 +766,7 @@ static int ide_init_queue(ide_drive_t *drive)
* limits and LBA48 we could raise it but as yet
* do not.
*/
- q = blk_alloc_queue_node(GFP_KERNEL, hwif_to_node(hwif));
+ q = blk_alloc_queue_node(GFP_KERNEL, hwif_to_node(hwif), NULL);
if (!q)
return 1;
diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c
index dcc9e621e651..5f1988df1593 100644
--- a/drivers/lightnvm/core.c
+++ b/drivers/lightnvm/core.c
@@ -384,7 +384,7 @@ static int nvm_create_tgt(struct nvm_dev *dev, struct nvm_ioctl_create *create)
goto err_dev;
}
- tqueue = blk_alloc_queue_node(GFP_KERNEL, dev->q->node);
+ tqueue = blk_alloc_queue_node(GFP_KERNEL, dev->q->node, NULL);
if (!tqueue) {
ret = -ENOMEM;
goto err_disk;
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 5c478c185041..93f3ef15b4b2 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1731,7 +1731,7 @@ static struct mapped_device *alloc_dev(int minor)
INIT_LIST_HEAD(&md->table_devices);
spin_lock_init(&md->uevent_lock);
- md->queue = blk_alloc_queue_node(GFP_KERNEL, numa_node_id);
+ md->queue = blk_alloc_queue_node(GFP_KERNEL, numa_node_id, NULL);
if (!md->queue)
goto bad;
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index 49285701fe48..118b4b13592d 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -342,7 +342,7 @@ static int pmem_attach_disk(struct device *dev,
return -EBUSY;
}
- q = blk_alloc_queue_node(GFP_KERNEL, dev_to_node(dev));
+ q = blk_alloc_queue_node(GFP_KERNEL, dev_to_node(dev), NULL);
if (!q)
return -ENOMEM;
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index 605f53376e94..4e362949721a 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -162,7 +162,7 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head)
if (!(ctrl->subsys->cmic & (1 << 1)) || !multipath)
return 0;
- q = blk_alloc_queue_node(GFP_KERNEL, NUMA_NO_NODE);
+ q = blk_alloc_queue_node(GFP_KERNEL, NUMA_NO_NODE, NULL);
if (!q)
goto out;
q->queuedata = head;
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 3fcf5c7c7917..799d02615e71 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -2206,7 +2206,7 @@ struct request_queue *scsi_old_alloc_queue(struct scsi_device *sdev)
struct Scsi_Host *shost = sdev->host;
struct request_queue *q;
- q = blk_alloc_queue_node(GFP_KERNEL, NUMA_NO_NODE);
+ q = blk_alloc_queue_node(GFP_KERNEL, NUMA_NO_NODE, NULL);
if (!q)
return NULL;
q->cmd_size = sizeof(struct scsi_cmnd) + shost->hostt->cmd_size;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 781992c4124e..f1f3ad6419f1 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1330,7 +1330,8 @@ extern long nr_blockdev_pages(void);
bool __must_check blk_get_queue(struct request_queue *);
struct request_queue *blk_alloc_queue(gfp_t);
-struct request_queue *blk_alloc_queue_node(gfp_t, int);
+struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id,
+ spinlock_t *lock);
extern void blk_put_queue(struct request_queue *);
extern void blk_set_queue_dying(struct request_queue *);
--
2.16.0
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH v2 2/2] block: Fix a race between the throttling code and request queue initialization
2018-01-31 23:52 [PATCH v2 0/2] block: Fix a race between the throttling code and request queue initialization Bart Van Assche
2018-01-31 23:52 ` [PATCH v2 1/2] block: Add a third argument to blk_alloc_queue_node() Bart Van Assche
@ 2018-01-31 23:53 ` Bart Van Assche
2018-02-01 1:53 ` Joseph Qi
1 sibling, 1 reply; 11+ messages in thread
From: Bart Van Assche @ 2018-01-31 23:53 UTC (permalink / raw)
To: Jens Axboe
Cc: linux-block, Christoph Hellwig, Bart Van Assche, Joseph Qi,
Philipp Reisner, Ulf Hansson, Kees Cook, stable
Initialize the request queue lock earlier such that the following
race can no longer occur:
blk_init_queue_node                  blkcg_print_blkgs
  blk_alloc_queue_node (1)
    q->queue_lock = &q->__queue_lock (2)
    blkcg_init_queue(q) (3)
                                     spin_lock_irq(blkg->q->queue_lock) (4)
  q->queue_lock = lock (5)
                                     spin_unlock_irq(blkg->q->queue_lock) (6)
(1) allocate an uninitialized queue;
(2) initialize queue_lock to its default internal lock;
(3) initialize blkcg part of request queue, which will create blkg and
then insert it to blkg_list;
(4) traverse blkg_list and find the created blkg, and then take its
queue lock, here it is the default *internal lock*;
(5) *race window*, now queue_lock is overridden with *driver specified
lock*;
(6) now unlock *driver specified lock*, not the locked *internal lock*,
unlock balance breaks.
The changes in this patch are as follows:
- Move the .queue_lock initialization from blk_init_queue_node() into
blk_alloc_queue_node().
- For all block drivers that initialize .queue_lock explicitly,
change the blk_alloc_queue() call in the driver into a
blk_alloc_queue_node() call and remove the explicit .queue_lock
initialization. Additionally, initialize the spin lock that will
be used as queue lock earlier if necessary.
Reported-by: Joseph Qi <joseph.qi@linux.alibaba.com>
Signed-off-by: Bart Van Assche <bart.vanassche@wdc.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Joseph Qi <joseph.qi@linux.alibaba.com>
Cc: Philipp Reisner <philipp.reisner@linbit.com>
Cc: Ulf Hansson <ulf.hansson@linaro.org>
Cc: Kees Cook <keescook@chromium.org>
Cc: <stable@vger.kernel.org>
---
block/blk-core.c | 24 ++++++++++++++++--------
drivers/block/drbd/drbd_main.c | 3 +--
drivers/block/umem.c | 7 +++----
drivers/mmc/core/queue.c | 3 +--
4 files changed, 21 insertions(+), 16 deletions(-)
diff --git a/block/blk-core.c b/block/blk-core.c
index 860a039fd1a8..c2c81c5b7420 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -946,6 +946,20 @@ static void blk_rq_timed_out_timer(struct timer_list *t)
kblockd_schedule_work(&q->timeout_work);
}
+/**
+ * blk_alloc_queue_node - allocate a request queue
+ * @gfp_mask: memory allocation flags
+ * @node_id: NUMA node to allocate memory from
+ * @lock: Pointer to a spinlock that will be used to e.g. serialize calls to
+ * the legacy .request_fn(). Only set this pointer for queues that use
+ * legacy mode and not for queues that use blk-mq.
+ *
+ * Note: pass the queue lock as the third argument to this function instead of
+ * setting the queue lock pointer explicitly to avoid triggering a crash in
+ * the blkcg throttling code. That code makes sysfs attributes visible in
+ * user space before this function returns, and the show methods of these
+ * sysfs attributes use the queue lock.
+ */
struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id,
spinlock_t *lock)
{
@@ -998,11 +1012,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id,
mutex_init(&q->sysfs_lock);
spin_lock_init(&q->__queue_lock);
- /*
- * By default initialize queue_lock to internal lock and driver can
- * override it later if need be.
- */
- q->queue_lock = &q->__queue_lock;
+ q->queue_lock = lock ? : &q->__queue_lock;
/*
* A queue starts its life with bypass turned on to avoid
@@ -1089,13 +1099,11 @@ blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id)
{
struct request_queue *q;
- q = blk_alloc_queue_node(GFP_KERNEL, node_id, NULL);
+ q = blk_alloc_queue_node(GFP_KERNEL, node_id, lock);
if (!q)
return NULL;
q->request_fn = rfn;
- if (lock)
- q->queue_lock = lock;
if (blk_init_allocated_queue(q) < 0) {
blk_cleanup_queue(q);
return NULL;
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 4b4697a1f963..058247bc2f30 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -2822,7 +2822,7 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig
drbd_init_set_defaults(device);
- q = blk_alloc_queue(GFP_KERNEL);
+ q = blk_alloc_queue_node(GFP_KERNEL, NUMA_NO_NODE, &resource->req_lock);
if (!q)
goto out_no_q;
device->rq_queue = q;
@@ -2854,7 +2854,6 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig
/* Setting the max_hw_sectors to an odd value of 8kibyte here
This triggers a max_bio_size message upon first attach or connect */
blk_queue_max_hw_sectors(q, DRBD_MAX_BIO_SIZE_SAFE >> 8);
- q->queue_lock = &resource->req_lock;
device->md_io.page = alloc_page(GFP_KERNEL);
if (!device->md_io.page)
diff --git a/drivers/block/umem.c b/drivers/block/umem.c
index 8077123678ad..5c7fb8cc4149 100644
--- a/drivers/block/umem.c
+++ b/drivers/block/umem.c
@@ -888,13 +888,14 @@ static int mm_pci_probe(struct pci_dev *dev, const struct pci_device_id *id)
card->Active = -1; /* no page is active */
card->bio = NULL;
card->biotail = &card->bio;
+ spin_lock_init(&card->lock);
- card->queue = blk_alloc_queue(GFP_KERNEL);
+ card->queue = blk_alloc_queue_node(GFP_KERNEL, NUMA_NO_NODE,
+ &card->lock);
if (!card->queue)
goto failed_alloc;
blk_queue_make_request(card->queue, mm_make_request);
- card->queue->queue_lock = &card->lock;
card->queue->queuedata = card;
tasklet_init(&card->tasklet, process_page, (unsigned long)card);
@@ -968,8 +969,6 @@ static int mm_pci_probe(struct pci_dev *dev, const struct pci_device_id *id)
dev_printk(KERN_INFO, &card->dev->dev,
"Window size %d bytes, IRQ %d\n", data, dev->irq);
- spin_lock_init(&card->lock);
-
pci_set_drvdata(dev, card);
if (pci_write_cmd != 0x0F) /* If not Memory Write & Invalidate */
diff --git a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c
index 5ecd54088988..bcf6ae03fa97 100644
--- a/drivers/mmc/core/queue.c
+++ b/drivers/mmc/core/queue.c
@@ -216,10 +216,9 @@ int mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card,
int ret = -ENOMEM;
mq->card = card;
- mq->queue = blk_alloc_queue(GFP_KERNEL);
+ mq->queue = blk_alloc_queue_node(GFP_KERNEL, NUMA_NO_NODE, lock);
if (!mq->queue)
return -ENOMEM;
- mq->queue->queue_lock = lock;
mq->queue->request_fn = mmc_request_fn;
mq->queue->init_rq_fn = mmc_init_request;
mq->queue->exit_rq_fn = mmc_exit_request;
--
2.16.0
^ permalink raw reply related [flat|nested] 11+ messages in thread
* Re: [PATCH v2 2/2] block: Fix a race between the throttling code and request queue initialization
2018-01-31 23:53 ` [PATCH v2 2/2] block: Fix a race between the throttling code and request queue initialization Bart Van Assche
@ 2018-02-01 1:53 ` Joseph Qi
2018-02-01 16:16 ` Bart Van Assche
0 siblings, 1 reply; 11+ messages in thread
From: Joseph Qi @ 2018-02-01 1:53 UTC (permalink / raw)
To: Bart Van Assche, Jens Axboe
Cc: linux-block, Christoph Hellwig, Philipp Reisner, Ulf Hansson,
Kees Cook, stable
Hi Bart,
On 18/2/1 07:53, Bart Van Assche wrote:
> Initialize the request queue lock earlier such that the following
> race can no longer occur:
>
> blk_init_queue_node                  blkcg_print_blkgs
>   blk_alloc_queue_node (1)
>     q->queue_lock = &q->__queue_lock (2)
>     blkcg_init_queue(q) (3)
>                                      spin_lock_irq(blkg->q->queue_lock) (4)
>   q->queue_lock = lock (5)
>                                      spin_unlock_irq(blkg->q->queue_lock) (6)
>
> (1) allocate an uninitialized queue;
> (2) initialize queue_lock to its default internal lock;
> (3) initialize blkcg part of request queue, which will create blkg and
> then insert it to blkg_list;
> (4) traverse blkg_list and find the created blkg, and then take its
> queue lock, here it is the default *internal lock*;
> (5) *race window*, now queue_lock is overridden with *driver specified
> lock*;
> (6) now unlock *driver specified lock*, not the locked *internal lock*,
> unlock balance breaks.
>
> The changes in this patch are as follows:
> - Move the .queue_lock initialization from blk_init_queue_node() into
> blk_alloc_queue_node().
> - For all block drivers that initialize .queue_lock explicitly,
> change the blk_alloc_queue() call in the driver into a
> blk_alloc_queue_node() call and remove the explicit .queue_lock
> initialization. Additionally, initialize the spin lock that will
> be used as queue lock earlier if necessary.
>
I'm afraid the risk may also exist in blk_cleanup_queue, which will
set queue_lock to the default internal lock.

	spin_lock_irq(lock);
	if (q->queue_lock != &q->__queue_lock)
		q->queue_lock = &q->__queue_lock;
	spin_unlock_irq(lock);
I'm thinking of getting blkg->q->queue_lock to local first, but this
will result in still using the driver lock even though the queue_lock has already
been set to the default internal lock.
Thanks,
Joseph
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH v2 2/2] block: Fix a race between the throttling code and request queue initialization
2018-02-01 1:53 ` Joseph Qi
@ 2018-02-01 16:16 ` Bart Van Assche
2018-02-02 1:02 ` Joseph Qi
0 siblings, 1 reply; 11+ messages in thread
From: Bart Van Assche @ 2018-02-01 16:16 UTC (permalink / raw)
To: joseph.qi@linux.alibaba.com, axboe@kernel.dk
Cc: hch@lst.de, ulf.hansson@linaro.org, linux-block@vger.kernel.org,
philipp.reisner@linbit.com, stable@vger.kernel.org,
keescook@chromium.org
On Thu, 2018-02-01 at 09:53 +0800, Joseph Qi wrote:
> I'm afraid the risk may also exist in blk_cleanup_queue, which will
> set queue_lock to the default internal lock.
>
> 	spin_lock_irq(lock);
> 	if (q->queue_lock != &q->__queue_lock)
> 		q->queue_lock = &q->__queue_lock;
> 	spin_unlock_irq(lock);
>
> I'm thinking of getting blkg->q->queue_lock to local first, but this
> will result in still using the driver lock even though the queue_lock has
> already been set to the default internal lock.

Hello Joseph,

I think the race between the queue_lock assignment in blk_cleanup_queue()
and the use of that pointer by cgroup attributes could be solved by
removing the visibility of these attributes from blk_cleanup_queue() instead
of __blk_release_queue(). However, last time I proposed to move code from
__blk_release_queue() into blk_cleanup_queue() I received feedback from
some kernel developers that they didn't like this.

Is the block driver that triggered the race on the q->queue_lock assignment
using legacy (single queue) or multiqueue (blk-mq) mode? If that driver is
using legacy mode, are you aware that there are plans to remove legacy mode
from the upstream kernel? And if your driver is using multiqueue mode, how
about the following change instead of the two patches in this patch series:

--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1093,7 +1093,7 @@ blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id)
 		return NULL;
 
 	q->request_fn = rfn;
-	if (lock)
+	if (!q->mq_ops && lock)
 		q->queue_lock = lock;
 	if (blk_init_allocated_queue(q) < 0) {
 		blk_cleanup_queue(q);

Thanks,

Bart.
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH v2 2/2] block: Fix a race between the throttling code and request queue initialization
2018-02-01 16:16 ` Bart Van Assche
@ 2018-02-02 1:02 ` Joseph Qi
2018-02-02 14:52 ` Jens Axboe
2018-02-02 16:21 ` Bart Van Assche
0 siblings, 2 replies; 11+ messages in thread
From: Joseph Qi @ 2018-02-02 1:02 UTC (permalink / raw)
To: Bart Van Assche, joseph.qi@linux.alibaba.com, axboe@kernel.dk
Cc: hch@lst.de, ulf.hansson@linaro.org, linux-block@vger.kernel.org,
philipp.reisner@linbit.com, stable@vger.kernel.org,
keescook@chromium.org
Hi Bart,
On 18/2/2 00:16, Bart Van Assche wrote:
> On Thu, 2018-02-01 at 09:53 +0800, Joseph Qi wrote:
>> I'm afraid the risk may also exist in blk_cleanup_queue, which will
>> set queue_lock to the default internal lock.
>>
>> spin_lock_irq(lock);
>> if (q->queue_lock != &q->__queue_lock)
>> q->queue_lock = &q->__queue_lock;
>> spin_unlock_irq(lock);
>>
>> I'm thinking of getting blkg->q->queue_lock to local first, but this
>> will result in still using the driver lock even though the queue_lock has already
>> been set to the default internal lock.
>
> Hello Joseph,
>
> I think the race between the queue_lock assignment in blk_cleanup_queue()
> and the use of that pointer by cgroup attributes could be solved by
> removing the visibility of these attributes from blk_cleanup_queue() instead
> of __blk_release_queue(). However, last time I proposed to move code from
> __blk_release_queue() into blk_cleanup_queue() I received feedback from
> some kernel developers that they didn't like this.
>
> Is the block driver that triggered the race on the q->queue_lock assignment
> using legacy (single queue) or multiqueue (blk-mq) mode? If that driver is
> using legacy mode, are you aware that there are plans to remove legacy mode
> from the upstream kernel? And if your driver is using multiqueue mode, how
> about the following change instead of the two patches in this patch series:
>
We triggered this race when using single queue. I'm not sure if it
exists in multi-queue.
Do you mean upstream won't fix bugs any more in single queue?
Thanks,
Joseph
> --- a/block/blk-core.c
> +++ b/block/blk-core.c
> @@ -1093,7 +1093,7 @@ blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id)
> return NULL;
>
> q->request_fn = rfn;
> - if (lock)
> + if (!q->mq_ops && lock)
> q->queue_lock = lock;
> if (blk_init_allocated_queue(q) < 0) {
> blk_cleanup_queue(q);
>
> Thanks,
>
> Bart.
>
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH v2 2/2] block: Fix a race between the throttling code and request queue initialization
2018-02-02 1:02 ` Joseph Qi
@ 2018-02-02 14:52 ` Jens Axboe
2018-02-02 16:21 ` Bart Van Assche
1 sibling, 0 replies; 11+ messages in thread
From: Jens Axboe @ 2018-02-02 14:52 UTC (permalink / raw)
To: Joseph Qi, Bart Van Assche, joseph.qi@linux.alibaba.com
Cc: hch@lst.de, ulf.hansson@linaro.org, linux-block@vger.kernel.org,
philipp.reisner@linbit.com, stable@vger.kernel.org,
keescook@chromium.org
On 2/1/18 6:02 PM, Joseph Qi wrote:
> Hi Bart,
>
> On 18/2/2 00:16, Bart Van Assche wrote:
>> On Thu, 2018-02-01 at 09:53 +0800, Joseph Qi wrote:
>>> I'm afraid the risk may also exist in blk_cleanup_queue, which will
>>> set queue_lock to the default internal lock.
>>>
>>> spin_lock_irq(lock);
>>> if (q->queue_lock != &q->__queue_lock)
>>> q->queue_lock = &q->__queue_lock;
>>> spin_unlock_irq(lock);
>>>
>>> I'm thinking of getting blkg->q->queue_lock to local first, but this
>>> will result in still using the driver lock even though the queue_lock has already
>>> been set to the default internal lock.
>>
>> Hello Joseph,
>>
>> I think the race between the queue_lock assignment in blk_cleanup_queue()
>> and the use of that pointer by cgroup attributes could be solved by
>> removing the visibility of these attributes from blk_cleanup_queue() instead
>> of __blk_release_queue(). However, last time I proposed to move code from
>> __blk_release_queue() into blk_cleanup_queue() I received feedback from
>> some kernel developers that they didn't like this.
>>
>> Is the block driver that triggered the race on the q->queue_lock assignment
>> using legacy (single queue) or multiqueue (blk-mq) mode? If that driver is
>> using legacy mode, are you aware that there are plans to remove legacy mode
>> from the upstream kernel? And if your driver is using multiqueue mode, how
>> about the following change instead of the two patches in this patch series:
>>
> We triggered this race when using single queue. I'm not sure if it
> exists in multi-queue.
> Do you mean upstream won't fix bugs any more in single queue?
No, we'll still fix bugs in the legacy path, we just won't introduce
any new features or accept any new drivers that use that path.
Ultimately that path will go away once there are no more users,
but until then it is maintained.
--
Jens Axboe
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH v2 2/2] block: Fix a race between the throttling code and request queue initialization
2018-02-02 1:02 ` Joseph Qi
2018-02-02 14:52 ` Jens Axboe
@ 2018-02-02 16:21 ` Bart Van Assche
2018-02-03 2:51 ` Joseph Qi
1 sibling, 1 reply; 11+ messages in thread
From: Bart Van Assche @ 2018-02-02 16:21 UTC (permalink / raw)
To: jiangqi903@gmail.com, joseph.qi@linux.alibaba.com,
axboe@kernel.dk
Cc: hch@lst.de, ulf.hansson@linaro.org, linux-block@vger.kernel.org,
philipp.reisner@linbit.com, stable@vger.kernel.org,
keescook@chromium.org
On Fri, 2018-02-02 at 09:02 +0800, Joseph Qi wrote:
> We triggered this race when using single queue. I'm not sure if it
> exists in multi-queue.

Regarding the races between modifying the queue_lock pointer and the code that
uses that pointer, I think the following construct in blk_cleanup_queue() is
sufficient to avoid races between the queue_lock pointer assignment and the code
that executes concurrently with blk_cleanup_queue():

	spin_lock_irq(lock);
	if (q->queue_lock != &q->__queue_lock)
		q->queue_lock = &q->__queue_lock;
	spin_unlock_irq(lock);

In other words, I think that this patch series should be sufficient to address
all races between .queue_lock assignments and the code that uses that pointer.

Thanks,

Bart.
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH v2 2/2] block: Fix a race between the throttling code and request queue initialization
2018-02-02 16:21 ` Bart Van Assche
@ 2018-02-03 2:51 ` Joseph Qi
2018-02-05 17:58 ` Bart Van Assche
0 siblings, 1 reply; 11+ messages in thread
From: Joseph Qi @ 2018-02-03 2:51 UTC (permalink / raw)
To: Bart Van Assche, axboe@kernel.dk
Cc: hch@lst.de, ulf.hansson@linaro.org, linux-block@vger.kernel.org,
philipp.reisner@linbit.com, stable@vger.kernel.org,
keescook@chromium.org
Hi Bart,
On 18/2/3 00:21, Bart Van Assche wrote:
> On Fri, 2018-02-02 at 09:02 +0800, Joseph Qi wrote:
>> We triggered this race when using single queue. I'm not sure if it
>> exists in multi-queue.
>
> Regarding the races between modifying the queue_lock pointer and the code that
> uses that pointer, I think the following construct in blk_cleanup_queue() is
> sufficient to avoid races between the queue_lock pointer assignment and the code
> that executes concurrently with blk_cleanup_queue():
>
> spin_lock_irq(lock);
> if (q->queue_lock != &q->__queue_lock)
> q->queue_lock = &q->__queue_lock;
> spin_unlock_irq(lock);
>
IMO, the race also exists.
blk_cleanup_queue                    blkcg_print_blkgs
  spin_lock_irq(lock) (1)            spin_lock_irq(blkg->q->queue_lock) (2,5)
    q->queue_lock = &q->__queue_lock (3)
  spin_unlock_irq(lock) (4)
                                     spin_unlock_irq(blkg->q->queue_lock) (6)
(1) take driver lock;
(2) busy loop for driver lock;
(3) override driver lock with internal lock;
(4) unlock driver lock;
(5) can take driver lock now;
(6) but unlock internal lock.
If we get blkg->q->queue_lock to local first like blk_cleanup_queue,
it indeed can fix the different lock use in lock/unlock. But since
blk_cleanup_queue has overridden queue lock to internal lock now, I'm
afraid we couldn't still use driver lock in blkcg_print_blkgs.
Thanks,
Joseph
> In other words, I think that this patch series should be sufficient to address
> all races between .queue_lock assignments and the code that uses that pointer.
>
> Thanks,
>
> Bart.
>
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH v2 2/2] block: Fix a race between the throttling code and request queue initialization
2018-02-03 2:51 ` Joseph Qi
@ 2018-02-05 17:58 ` Bart Van Assche
2018-02-07 11:54 ` Jan Kara
0 siblings, 1 reply; 11+ messages in thread
From: Bart Van Assche @ 2018-02-05 17:58 UTC (permalink / raw)
To: jack@suse.cz, joseph.qi@linux.alibaba.com, axboe@kernel.dk
Cc: hch@lst.de, linux-block@vger.kernel.org
On Sat, 2018-02-03 at 10:51 +0800, Joseph Qi wrote:
> Hi Bart,
>
> On 18/2/3 00:21, Bart Van Assche wrote:
> > On Fri, 2018-02-02 at 09:02 +0800, Joseph Qi wrote:
> > > We triggered this race when using single queue. I'm not sure if it
> > > exists in multi-queue.
> >
> > Regarding the races between modifying the queue_lock pointer and the code that
> > uses that pointer, I think the following construct in blk_cleanup_queue() is
> > sufficient to avoid races between the queue_lock pointer assignment and the code
> > that executes concurrently with blk_cleanup_queue():
> >
> > 	spin_lock_irq(lock);
> > 	if (q->queue_lock != &q->__queue_lock)
> > 		q->queue_lock = &q->__queue_lock;
> > 	spin_unlock_irq(lock);
> >
>
> IMO, the race also exists.
>
> blk_cleanup_queue                    blkcg_print_blkgs
>   spin_lock_irq(lock) (1)            spin_lock_irq(blkg->q->queue_lock) (2,5)
>     q->queue_lock = &q->__queue_lock (3)
>   spin_unlock_irq(lock) (4)
>                                      spin_unlock_irq(blkg->q->queue_lock) (6)
>
> (1) take driver lock;
> (2) busy loop for driver lock;
> (3) override driver lock with internal lock;
> (4) unlock driver lock;
> (5) can take driver lock now;
> (6) but unlock internal lock.
>
> If we get blkg->q->queue_lock to local first like blk_cleanup_queue,
> it indeed can fix the different lock use in lock/unlock. But since
> blk_cleanup_queue has overridden queue lock to internal lock now, I'm
> afraid we couldn't still use driver lock in blkcg_print_blkgs.

(+ Jan Kara)

Hello Joseph,

That's a good catch. Since modifying all code that accesses the queue_lock
pointer and that can race with blk_cleanup_queue() would be too cumbersome I
see only one solution, namely making the request queue cgroup and sysfs
attributes invisible before the queue_lock pointer is restored. Leaving the
debugfs attributes visible while blk_cleanup_queue() is in progress should
be fine if the request queue initialization code is modified such that it
only modifies the queue_lock pointer for legacy queues. Jan, I think some
time ago you objected when I proposed to move code from __blk_release_queue()
into blk_cleanup_queue(). Would you be fine with a slightly different
approach, namely making block cgroup and sysfs attributes invisible earlier,
namely from inside blk_cleanup_queue() instead of from inside
__blk_release_queue()?

Thanks,

Bart.
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH v2 2/2] block: Fix a race between the throttling code and request queue initialization
2018-02-05 17:58 ` Bart Van Assche
@ 2018-02-07 11:54 ` Jan Kara
0 siblings, 0 replies; 11+ messages in thread
From: Jan Kara @ 2018-02-07 11:54 UTC (permalink / raw)
To: Bart Van Assche
Cc: jack@suse.cz, joseph.qi@linux.alibaba.com, axboe@kernel.dk,
hch@lst.de, linux-block@vger.kernel.org
On Mon 05-02-18 17:58:12, Bart Van Assche wrote:
> On Sat, 2018-02-03 at 10:51 +0800, Joseph Qi wrote:
> > Hi Bart,
> >
> > On 18/2/3 00:21, Bart Van Assche wrote:
> > > On Fri, 2018-02-02 at 09:02 +0800, Joseph Qi wrote:
> > > > We triggered this race when using single queue. I'm not sure if it
> > > > exists in multi-queue.
> > >
> > > Regarding the races between modifying the queue_lock pointer and the code
> > > that uses that pointer: I think the following construct in
> > > blk_cleanup_queue() is sufficient to avoid races with code that executes
> > > concurrently with blk_cleanup_queue():
> > >
> > > spin_lock_irq(lock);
> > > if (q->queue_lock != &q->__queue_lock)
> > > q->queue_lock = &q->__queue_lock;
> > > spin_unlock_irq(lock);
> > >
> >
> > IMO, the race also exists.
> >
> > blk_cleanup_queue blkcg_print_blkgs
> > spin_lock_irq(lock) (1) spin_lock_irq(blkg->q->queue_lock) (2,5)
> > q->queue_lock = &q->__queue_lock (3)
> > spin_unlock_irq(lock) (4)
> > spin_unlock_irq(blkg->q->queue_lock) (6)
> >
> > (1) take driver lock;
> > (2) busy loop for driver lock;
> > (3) override driver lock with internal lock;
> > (4) unlock driver lock;
> > (5) can take driver lock now;
> > (6) but unlock internal lock.
> >
> > If we save blkg->q->queue_lock to a local variable first, as
> > blk_cleanup_queue() does, that would indeed fix the mismatched lock use
> > between lock and unlock. But since blk_cleanup_queue() has already
> > overridden the queue lock with the internal lock, I'm afraid we still
> > couldn't use the driver lock in blkcg_print_blkgs().
>
> (+ Jan Kara)
>
> Hello Joseph,
>
> That's a good catch. Since modifying all code that accesses the queue_lock
> pointer and that can race with blk_cleanup_queue() would be too cumbersome,
> I see only one solution, namely making the request queue cgroup and sysfs
> attributes invisible before the queue_lock pointer is restored. Leaving the
> debugfs attributes visible while blk_cleanup_queue() is in progress should
> be fine if the request queue initialization code is modified such that it
> only modifies the queue_lock pointer for legacy queues. Jan, I think some
> time ago you objected when I proposed to move code from __blk_release_queue()
> into blk_cleanup_queue(). Would you be fine with a slightly different
> approach, namely making the block cgroup and sysfs attributes invisible
> earlier, from inside blk_cleanup_queue() instead of from inside
> __blk_release_queue()?
Making attributes invisible earlier should be fine. But this whole
switching of queue_lock in blk_cleanup_queue() looks error-prone to me.
Generally, anyone with access to the request_queue can have the old value of
q->queue_lock in its CPU caches and can happily use that value after
blk_cleanup_queue() finishes and the driver-specific structure storing the
lock is freed. blkcg_print_blkgs() is one such example, but I wouldn't bet a
penny that there are no other paths with a similar problem.
Logically, the lifetime of the storage for q->queue_lock should be at least
as long as that of the request_queue itself - i.e., it should be released
only after __blk_release_queue(). Otherwise all users of q->queue_lock need
proper synchronization against the lock switch in blk_cleanup_queue(). Either
of these looks like a lot of work. I guess since this involves only a legacy
path, your approach of moving the removal of sysfs attributes earlier might
be a reasonable band-aid.
Honza
--
Jan Kara <jack@suse.com>
SUSE Labs, CR
^ permalink raw reply [flat|nested] 11+ messages in thread
end of thread, other threads:[~2018-02-07 11:54 UTC | newest]
Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-01-31 23:52 [PATCH v2 0/2] block: Fix a race between the throttling code and request queue initialization Bart Van Assche
2018-01-31 23:52 ` [PATCH v2 1/2] block: Add a third argument to blk_alloc_queue_node() Bart Van Assche
2018-01-31 23:53 ` [PATCH v2 2/2] block: Fix a race between the throttling code and request queue initialization Bart Van Assche
2018-02-01 1:53 ` Joseph Qi
2018-02-01 16:16 ` Bart Van Assche
2018-02-02 1:02 ` Joseph Qi
2018-02-02 14:52 ` Jens Axboe
2018-02-02 16:21 ` Bart Van Assche
2018-02-03 2:51 ` Joseph Qi
2018-02-05 17:58 ` Bart Van Assche
2018-02-07 11:54 ` Jan Kara