Linux block layer
 help / color / mirror / Atom feed
* [GIT PULL 11/19] lightnvm: allow to init targets on factory mode
From: Matias Bjørling @ 2017-04-15 18:55 UTC (permalink / raw)
  To: axboe
  Cc: linux-block, linux-kernel, Javier González,
	Javier González, Matias Bjørling
In-Reply-To: <20170415185553.16098-1-matias@cnexlabs.com>

From: Javier González <jg@lightnvm.io>

Target initialization has two responsibilities: creating the target
partition and instantiating the target. This patch allows creating a
factory partition (e.g., do not trigger recovery on the given target).
This is useful for target development and for being able to restore the
device state at any moment in time without requiring a full-device
erase.

Signed-off-by: Javier González <javier@cnexlabs.com>
Signed-off-by: Matias Bjørling <matias@cnexlabs.com>
---
 drivers/lightnvm/core.c       | 14 +++++++++++---
 drivers/lightnvm/rrpc.c       |  3 ++-
 include/linux/lightnvm.h      |  3 ++-
 include/uapi/linux/lightnvm.h |  4 ++++
 4 files changed, 19 insertions(+), 5 deletions(-)

diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c
index 5f84d2a..a63b563 100644
--- a/drivers/lightnvm/core.c
+++ b/drivers/lightnvm/core.c
@@ -280,7 +280,7 @@ static int nvm_create_tgt(struct nvm_dev *dev, struct nvm_ioctl_create *create)
 	tdisk->fops = &nvm_fops;
 	tdisk->queue = tqueue;
 
-	targetdata = tt->init(tgt_dev, tdisk);
+	targetdata = tt->init(tgt_dev, tdisk, create->flags);
 	if (IS_ERR(targetdata))
 		goto err_init;
 
@@ -1244,8 +1244,16 @@ static long nvm_ioctl_dev_create(struct file *file, void __user *arg)
 	create.tgtname[DISK_NAME_LEN - 1] = '\0';
 
 	if (create.flags != 0) {
-		pr_err("nvm: no flags supported\n");
-		return -EINVAL;
+		__u32 flags = create.flags;
+
+		/* Check for valid flags */
+		if (flags & NVM_TARGET_FACTORY)
+			flags &= ~NVM_TARGET_FACTORY;
+
+		if (flags) {
+			pr_err("nvm: flag not supported\n");
+			return -EINVAL;
+		}
 	}
 
 	return __nvm_configure_create(&create);
diff --git a/drivers/lightnvm/rrpc.c b/drivers/lightnvm/rrpc.c
index a8acf9e..5dba544 100644
--- a/drivers/lightnvm/rrpc.c
+++ b/drivers/lightnvm/rrpc.c
@@ -1506,7 +1506,8 @@ static int rrpc_luns_configure(struct rrpc *rrpc)
 
 static struct nvm_tgt_type tt_rrpc;
 
-static void *rrpc_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk)
+static void *rrpc_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk,
+		       int flags)
 {
 	struct request_queue *bqueue = dev->q;
 	struct request_queue *tqueue = tdisk->queue;
diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h
index eff7d1f..7dfa56e 100644
--- a/include/linux/lightnvm.h
+++ b/include/linux/lightnvm.h
@@ -436,7 +436,8 @@ static inline int ppa_cmp_blk(struct ppa_addr ppa1, struct ppa_addr ppa2)
 
 typedef blk_qc_t (nvm_tgt_make_rq_fn)(struct request_queue *, struct bio *);
 typedef sector_t (nvm_tgt_capacity_fn)(void *);
-typedef void *(nvm_tgt_init_fn)(struct nvm_tgt_dev *, struct gendisk *);
+typedef void *(nvm_tgt_init_fn)(struct nvm_tgt_dev *, struct gendisk *,
+				int flags);
 typedef void (nvm_tgt_exit_fn)(void *);
 typedef int (nvm_tgt_sysfs_init_fn)(struct gendisk *);
 typedef void (nvm_tgt_sysfs_exit_fn)(struct gendisk *);
diff --git a/include/uapi/linux/lightnvm.h b/include/uapi/linux/lightnvm.h
index fd19f36..c8aec4b 100644
--- a/include/uapi/linux/lightnvm.h
+++ b/include/uapi/linux/lightnvm.h
@@ -85,6 +85,10 @@ struct nvm_ioctl_create_conf {
 	};
 };
 
+enum {
+	NVM_TARGET_FACTORY = 1 << 0,	/* Init target in factory mode */
+};
+
 struct nvm_ioctl_create {
 	char dev[DISK_NAME_LEN];		/* open-channel SSD device */
 	char tgttype[NVM_TTYPE_NAME_MAX];	/* target type name */
-- 
2.9.3

^ permalink raw reply related

* [GIT PULL 12/19] lightnvm: make nvm_free static
From: Matias Bjørling @ 2017-04-15 18:55 UTC (permalink / raw)
  To: axboe
  Cc: linux-block, linux-kernel, Javier González,
	Javier González, Matias Bjørling
In-Reply-To: <20170415185553.16098-1-matias@cnexlabs.com>

From: Javier González <jg@lightnvm.io>

Prefix the nvm_free static function with a missing static keyword.

Signed-off-by: Javier González <javier@cnexlabs.com>
Signed-off-by: Matias Bjørling <matias@cnexlabs.com>
---
 drivers/lightnvm/core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c
index a63b563..eb9ab1a 100644
--- a/drivers/lightnvm/core.c
+++ b/drivers/lightnvm/core.c
@@ -999,7 +999,7 @@ static int nvm_core_init(struct nvm_dev *dev)
 	return ret;
 }
 
-void nvm_free(struct nvm_dev *dev)
+static void nvm_free(struct nvm_dev *dev)
 {
 	if (!dev)
 		return;
-- 
2.9.3

^ permalink raw reply related

* [GIT PULL 14/19] lightnvm: fix type checks on rrpc
From: Matias Bjørling @ 2017-04-15 18:55 UTC (permalink / raw)
  To: axboe
  Cc: linux-block, linux-kernel, Javier González,
	Javier González, Matias Bjørling
In-Reply-To: <20170415185553.16098-1-matias@cnexlabs.com>

From: Javier González <jg@lightnvm.io>

sector_t is always unsigned, therefore avoid < 0 checks on it.

Signed-off-by: Javier González <javier@cnexlabs.com>
Signed-off-by: Matias Bjørling <matias@cnexlabs.com>
---
 drivers/lightnvm/rrpc.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/lightnvm/rrpc.c b/drivers/lightnvm/rrpc.c
index 5dba544..cf0e28a 100644
--- a/drivers/lightnvm/rrpc.c
+++ b/drivers/lightnvm/rrpc.c
@@ -817,7 +817,7 @@ static int rrpc_read_ppalist_rq(struct rrpc *rrpc, struct bio *bio,
 
 	for (i = 0; i < npages; i++) {
 		/* We assume that mapping occurs at 4KB granularity */
-		BUG_ON(!(laddr + i >= 0 && laddr + i < rrpc->nr_sects));
+		BUG_ON(!(laddr + i < rrpc->nr_sects));
 		gp = &rrpc->trans_map[laddr + i];
 
 		if (gp->rblk) {
@@ -846,7 +846,7 @@ static int rrpc_read_rq(struct rrpc *rrpc, struct bio *bio, struct nvm_rq *rqd,
 	if (!is_gc && rrpc_lock_rq(rrpc, bio, rqd))
 		return NVM_IO_REQUEUE;
 
-	BUG_ON(!(laddr >= 0 && laddr < rrpc->nr_sects));
+	BUG_ON(!(laddr < rrpc->nr_sects));
 	gp = &rrpc->trans_map[laddr];
 
 	if (gp->rblk) {
-- 
2.9.3

^ permalink raw reply related

* [GIT PULL 15/19] lightnvm: convert sprintf into strlcpy
From: Matias Bjørling @ 2017-04-15 18:55 UTC (permalink / raw)
  To: axboe
  Cc: linux-block, linux-kernel, Javier González,
	Javier González, Matias Bjørling
In-Reply-To: <20170415185553.16098-1-matias@cnexlabs.com>

From: Javier González <jg@lightnvm.io>

Convert sprintf calls to strlcpy in order to make possible buffer
overflow more obvious.

Signed-off-by: Javier González <javier@cnexlabs.com>
Signed-off-by: Matias Bjørling <matias@cnexlabs.com>
---
 drivers/lightnvm/core.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c
index 258007a..2c26af3 100644
--- a/drivers/lightnvm/core.c
+++ b/drivers/lightnvm/core.c
@@ -273,7 +273,7 @@ static int nvm_create_tgt(struct nvm_dev *dev, struct nvm_ioctl_create *create)
 		goto err_disk;
 	blk_queue_make_request(tqueue, tt->make_rq);
 
-	sprintf(tdisk->disk_name, "%s", create->tgtname);
+	strlcpy(tdisk->disk_name, create->tgtname, sizeof(tdisk->disk_name));
 	tdisk->flags = GENHD_FL_EXT_DEVT;
 	tdisk->major = 0;
 	tdisk->first_minor = 0;
@@ -1198,13 +1198,13 @@ static long nvm_ioctl_get_devices(struct file *file, void __user *arg)
 	list_for_each_entry(dev, &nvm_devices, devices) {
 		struct nvm_ioctl_device_info *info = &devices->info[i];
 
-		sprintf(info->devname, "%s", dev->name);
+		strlcpy(info->devname, dev->name, sizeof(info->devname));
 
 		/* kept for compatibility */
 		info->bmversion[0] = 1;
 		info->bmversion[1] = 0;
 		info->bmversion[2] = 0;
-		sprintf(info->bmname, "%s", "gennvm");
+		strlcpy(info->bmname, "gennvm", sizeof(info->bmname));
 		i++;
 
 		if (i > 31) {
-- 
2.9.3

^ permalink raw reply related

* [GIT PULL 13/19] lightnvm: clean unused variable
From: Matias Bjørling @ 2017-04-15 18:55 UTC (permalink / raw)
  To: axboe
  Cc: linux-block, linux-kernel, Javier González,
	Javier González, Matias Bjørling
In-Reply-To: <20170415185553.16098-1-matias@cnexlabs.com>

From: Javier González <jg@lightnvm.io>

Clean unused variable on lightnvm core.

Signed-off-by: Javier González <javier@cnexlabs.com>
Signed-off-by: Matias Bjørling <matias@cnexlabs.com>
---
 drivers/lightnvm/core.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c
index eb9ab1a..258007a 100644
--- a/drivers/lightnvm/core.c
+++ b/drivers/lightnvm/core.c
@@ -501,7 +501,6 @@ void nvm_part_to_tgt(struct nvm_dev *dev, sector_t *entries,
 		int *lun_roffs;
 		struct ppa_addr gaddr;
 		u64 pba = le64_to_cpu(entries[i]);
-		int off;
 		u64 diff;
 
 		if (!pba)
@@ -511,8 +510,6 @@ void nvm_part_to_tgt(struct nvm_dev *dev, sector_t *entries,
 		ch_rmap = &dev_rmap->chnls[gaddr.g.ch];
 		lun_roffs = ch_rmap->lun_offs;
 
-		off = gaddr.g.ch * geo->luns_per_chnl + gaddr.g.lun;
-
 		diff = ((ch_rmap->ch_off * geo->luns_per_chnl) +
 				(lun_roffs[gaddr.g.lun])) * geo->sec_per_lun;
 
-- 
2.9.3

^ permalink raw reply related

* [GIT PULL 17/19] lightnvm: pblk-gc: fix an error pointer dereference in init
From: Matias Bjørling @ 2017-04-15 18:55 UTC (permalink / raw)
  To: axboe; +Cc: linux-block, linux-kernel, Dan Carpenter, Matias Bjørling
In-Reply-To: <20170415185553.16098-1-matias@cnexlabs.com>

From: Dan Carpenter <dan.carpenter@oracle.com>

These labels are reversed so we could end up dereferencing an error
pointer or leaking.

Fixes: 7f347ba6bb3a ("lightnvm: physical block device (pblk) target")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Matias Bjørling <matias@cnexlabs.com>
---
 drivers/lightnvm/pblk-gc.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/lightnvm/pblk-gc.c b/drivers/lightnvm/pblk-gc.c
index 9b147cf..f173fd4 100644
--- a/drivers/lightnvm/pblk-gc.c
+++ b/drivers/lightnvm/pblk-gc.c
@@ -527,10 +527,10 @@ int pblk_gc_init(struct pblk *pblk)
 
 	return 0;
 
-fail_free_main_kthread:
-	kthread_stop(gc->gc_ts);
 fail_free_writer_kthread:
 	kthread_stop(gc->gc_writer_ts);
+fail_free_main_kthread:
+	kthread_stop(gc->gc_ts);
 
 	return ret;
 }
-- 
2.9.3

^ permalink raw reply related

* [GIT PULL 19/19] lightnvm: fix some error code in pblk-init.c
From: Matias Bjørling @ 2017-04-15 18:55 UTC (permalink / raw)
  To: axboe; +Cc: linux-block, linux-kernel, Dan Carpenter, Matias Bjørling
In-Reply-To: <20170415185553.16098-1-matias@cnexlabs.com>

From: Dan Carpenter <dan.carpenter@oracle.com>

There were a bunch of places in pblk_lines_init() where we didn't set an
error code.  And in pblk_writer_init() we accidentally return 1 instead
of a correct error code, which would result in an Oops later.

Fixes: 11a5d6fdf919 ("lightnvm: physical block device (pblk) target")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Matias Bjørling <matias@cnexlabs.com>
---
 drivers/lightnvm/pblk-init.c | 20 ++++++++++++++------
 1 file changed, 14 insertions(+), 6 deletions(-)

diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c
index 94653b1..3996e4b 100644
--- a/drivers/lightnvm/pblk-init.c
+++ b/drivers/lightnvm/pblk-init.c
@@ -543,7 +543,7 @@ static int pblk_lines_init(struct pblk *pblk)
 	long nr_bad_blks, nr_meta_blks, nr_free_blks;
 	int bb_distance;
 	int i;
-	int ret = 0;
+	int ret;
 
 	lm->sec_per_line = geo->sec_per_blk * geo->nr_luns;
 	lm->blk_per_line = geo->nr_luns;
@@ -638,12 +638,16 @@ static int pblk_lines_init(struct pblk *pblk)
 	}
 
 	l_mg->bb_template = kzalloc(lm->sec_bitmap_len, GFP_KERNEL);
-	if (!l_mg->bb_template)
+	if (!l_mg->bb_template) {
+		ret = -ENOMEM;
 		goto fail_free_meta;
+	}
 
 	l_mg->bb_aux = kzalloc(lm->sec_bitmap_len, GFP_KERNEL);
-	if (!l_mg->bb_aux)
+	if (!l_mg->bb_aux) {
+		ret = -ENOMEM;
 		goto fail_free_bb_template;
+	}
 
 	bb_distance = (geo->nr_luns) * geo->sec_per_pl;
 	for (i = 0; i < lm->sec_per_line; i += bb_distance)
@@ -667,8 +671,10 @@ static int pblk_lines_init(struct pblk *pblk)
 
 	pblk->lines = kcalloc(l_mg->nr_lines, sizeof(struct pblk_line),
 								GFP_KERNEL);
-	if (!pblk->lines)
+	if (!pblk->lines) {
+		ret = -ENOMEM;
 		goto fail_free_bb_aux;
+	}
 
 	nr_free_blks = 0;
 	for (i = 0; i < l_mg->nr_lines; i++) {
@@ -682,8 +688,10 @@ static int pblk_lines_init(struct pblk *pblk)
 		spin_lock_init(&line->lock);
 
 		nr_bad_blks = pblk_bb_line(pblk, line);
-		if (nr_bad_blks < 0 || nr_bad_blks > lm->blk_per_line)
+		if (nr_bad_blks < 0 || nr_bad_blks > lm->blk_per_line) {
+			ret = -EINVAL;
 			goto fail_free_lines;
+		}
 
 		line->blk_in_line = lm->blk_per_line - nr_bad_blks;
 		if (line->blk_in_line < lm->min_blk_line) {
@@ -733,7 +741,7 @@ static int pblk_writer_init(struct pblk *pblk)
 	pblk->writer_ts = kthread_create(pblk_write_ts, pblk, "pblk-writer-t");
 	if (IS_ERR(pblk->writer_ts)) {
 		pr_err("pblk: could not allocate writer kthread\n");
-		return 1;
+		return PTR_ERR(pblk->writer_ts);
 	}
 
 	return 0;
-- 
2.9.3

^ permalink raw reply related

* [GIT PULL 18/19] lightnvm: fix some WARN() messages
From: Matias Bjørling @ 2017-04-15 18:55 UTC (permalink / raw)
  To: axboe; +Cc: linux-block, linux-kernel, Dan Carpenter, Matias Bjørling
In-Reply-To: <20170415185553.16098-1-matias@cnexlabs.com>

From: Dan Carpenter <dan.carpenter@oracle.com>

WARN_ON() takes a condition, not an error message.  I slightly tweaked
some conditions so hopefully it's more clear.

Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Matias Bjørling <matias@cnexlabs.com>
---
 drivers/lightnvm/pblk-read.c     | 12 ++++++------
 drivers/lightnvm/pblk-recovery.c |  2 +-
 drivers/lightnvm/pblk-write.c    |  2 +-
 3 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/lightnvm/pblk-read.c b/drivers/lightnvm/pblk-read.c
index eff0982..bce7ed5 100644
--- a/drivers/lightnvm/pblk-read.c
+++ b/drivers/lightnvm/pblk-read.c
@@ -49,8 +49,8 @@ static void pblk_read_ppalist_rq(struct pblk *pblk, struct nvm_rq *rqd,
 	int i, j = 0;
 
 	/* logic error: lba out-of-bounds. Ignore read request */
-	if (!(blba + nr_secs < pblk->rl.nr_secs)) {
-		WARN_ON("pblk: read lbas out of bounds\n");
+	if (blba + nr_secs >= pblk->rl.nr_secs) {
+		WARN(1, "pblk: read lbas out of bounds\n");
 		return;
 	}
 
@@ -254,8 +254,8 @@ static void pblk_read_rq(struct pblk *pblk, struct nvm_rq *rqd,
 	sector_t lba = pblk_get_lba(bio);
 
 	/* logic error: lba out-of-bounds. Ignore read request */
-	if (!(lba < pblk->rl.nr_secs)) {
-		WARN_ON("pblk: read lba out of bounds\n");
+	if (lba >= pblk->rl.nr_secs) {
+		WARN(1, "pblk: read lba out of bounds\n");
 		return;
 	}
 
@@ -411,8 +411,8 @@ static int read_rq_gc(struct pblk *pblk, struct nvm_rq *rqd,
 	int valid_secs = 0;
 
 	/* logic error: lba out-of-bounds */
-	if (!(lba < pblk->rl.nr_secs)) {
-		WARN_ON("pblk: read lba out of bounds\n");
+	if (lba >= pblk->rl.nr_secs) {
+		WARN(1, "pblk: read lba out of bounds\n");
 		goto out;
 	}
 
diff --git a/drivers/lightnvm/pblk-recovery.c b/drivers/lightnvm/pblk-recovery.c
index 0d50f41..f8f8508 100644
--- a/drivers/lightnvm/pblk-recovery.c
+++ b/drivers/lightnvm/pblk-recovery.c
@@ -167,7 +167,7 @@ static int pblk_recov_l2p_from_emeta(struct pblk *pblk, struct pblk_line *line)
 		if (le64_to_cpu(lba_list[i]) == ADDR_EMPTY) {
 			spin_lock(&line->lock);
 			if (test_and_set_bit(i, line->invalid_bitmap))
-				WARN_ON_ONCE("pblk: rec. double invalidate:\n");
+				WARN_ONCE(1, "pblk: rec. double invalidate:\n");
 			else
 				line->vsc--;
 			spin_unlock(&line->lock);
diff --git a/drivers/lightnvm/pblk-write.c b/drivers/lightnvm/pblk-write.c
index ee57db9..74f7413 100644
--- a/drivers/lightnvm/pblk-write.c
+++ b/drivers/lightnvm/pblk-write.c
@@ -141,7 +141,7 @@ static void pblk_end_w_fail(struct pblk *pblk, struct nvm_rq *rqd)
 
 		/* Logic error */
 		if (bit > c_ctx->nr_valid) {
-			WARN_ON_ONCE("pblk: corrupted write request\n");
+			WARN_ONCE(1, "pblk: corrupted write request\n");
 			goto out;
 		}
 
-- 
2.9.3

^ permalink raw reply related

* RE: Outstanding MQ questions from MMC
From: Avri Altman @ 2017-04-15 19:24 UTC (permalink / raw)
  To: Linus Walleij
  Cc: Arnd Bergmann, Ulf Hansson, linux-mmc@vger.kernel.org,
	linux-block@vger.kernel.org, Jens Axboe, Christoph Hellwig,
	Adrian Hunter, Paolo Valente
In-Reply-To: <CACRpkdbstMEC-_JcEfEm1debQRPeYBDmTPm0L_jGFnrgRZg1Zw@mail.gmail.com>

WW91IGNhbiBzZWUgaG93IGl0J3MgZG9uZSBpbiBtbWNfYmxrX2lvY3RsX3JwbWJfY21kKCkuDQoN
ClRoZSBSUE1CIHByb3RvY29sIGRlZmluZXMgNiB0eXBlcyBvZiBhY2Nlc3NlczoNCkFjY2Vzc2Vz
IHRoYXQgcGVyZm9ybXMgcmVhZCBvcGVyYXRpb24gKHJlYWQgY291bnRlciwgcmVhZCBkYXRhLCBh
bmQgcmVhZCBjb25maWd1cmF0aW9uKSAtIHJlcXVpcmVzIHNlbmRpbmcgMiByZXF1ZXN0cy4gDQpB
Y2Nlc3NlcyB0aGF0IHBlcmZvcm1zIHdyaXRlIG9wZXJhdGlvbiAocHJvZ3JhbSBrZXksIHdyaXRl
IGRhdGEsIHdyaXRlIGNvbmZpZ3VyYXRpb24pIC0gcmVxdWlyZXMgc2VuZGluZyAzIHJlcXVlc3Rz
LA0KQnV0IHlvdSBtdXN0IGRvIHJlYWQgY291bnRlciBiZWZvcmVoYW5kIChhY2NlcHQgZnJvbSBw
cm9ncmFtIGtleSksIGhlbmNlIHRoZSA1IGRpZmZlcmVudCByZXF1ZXN0cy4NCg0KVGhlIHN0YW5k
YXJkIGRvZXMgbm90IGRlZmluZSBhICJzcGVjaWFsIiByZXF1ZXN0IHRoYXQgZG9lcyBpdCBhbGwg
aW4gb25jZSwgDQpidXQgZXhwZWN0cyBhIHByZS1kZWZpbmUgc2VyaWVzIG9mIGNtZDE4ICYgY21k
MjUgZm9yIGVhY2ggYWNjZXNzIHR5cGUsIA0KaW4gd2hpY2ggdGhlIHBheWxvYWQgYXJlIDUxMiBi
eXRlcyBmcmFtZXMgaW4gYSBwcmUtZGVmaW5lZCBzdHJ1Y3R1cmUuDQogIA0KQ2hlZXJzLA0KQXZy
aQ0KDQo+IC0tLS0tT3JpZ2luYWwgTWVzc2FnZS0tLS0tDQo+IEZyb206IExpbnVzIFdhbGxlaWog
W21haWx0bzpsaW51cy53YWxsZWlqQGxpbmFyby5vcmddDQo+IFNlbnQ6IFNhdHVyZGF5LCBBcHJp
bCAxNSwgMjAxNyA5OjM1IFBNDQo+IFRvOiBBdnJpIEFsdG1hbiA8QXZyaS5BbHRtYW5Ac2FuZGlz
ay5jb20+DQo+IENjOiBBcm5kIEJlcmdtYW5uIDxhcm5kQGFybmRiLmRlPjsgVWxmIEhhbnNzb24N
Cj4gPHVsZi5oYW5zc29uQGxpbmFyby5vcmc+OyBsaW51eC1tbWNAdmdlci5rZXJuZWwub3JnOyBs
aW51eC0NCj4gYmxvY2tAdmdlci5rZXJuZWwub3JnOyBKZW5zIEF4Ym9lIDxheGJvZUBrZXJuZWwu
ZGs+OyBDaHJpc3RvcGggSGVsbHdpZw0KPiA8aGNoQGxzdC5kZT47IEFkcmlhbiBIdW50ZXIgPGFk
cmlhbi5odW50ZXJAaW50ZWwuY29tPjsgUGFvbG8gVmFsZW50ZQ0KPiA8cGFvbG8udmFsZW50ZUBs
aW5hcm8ub3JnPg0KPiBTdWJqZWN0OiBSZTogT3V0c3RhbmRpbmcgTVEgcXVlc3Rpb25zIGZyb20g
TU1DDQo+IA0KPiBPbiBGcmksIEFwciAxNCwgMjAxNyBhdCA4OjQxIFBNLCBBdnJpIEFsdG1hbiA8
QXZyaS5BbHRtYW5Ac2FuZGlzay5jb20+DQo+IHdyb3RlOg0KPiA+IFtNZV0NCj4gPj4gMi4gVHVy
biBSUE1CIGFuZCBvdGhlciBpb2N0bCgpIE1NQyBvcGVyYXRpb25zIGludG8gbW1jX3F1ZXVlX3Jl
cQ0KPiA+PiAgICB0aGluZ3MgYW5kIGZ1bm5lbCB0aGVtIGludG8gdGhlIGJsb2NrIHNjaGVkdWxl
cg0KPiA+PiAgICB1c2luZyBSRVFfT1BfRFJWX0lOL09VVCByZXF1ZXN0cy4NCj4gPj4NCj4gPg0K
PiA+IEFjY2Vzc2luZyB0aGUgUlBNQiBpcyBkb25lIHZpYSBhIHN0cmFuZ2UgcHJvdG9jb2wsIGlu
IHdoaWNoIGVhY2ggYWNjZXNzIGlzDQo+IGNvbXByaXNlZCBvZiBzZXZlcmFsIHJlcXVlc3RzLg0K
PiA+IEZvciBleGFtcGxlLCB3cml0aW5nIHRvIHRoZSBSUE1CIHdpbGwgcmVxdWlyZSBzZW5kaW5n
IDUgZGlmZmVyZW50IHJlcXVlc3RzOg0KPiA+IDIgcmVxdWVzdHMgdG8gcmVhZCB0aGUgd3JpdGUg
Y291bnRlciwgYW5kIHRoZW4gMyBtb3JlIHJlcXVlc3RzIGZvciB0aGUNCj4gd3JpdGUgb3BlcmF0
aW9uIGl0c2VsZi4NCj4gPg0KPiA+IE9uY2UgdGhlIHNlcXVlbmNlIGhhcyBzdGFydGVkLCBpdCBz
aG91bGQgbm90IGdldCBpbnRlcmZlcmVkIGJ5IG90aGVyDQo+IHJlcXVlc3RzLCBvciB0aGUgb3Bl
cmF0aW9uIHdpbGwgZmFpbC4NCj4gDQo+IFNvIEkgZ3Vlc3MgY3VycmVudGx5IHNvbWV0aGluZyB0
YWtlcyBhIGhvc3QgbG9jayBhbmQgdGhlbiBwZXJmb3JtcyB0aGUNCj4gNSByZXF1ZXN0cy4NCj4g
DQo+IFRodXMgd2UgbmVlZCB0byBzZW5kIGEgc2luZ2xlIGN1c3RvbSByZXF1ZXN0IGNvbnRhaW5p
bmcgYSBsaXN0IG9mIDUgdGhpbmdzIHRvDQo+IGRvLCBhbmQgcmV0dXJuIGFmdGVyIHRoYXQuDQo+
IA0KPiBPciBkbyB5b3UgbWVhbiB0aGF0IHdlIHJldHVybiB0byB1c2Vyc3BhY2UgaW5iZXR3ZWVu
IHRoZXNlIGRpZmZlcmVudA0KPiByZXF1ZXN0cyBhbmQgdGhlIHNlcXVlbmNpbmcgaXMgZG9uZSBp
biB1c2Vyc3BhY2U/DQo+IA0KPiBJIGhvcGUgbm90IGJlY2F1c2UgdGhhdCBzb3VuZHMgZnJhZ2ls
ZSwgbGlrZSB1c2Vyc3BhY2UgY291bGQgY3Jhc2ggYW5kIGxlYXZlDQo+IHRoZSBob3N0IGxvY2sg
ZGFuZ2xpbmcgOi8NCj4gDQo+IFlvdXJzLA0KPiBMaW51cyBXYWxsZWlqDQo=

^ permalink raw reply

* [PATCH 4.9 26/31] blk-mq: Avoid memory reclaim when remapping queues
From: Greg Kroah-Hartman @ 2017-04-16  8:04 UTC (permalink / raw)
  To: linux-kernel
  Cc: Greg Kroah-Hartman, stable, Gabriel Krisman Bertazi, Brian King,
	Douglas Miller, linux-block, linux-scsi, Jens Axboe, Sumit Semwal
In-Reply-To: <20170416080221.808058771@linuxfoundation.org>

4.9-stable review patch.  If anyone has any objections, please let me know.

------------------

From: Gabriel Krisman Bertazi <krisman@linux.vnet.ibm.com>

commit 36e1f3d107867b25c616c2fd294f5a1c9d4e5d09 upstream.

While stressing memory and IO at the same time we changed SMT settings,
we were able to consistently trigger deadlocks in the mm system, which
froze the entire machine.

I think that under memory stress conditions, the large allocations
performed by blk_mq_init_rq_map may trigger a reclaim, which stalls
waiting on the block layer remapping completion, thus deadlocking the
system.  The trace below was collected after the machine stalled,
waiting for the hotplug event completion.

The simplest fix for this is to make allocations in this path
non-reclaimable, with GFP_NOIO.  With this patch, we couldn't hit the
issue anymore.

This should apply on top of Jens's for-next branch cleanly.

Changes since v1:
  - Use GFP_NOIO instead of GFP_NOWAIT.

 Call Trace:
[c000000f0160aaf0] [c000000f0160ab50] 0xc000000f0160ab50 (unreliable)
[c000000f0160acc0] [c000000000016624] __switch_to+0x2e4/0x430
[c000000f0160ad20] [c000000000b1a880] __schedule+0x310/0x9b0
[c000000f0160ae00] [c000000000b1af68] schedule+0x48/0xc0
[c000000f0160ae30] [c000000000b1b4b0] schedule_preempt_disabled+0x20/0x30
[c000000f0160ae50] [c000000000b1d4fc] __mutex_lock_slowpath+0xec/0x1f0
[c000000f0160aed0] [c000000000b1d678] mutex_lock+0x78/0xa0
[c000000f0160af00] [d000000019413cac] xfs_reclaim_inodes_ag+0x33c/0x380 [xfs]
[c000000f0160b0b0] [d000000019415164] xfs_reclaim_inodes_nr+0x54/0x70 [xfs]
[c000000f0160b0f0] [d0000000194297f8] xfs_fs_free_cached_objects+0x38/0x60 [xfs]
[c000000f0160b120] [c0000000003172c8] super_cache_scan+0x1f8/0x210
[c000000f0160b190] [c00000000026301c] shrink_slab.part.13+0x21c/0x4c0
[c000000f0160b2d0] [c000000000268088] shrink_zone+0x2d8/0x3c0
[c000000f0160b380] [c00000000026834c] do_try_to_free_pages+0x1dc/0x520
[c000000f0160b450] [c00000000026876c] try_to_free_pages+0xdc/0x250
[c000000f0160b4e0] [c000000000251978] __alloc_pages_nodemask+0x868/0x10d0
[c000000f0160b6f0] [c000000000567030] blk_mq_init_rq_map+0x160/0x380
[c000000f0160b7a0] [c00000000056758c] blk_mq_map_swqueue+0x33c/0x360
[c000000f0160b820] [c000000000567904] blk_mq_queue_reinit+0x64/0xb0
[c000000f0160b850] [c00000000056a16c] blk_mq_queue_reinit_notify+0x19c/0x250
[c000000f0160b8a0] [c0000000000f5d38] notifier_call_chain+0x98/0x100
[c000000f0160b8f0] [c0000000000c5fb0] __cpu_notify+0x70/0xe0
[c000000f0160b930] [c0000000000c63c4] notify_prepare+0x44/0xb0
[c000000f0160b9b0] [c0000000000c52f4] cpuhp_invoke_callback+0x84/0x250
[c000000f0160ba10] [c0000000000c570c] cpuhp_up_callbacks+0x5c/0x120
[c000000f0160ba60] [c0000000000c7cb8] _cpu_up+0xf8/0x1d0
[c000000f0160bac0] [c0000000000c7eb0] do_cpu_up+0x120/0x150
[c000000f0160bb40] [c0000000006fe024] cpu_subsys_online+0x64/0xe0
[c000000f0160bb90] [c0000000006f5124] device_online+0xb4/0x120
[c000000f0160bbd0] [c0000000006f5244] online_store+0xb4/0xc0
[c000000f0160bc20] [c0000000006f0a68] dev_attr_store+0x68/0xa0
[c000000f0160bc60] [c0000000003ccc30] sysfs_kf_write+0x80/0xb0
[c000000f0160bca0] [c0000000003cbabc] kernfs_fop_write+0x17c/0x250
[c000000f0160bcf0] [c00000000030fe6c] __vfs_write+0x6c/0x1e0
[c000000f0160bd90] [c000000000311490] vfs_write+0xd0/0x270
[c000000f0160bde0] [c0000000003131fc] SyS_write+0x6c/0x110
[c000000f0160be30] [c000000000009204] system_call+0x38/0xec

Signed-off-by: Gabriel Krisman Bertazi <krisman@linux.vnet.ibm.com>
Cc: Brian King <brking@linux.vnet.ibm.com>
Cc: Douglas Miller <dougmill@linux.vnet.ibm.com>
Cc: linux-block@vger.kernel.org
Cc: linux-scsi@vger.kernel.org
Signed-off-by: Jens Axboe <axboe@fb.com>
Signed-off-by: Sumit Semwal <sumit.semwal@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

---
 block/blk-mq.c |    6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1474,7 +1474,7 @@ static struct blk_mq_tags *blk_mq_init_r
 	INIT_LIST_HEAD(&tags->page_list);
 
 	tags->rqs = kzalloc_node(set->queue_depth * sizeof(struct request *),
-				 GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY,
+				 GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY,
 				 set->numa_node);
 	if (!tags->rqs) {
 		blk_mq_free_tags(tags);
@@ -1500,7 +1500,7 @@ static struct blk_mq_tags *blk_mq_init_r
 
 		do {
 			page = alloc_pages_node(set->numa_node,
-				GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY | __GFP_ZERO,
+				GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY | __GFP_ZERO,
 				this_order);
 			if (page)
 				break;
@@ -1521,7 +1521,7 @@ static struct blk_mq_tags *blk_mq_init_r
 		 * Allow kmemleak to scan these pages as they contain pointers
 		 * to additional allocations like via ops->init_request().
 		 */
-		kmemleak_alloc(p, order_to_size(this_order), 1, GFP_KERNEL);
+		kmemleak_alloc(p, order_to_size(this_order), 1, GFP_NOIO);
 		entries_per_page = order_to_size(this_order) / rq_size;
 		to_do = min(entries_per_page, set->queue_depth - i);
 		left -= to_do * rq_size;

^ permalink raw reply

* Re: [PATCH V3 00/16] Introduce the BFQ I/O scheduler
From: Heinz Diehl @ 2017-04-16  8:14 UTC (permalink / raw)
  To: linux-kernel
  Cc: Jens Axboe, Tejun Heo, Fabio Checconi, Arianna Avanzini,
	linux-block, ulf.hansson, linus.walleij, broonie, Paolo Valente
In-Reply-To: <20170411134315.44135-1-paolo.valente@linaro.org>

On 11.04.2017, Paolo Valente wrote: 

> new patch series, addressing (both) issues raised by Bart [1].

I'm doing a lot of automatic video transcoding in order to get my
collection of homemade videos down to an acceptable size (mainly
landscapes and boats all over the Norwegian west coast, taken with an old
cam that only produces uncompressed files). This process
involves heavy permanent writing to disk, often over a period of 10
min and more. When this happens, the whole system is kind of
unresponsive. I'm running Fedora 25, but with a self-customised kernel
that is fully low-latency, and the machine is a quadcore Intel Xeon
which should have enough power (Intel(R) Xeon(R) CPU E3-1241 v3 @
3.50GHz).

Using plain blk-mq, the system is very sluggish when there is heavy
disk writing, and it can take up to several minutes (up to the point
where the disk writing actually finishes) to start programs like gimp
or Libreoffice. In fact, when I click on the "applications" button
within XFCE, it can take a long time before the window even opens.
I played with deadline-mq too, and the situation remains the same
unless I do some heavy tuning like this:

echo "mq-deadline" > /sys/block/nvme0n1/queue/scheduler
echo "1" > /sys/block/nvme0n1/queue/iosched/fifo_batch
echo "4" > /sys/block/nvme0n1/queue/iosched/writes_starved
echo "100" > /sys/block/nvme0n1/queue/iosched/read_expire
echo "2000" > /sys/block/nvme0n1/queue/iosched/write_expire

With deadline-mq tuned like this, overall responsiveness is a little bit
better, but not nearly as good as when using bfq. With plain bfq, no
tuning is needed. The system is no longer sluggish. Any program starts
within seconds, and all is very much responsive. Max throughput isn't
important to me, the nvme "harddisk" is fast enough that some MB/s
more or less do not really matter.

[root@chiara ~]# lspci -v | grep -i nvme
01:00.0 Non-Volatile memory controller: Samsung Electronics Co Ltd NVMe
SSD Controller SM951/PM951 (rev 01) (prog-if 02 [NVM Express])
	Kernel driver in use: nvme
	Kernel modules: nvme

As an end-user with no relevant programming skills to be able to
contribute, I would wish that developers would combine their forces and
help Paolo to get bfq into the kernel and to make bfq even better.

Thanks,
 Heinz
 

^ permalink raw reply

* Re: [PATCH v4 6/6] dm rq: Avoid that request processing stalls sporadically
From: Ming Lei @ 2017-04-16 10:21 UTC (permalink / raw)
  To: Bart Van Assche
  Cc: linux-scsi@vger.kernel.org, dm-devel@redhat.com,
	linux-block@vger.kernel.org, snitzer@redhat.com, axboe@kernel.dk
In-Reply-To: <1492189969.2644.9.camel@sandisk.com>

On Fri, Apr 14, 2017 at 05:12:50PM +0000, Bart Van Assche wrote:
> On Fri, 2017-04-14 at 09:13 +0800, Ming Lei wrote:
> > On Thu, Apr 13, 2017 at 09:59:57AM -0700, Bart Van Assche wrote:
> > > On 04/12/17 19:20, Ming Lei wrote:
> > > > On Wed, Apr 12, 2017 at 06:38:07PM +0000, Bart Van Assche wrote:
> > > > > If the blk-mq core would always rerun a hardware queue if a block driver
> > > > > returns BLK_MQ_RQ_QUEUE_BUSY then that would cause 100% of a single CPU core
> > > > 
> > > > It won't cause 100% CPU utilization since we restart queue in completion
> > > > path and at that time at least one tag is available, then progress can be
> > > > made.
> > > 
> > > Hello Ming,
> > > 
> > > Sorry but you are wrong. If .queue_rq() returns BLK_MQ_RQ_QUEUE_BUSY
> > > then it's likely that calling .queue_rq() again after only a few
> > > microseconds will cause it to return BLK_MQ_RQ_QUEUE_BUSY again. If you
> > > don't believe me, change "if (!blk_mq_sched_needs_restart(hctx) &&
> > > !test_bit(BLK_MQ_S_TAG_WAITING, &hctx->state)) blk_mq_run_hw_queue(hctx,
> > > true);" into "blk_mq_run_hw_queue(hctx, true);", trigger a busy
> > 
> > Yes, that can be true, but I mean it is still OK to run the queue again
> > with
> > 
> > 	if (!blk_mq_sched_needs_restart(hctx) &&
> > 	    !test_bit(BLK_MQ_S_TAG_WAITING, &hctx->state))
> > 			blk_mq_run_hw_queue(hctx, true);
> > 
> > and restarting queue in __blk_mq_finish_request() when
> > BLK_MQ_RQ_QUEUE_BUSY is returned from .queue_rq(). And both are in current
> > blk-mq implementation.
> > 
> > Then why do we need blk_mq_delay_run_hw_queue(hctx, 100/*ms*/) in dm?
> 
> Because if dm_mq_queue_rq() returns BLK_MQ_RQ_QUEUE_BUSY then there is no
> guarantee that __blk_mq_finish_request() will be called later on for the
> same queue. dm_mq_queue_rq() can e.g. return BLK_MQ_RQ_QUEUE_BUSY while no
> dm requests are in progress because the SCSI error handler is active for
> all underlying paths. See also scsi_lld_busy() and scsi_host_in_recovery().

OK, thanks Bart for the explanation.

Looks like a very interesting BLK_MQ_RQ_QUEUE_BUSY case which isn't caused by
too many pending I/Os; I will study this case further.


Thanks,
Ming

^ permalink raw reply

* Re: [PATCH 0/4] blk-mq-sched: allow to use hw tag for sched
From: Ming Lei @ 2017-04-16 16:03 UTC (permalink / raw)
  To: Jens Axboe, linux-block; +Cc: Christoph Hellwig, Omar Sandoval, Jozef Mikovic
In-Reply-To: <20170415123825.32716-1-ming.lei@redhat.com>

On Sat, Apr 15, 2017 at 08:38:21PM +0800, Ming Lei wrote:
> The 1st patch enhances BLK_MQ_F_NO_SCHED so that we can't change/
> show available io schedulers on devices which don't support io
> scheduler.
> 
> The 2nd patch passes BLK_MQ_F_NO_SCHED for avoiding one regression
> on mtip32xx, which is introduced by blk-mq io scheduler.
> 
> The last two patches introduce BLK_MQ_F_SCHED_USE_HW_TAG so that
> we can allow to use hardware tag for scheduler, then mq-deadline
> can work well on mtip32xx. Even though other devices with enough
> hardware tag space can benefit from this feature too.
> 
> The 1st two patches aims on v4.11, and the last two are for
> v4.12.

Please ignore this patchset; I will post another series for the
mtip32xx fix.

thanks,
Ming

^ permalink raw reply

* [GIT PULL] A few small fixes for 4.11-rc
From: Jens Axboe @ 2017-04-16 16:04 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: linux-block@vger.kernel.org

Hi Linus,

Four small fixes. Three of them fix the same error in NVMe, in loop, fc,
and rdma respectively. The last one, from Ming, fixes a regression in
this series, where our bvec gap logic was wrong and caused an oops on
NVMe under certain conditions.

Please pull!


  git://git.kernel.dk/linux-block.git for-linus


----------------------------------------------------------------
Ming Lei (1):
      block: fix bio_will_gap() for first bvec with offset

Sagi Grimberg (3):
      nvme-loop: Fix sqsize wrong assignment based on ctrl MQES capability
      nvme-rdma: Fix sqsize wrong assignment based on ctrl MQES capability
      nvme-fc: Fix sqsize wrong assignment based on ctrl MQES capability

 drivers/nvme/host/fc.c     |  2 +-
 drivers/nvme/host/rdma.c   |  2 +-
 drivers/nvme/target/loop.c |  2 +-
 include/linux/blkdev.h     | 32 ++++++++++++++++++++++++++++----
 4 files changed, 31 insertions(+), 7 deletions(-)

-- 
Jens Axboe

^ permalink raw reply

* Kernel Oops: BUG: unable to handle kernel NULL pointer dereference at 0000000000000050; IP is at blk_mq_poll+0xa0/0x2e0
From: Stephen  Bates @ 2017-04-16 16:17 UTC (permalink / raw)
  To: linux-block@vger.kernel.org, linux-kernel@vger.kernel.org
  Cc: Jens Axboe, Damien.LeMoal@wdc.com

SGkgQWxsDQoNCkFzIHBhcnQgb2YgbXkgdGVzdGluZyBvZiBJTyBwb2xsaW5nIFsxXSBJIGFtIHNl
ZWluZyBhIE5VTEwgcG9pbnRlciBkZXJlZmVyZW5jZSBvb3BzIHRoYXQgc2VlbXMgdG8gaGF2ZSBi
ZWVuIGludHJvZHVjZWQgaW4gdGhlIHByZXBhcmF0aW9uIGZvciA0LjExLiBUaGUga2VybmVsIG9v
cHMgb3V0cHV0IGlzIGJlbG93IGFuZCB0aGlzIHNlZW1zIHRvIGJlIGR1ZSB0byBibGtfbXFfdGFn
X3RvX3JxIHJldHVybmluZyBOVUxMIGluIGJsa19tcV9wb2xsIGluIGJsay1tcS5jLiBJIGhhdmUg
bm90IGhhZCBhIGNoYW5jZSB0byBiaXNlY3QgdGhpcyBkb3duIHRvIGEgc2luZ2xlIGNvbW1pdCB5
ZXQgYnV0IHRoZSBzYW1lIHRlc3Qgd29ya3MgZmluZSBpbiA0LjEwIGJ1dCBub3QgaW4gNC4xMS1y
YzYuIEkgd2lsbCB0cnkgYW5kIGdldCBhIGJldHRlciBiaXNlY3QgYW5kIHNlbmQgb24gbW9yZSBp
bmZvcm1hdGlvbiB3aGVuIEkgZ2V0IGl0LiANCg0KSSBhbSBydW5uaW5nIHRoZSBmb2xsb3dpbmcg
c2ltcGxlIHNoZWxsIHNjcmlwdCB3aGljaCBhbHdheXMgdHJpZ2dlcnMgdGhlIE9vcHMuIE5vdGUg
dGhpcyBzY3JpcHQgbmVlZHMgc29tZSB0d2Vha2luZyB0byB3b3JrIG9uIDQuMTEgYW5kIGVhcmxp
ZXIgc2luY2UgdGhlIHNvbWUgdGhpbmdzIHdlcmUgbW92ZWQgZnJvbSBzeXNmcyB0byBkZWJ1Z2Zz
IGluIHRoZSA0LjExIGFuZCA0LjEyIHRyZWVzLg0KDQojIS9iaW4vYmFzaA0KQkxPQ0s9bnZtZTFu
MQ0KREVWPS9kZXYvJHtCTE9DS30NCg0KZWNobyAiVGVzdGluZyBwb2xsaW5nIG9uICR7REVWfSIN
Cg0KIyBEaXNwbGF5IHRoZSBpbml0YWwgcG9sbGluZyBzZXR0aW5ncyBmb3IgdGhpcyBkZXZpY2Uu
Li4NCg0KY2F0IC9zeXMvYmxvY2svJHtCTE9DS30vcXVldWUvaW9fcG9sbA0KY2F0IC9zeXMvYmxv
Y2svJHtCTE9DS30vcXVldWUvaW9fcG9sbF9kZWxheQ0KDQojIERpc3BsYXkgdGhlIHBvbGxpbmcg
cmVzdWx0cyBhbmQgc3RhdHMNCg0KY2F0IC9zeXMva2VybmVsL2RlYnVnL2Jsb2NrLyR7QkxPQ0t9
L21xL3BvbGxfc3RhdA0KY2F0IC9zeXMva2VybmVsL2RlYnVnL2Jsb2NrLyR7QkxPQ0t9L21xLzAv
aW9fcG9sbA0KDQojIE5vdyBkbyBzb21lIHBvbGxpbmcgSU8gYWdhaW5zdCB0aGUgYmxvY2sgZGV2
aWNlIGluIHF1ZXN0aW9uLg0KDQpmaW8gLS1maWxlbmFtZT0ke0RFVn0gLS1zaXplPTEwMCUgLS1u
dW1qb2JzPTEgLS1pb2RlcHRoPTEgXA0KICAgIC0tYnM9NGsgLS1udW1iZXJfaW9zPTFrIC0tcnVu
dGltZT02MCAtLWlvZW5naW5lPXB2c3luYzIgLS1oaXByaSBcDQogICAgLS1ydz1yYW5kcncgLS1y
YW5kb21fZ2VuZXJhdG9yPWxmc3IgLS1kaXJlY3Q9MSAtLWdyb3VwX3JlcG9ydGluZz0xIFwNCiAg
ICAtLXJ3bWl4cmVhZD0xMDAgLS1sb29wcz0xIC0tbmFtZSBwb2xsLmZpbw0KDQojIERpc3BsYXkg
dGhlIHBvbGxpbmcgcmVzdWx0cyBhbmQgc3RhdHMNCg0KY2F0IC9zeXMva2VybmVsL2RlYnVnL2Js
b2NrLyR7QkxPQ0t9L21xL3BvbGxfc3RhdA0KY2F0IC9zeXMva2VybmVsL2RlYnVnL2Jsb2NrLyR7
QkxPQ0t9L21xLzAvaW9fcG9sbA0KDQpbICAgMjYuMDI0NTI5XSBCVUc6IHVuYWJsZSB0byBoYW5k
bGUga2VybmVsIE5VTEwgcG9pbnRlciBkZXJlZmVyZW5jZSBhdCAwMDAwMDAwMDAwMDAwMDUwDQpb
ICAgMjYuMDI3MTY3XSBJUDogYmxrX21xX3BvbGwrMHhhMC8weDJlMA0KWyAgIDI2LjAyNzMyNl0g
UEdEIDdhZDUxMDY3IA0KWyAgIDI2LjAyNzU4NF0gUFVEIDdhZGJlMDY3IA0KWyAgIDI2LjAyODAw
Nl0gUE1EIDAgDQpbICAgMjYuMDI4MjM0XSANClsgICAyNi4wMjkzMTldIE9vcHM6IDAwMDAgWyMx
XSBTTVANClsgICAyNi4wMzA0MDVdIENQVTogMCBQSUQ6IDE0NzQgQ29tbTogZmlvIE5vdCB0YWlu
dGVkIDQuMTEuMC1yYzYgIzQyDQpbICAgMjYuMDMxNjc4XSBIYXJkd2FyZSBuYW1lOiBRRU1VIFN0
YW5kYXJkIFBDIChpNDQwRlggKyBQSUlYLCAxOTk2KSwgQklPUyByZWwtMS43LjUtMC1nZTUxNDg4
Yy0yMDE0MDYwMl8xNjQ2MTItbmlsc3Nvbi5ob21lLmtyYXhlbC5vcmcgMDQvMDEvMjAxNA0KWyAg
IDI2LjAzMzc0OV0gdGFzazogZmZmZjg4MDA3Y2Y3NWEwMCB0YXNrLnN0YWNrOiBmZmZmYzkwMDAw
NjljMDAwDQpbICAgMjYuMDM0NTc1XSBSSVA6IDAwMTA6YmxrX21xX3BvbGwrMHhhMC8weDJlMA0K
WyAgIDI2LjAzNTIzNF0gUlNQOiAwMDE4OmZmZmZjOTAwMDA2OWZhMzggRUZMQUdTOiAwMDAwMDIx
Ng0KWyAgIDI2LjAzNjI2OV0gUkFYOiBmZmZmODgwMDdjNWQ1MDAwIFJCWDogMDAwMDAwMDAwMDAw
MDAwMCBSQ1g6IDAwMDAwMDAwMDAwMDAwMDANClsgICAyNi4wMzczMzBdIFJEWDogZmZmZjg4MDA3
YzU2ZDFlOCBSU0k6IDAwMDAwMDAwMDAwMDAwYTcgUkRJOiAwMDAwMDAwMDAwMDAwMDAwDQpbICAg
MjYuMDM5Mjg1XSBSQlA6IGZmZmZjOTAwMDA2OWZhZDAgUjA4OiAwMDAwMDAwMDAwMDAwMDAwIFIw
OTogMDAwMDAwMDA4MDAwMDBhNw0KWyAgIDI2LjA0MDk2Ml0gUjEwOiBmZmZmYzkwMDAwNjlmYWEw
IFIxMTogMDAwMDAwMDAwMDAwMTAwMCBSMTI6IGZmZmY4ODAwN2M1NjFmZTANClsgICAyNi4wNDIw
MDVdIFIxMzogZmZmZmM5MDAwMDY5ZmQwMSBSMTQ6IGZmZmY4ODAwN2QzZjhhOTggUjE1OiBmZmZm
ODgwMDdjNzcyODAwDQpbICAgMjYuMDQzNzM0XSBGUzogIDAwMDA3Zjg4MDhkN2M1ODAoMDAwMCkg
R1M6ZmZmZjg4MDA3ZmMwMDAwMCgwMDAwKSBrbmxHUzowMDAwMDAwMDAwMDAwMDAwDQpbICAgMjYu
MDQ1MzY2XSBDUzogIDAwMTAgRFM6IDAwMDAgRVM6IDAwMDAgQ1IwOiAwMDAwMDAwMDgwMDUwMDMz
DQpbICAgMjYuMDQ3MjM5XSBDUjI6IDAwMDAwMDAwMDAwMDAwNTAgQ1IzOiAwMDAwMDAwMDdhZDRk
MDAwIENSNDogMDAwMDAwMDAwMDAwMDZmMA0KWyAgIDI2LjA0ODgwMV0gRFIwOiAwMDAwMDAwMDAw
MDAwMDAwIERSMTogMDAwMDAwMDAwMDAwMDAwMCBEUjI6IDAwMDAwMDAwMDAwMDAwMDANClsgICAy
Ni4wNDk4OThdIERSMzogMDAwMDAwMDAwMDAwMDAwMCBEUjY6IDAwMDAwMDAwMDAwMDAwMDAgRFI3
OiAwMDAwMDAwMDAwMDAwMDAwDQpbICAgMjYuMDUxMzAwXSBDYWxsIFRyYWNlOg0KWyAgIDI2LjA1
MjU4MF0gID8gZ2VuZXJpY19tYWtlX3JlcXVlc3QrMHhmYi8weDJhMA0KWyAgIDI2LjA1Mzg2OV0g
ID8gc3VibWl0X2JpbysweDY0LzB4MTIwDQpbICAgMjYuMDU0MTIzXSAgPyBzdWJtaXRfYmlvKzB4
NjQvMHgxMjANClsgICAyNi4wNTQ5ODldICBfX2Jsa2Rldl9kaXJlY3RfSU9fc2ltcGxlKzB4MWJj
LzB4MmYwDQpbICAgMjYuMDU1OTc4XSAgPyBfX2RfbG9va3VwX2RvbmUrMHg3OS8weGUwDQpbICAg
MjYuMDU3MDA5XSAgPyBibGtkZXZfZnN5bmMrMHg1MC8weDUwDQpbICAgMjYuMDU4MzgyXSAgYmxr
ZGV2X2RpcmVjdF9JTysweDM3ZC8weDM5MA0KWyAgIDI2LjA1OTMyMF0gID8gYmxrZGV2X2RpcmVj
dF9JTysweDM3ZC8weDM5MA0KWyAgIDI2LjA1OTkzOV0gIGdlbmVyaWNfZmlsZV9yZWFkX2l0ZXIr
MHgyYzIvMHg4YzANClsgICAyNi4wNjA0ODhdICA/IGdlbmVyaWNfZmlsZV9yZWFkX2l0ZXIrMHgy
YzIvMHg4YzANClsgICAyNi4wNjA2NTZdICA/IHBhdGhfb3BlbmF0KzB4NmU0LzB4MTMyMA0KWyAg
IDI2LjA2MDkxMF0gIGJsa2Rldl9yZWFkX2l0ZXIrMHgzNS8weDQwDQpbICAgMjYuMDYxNDMwXSAg
X19kb19yZWFkdl93cml0ZXYrMHgxZWYvMHgzYjANClsgICAyNi4wNjE2MDldICBkb19yZWFkdl93
cml0ZXYrMHg3ZC8weGIwDQpbICAgMjYuMDYyNjI5XSAgPyBoYW5kbGVfbW1fZmF1bHQrMHg4OC8w
eDE1MA0KWyAgIDI2LjA2Mjg5NV0gIHZmc19yZWFkdisweDM5LzB4NTANClsgICAyNi4wNjMyOTFd
ICA/IHZmc19yZWFkdisweDM5LzB4NTANClsgICAyNi4wNjM2NjZdICBkb19wcmVhZHYrMHhiMS8w
eGQwDQpbICAgMjYuMDYzOTc1XSAgU3lTX3ByZWFkdjIrMHgxNy8weDMwDQpbICAgMjYuMDY0MTc1
XSAgZW50cnlfU1lTQ0FMTF82NF9mYXN0cGF0aCsweDFhLzB4YTkNClsgICAyNi4wNjQ2ODJdIFJJ
UDogMDAzMzoweDdmODgwNjRlMzBmOQ0KWyAgIDI2LjA2NDg3NV0gUlNQOiAwMDJiOjAwMDA3ZmZm
NzRhMWI0ZjggRUZMQUdTOiAwMDAwMDIwMiBPUklHX1JBWDogMDAwMDAwMDAwMDAwMDE0Nw0KWyAg
IDI2LjA2NTUzOF0gUkFYOiBmZmZmZmZmZmZmZmZmZmRhIFJCWDogMDAwMDAwMDAwMjQxMzUwMCBS
Q1g6IDAwMDA3Zjg4MDY0ZTMwZjkNClsgICAyNi4wNjYyNzRdIFJEWDogMDAwMDAwMDAwMDAwMDAw
MSBSU0k6IDAwMDAwMDAwMDI0MGQ3NzAgUkRJOiAwMDAwMDAwMDAwMDAwMDAzDQpbICAgMjYuMDY2
NzAxXSBSQlA6IDAwMDA3Zjg3ZWViYjgwMDAgUjA4OiAwMDAwMDAwMDAwMDAwMDAwIFIwOTogMDAw
MDAwMDAwMDAwMDAwMQ0KWyAgIDI2LjA2Njg2OF0gUjEwOiAwMDAwMDAwMDI2NmZiMDAwIFIxMTog
MDAwMDAwMDAwMDAwMDIwMiBSMTI6IDAwMDAwMDAwMDAwMDAwMDANClsgICAyNi4wNjczODRdIFIx
MzogMDAwMDAwMDAwMDAwMTAwMCBSMTQ6IDAwMDAwMDAwMDI0MTM1MjggUjE1OiAwMDAwN2Y4N2Vl
YmI4MDAwDQpbICAgMjYuMDY3ODc3XSBDb2RlOiAwMSAwMCAwMCAwZiBiNyBmMyA0OCBjMSBlMCAy
MSA0OCBjMSBlOCAzMSA4NSBkYiA0YyA4YiAzYyBjMiAwZiA4OCBiNCAwMSAwMCAwMCA0OSA4YiA4
NyBmMCAwMCAwMCAwMCAzMSBkYiAzOSAzMCAwZiA4NyBiNCAwMSAwMCAwMCA8NDg+IDhiIDQzIDUw
IDRkIDhiIGI3IDgwIDAwIDAwIDAwIGE4IDA0IDBmIDg1IGU4IDAwIDAwIDAwIDQxIDhiIA0KWyAg
IDI2LjA3MzgyMV0gUklQOiBibGtfbXFfcG9sbCsweGEwLzB4MmUwIFJTUDogZmZmZmM5MDAwMDY5
ZmEzOA0KWyAgIDI2LjA3NDA3OF0gQ1IyOiAwMDAwMDAwMDAwMDAwMDUwDQpbICAgMjYuMDc2MTAx
XSAtLS1bIGVuZCB0cmFjZSA5Zjk1NjY0NTVjZDI3YzIyIF0tLS0NCg0KQ2hlZXJzDQogDQpTdGVw
aGVuDQoNClsxXSBodHRwOi8vbWFyYy5pbmZvLz9sPWxpbnV4LWJsb2NrJm09MTQ5MTU2Nzg1MjE1
OTE5Jnc9Mg0KDQoNCg==

^ permalink raw reply

* Re: [GIT PULL 00/19] LightNVM patches for 4.12.
From: Jens Axboe @ 2017-04-16 20:37 UTC (permalink / raw)
  To: Matias Bjørling; +Cc: linux-block, linux-kernel
In-Reply-To: <20170415185553.16098-1-matias@cnexlabs.com>

On 04/15/2017 12:55 PM, Matias Bjørling wrote:
> Hi Jens,
> 
> With this merge window, we like to push pblk upstream. It is a new
> host-side translation layer that implements support for exposing
> Open-Channel SSDs as block devices.
> 
> We have described pblk in the LightNVM paper "LightNVM: The Linux
> Open-Channel SSD Subsystem" that was accepted at FAST 2017. The paper
> defines open-channel SSDs, the subsystem, pblk and has an evaluation as
> well. Over the past couple of kernel versions we have shipped the
> support patches for pblk, and we are now comfortable pushing the core of
> pblk upstream.
> 
> The core contains the logic to control data placement and I/O scheduling
> on open-channel SSDs. Including implementation of translation table
> management, GC, recovery, rate-limiting, and similar components. It
> assumes that the SSD is media-agnostic, and runs on both 1.2 and 2.0 of
> the Open-Channel SSD specification without modifications.
> 
> I want to point out two neat features of pblk. First, pblk can be
> instantiated multiple times on the same SSD, enabling I/O isolation
> between tenants, and makes it able to fulfill strict QoS requirements.
> We showed results from this at the NVMW '17 workshop this year, while
> presenting the "Multi-Tenant I/O Isolation with Open-Channel SSDs" talk.
> Second, now that a full host-side translation layer is implemented, one
> can begin to optimize its data placement and I/O scheduling algorithms
> to match user workloads. We have shown a couple of the benefits in the
> LightNVM paper, and we know of a couple of companies and universities
> that have begun making new algorithms.
> 
> In detail, this pull request contains:
> 
>  - The new host-side FTL pblk from Javier, and other contributors.
> 
>  - Add support to the "create" ioctl to force a target to be
>    re-initialized using the "factory" flag, from Javier.
> 
>  - Fix various errors in LightNVM core from Javier and me.
> 
>  - An optimization from Neil Brown to skip error checking on mempool
>    allocations that can sleep.
> 
>  - A buffer overflow fix in nvme_nvm_identify from Scott Bauer.
> 
>  - Fix for bad block discovery error handling from Christophe
>    Jaillet.
> 
>  - Fixes from Dan Carpenter to pblk after it went into linux-next.
> 
> Please pull from the for-jens branch or apply the patches posted with
> this mail:
> 
>    https://github.com/OpenChannelSSD/linux.git for-jens

Applied for 4.12, thanks Matias.

-- 
Jens Axboe

^ permalink raw reply

* [PATCH] nbd: set the max segment size to UINT_MAX
From: Josef Bacik @ 2017-04-17 13:44 UTC (permalink / raw)
  To: axboe, nbd-general, linux-block, kernel-team

NBD doesn't care about limiting the segment size, let the user push the
largest bio's they want.  This allows us to control the request size
solely through max_sectors_kb.

Signed-off-by: Josef Bacik <jbacik@fb.com>
---
 drivers/block/nbd.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index d220045..0f62d86 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -1469,6 +1469,8 @@ static int nbd_dev_add(int index)
 	queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, disk->queue);
 	disk->queue->limits.discard_granularity = 512;
 	blk_queue_max_discard_sectors(disk->queue, UINT_MAX);
+	blk_queue_max_segment_size(disk->queue, UINT_MAX);
+
 	disk->queue->limits.discard_zeroes_data = 0;
 	blk_queue_max_hw_sectors(disk->queue, 65536);
 	disk->queue->limits.max_sectors = 256;
-- 
2.7.4

^ permalink raw reply related

* Re: [PATCH] nbd: set the max segment size to UINT_MAX
From: Jens Axboe @ 2017-04-17 15:55 UTC (permalink / raw)
  To: Josef Bacik, axboe, nbd-general, linux-block, kernel-team
In-Reply-To: <1492436677-11209-1-git-send-email-jbacik@fb.com>

On 04/17/2017 07:44 AM, Josef Bacik wrote:
> NBD doesn't care about limiting the segment size, let the user push the
> largest bio's they want.  This allows us to control the request size
> solely through max_sectors_kb.

Doesn't apply to the 4.12 branch.

-- 
Jens Axboe

^ permalink raw reply

* Re: [PATCH] nbd: set the max segment size to UINT_MAX
From: Jens Axboe @ 2017-04-17 15:57 UTC (permalink / raw)
  To: Josef Bacik, axboe, nbd-general, linux-block, kernel-team
In-Reply-To: <cd50c670-3f2c-b075-1328-a670d810e175@fb.com>

On 04/17/2017 09:55 AM, Jens Axboe wrote:
> On 04/17/2017 07:44 AM, Josef Bacik wrote:
>> NBD doesn't care about limiting the segment size, let the user push the
>> largest bio's they want.  This allows us to control the request size
>> solely through max_sectors_kb.
> 
> Doesn't apply to the 4.12 branch.

Maybe it does on top of your previous series. I'll check.

-- 
Jens Axboe

^ permalink raw reply

* Re: [PATCH 00/12] nbd: Netlink interface and path failure enhancements
From: Jens Axboe @ 2017-04-17 15:59 UTC (permalink / raw)
  To: Josef Bacik, nbd-general, linux-block, kernel-team
In-Reply-To: <1491512527-4286-1-git-send-email-jbacik@fb.com>

On 04/06/2017 03:01 PM, Josef Bacik wrote:
> This patchset adds a new netlink configuration interface to NBD as well as a
> bunch of enhancements around path failures.  The patches provide the following
> enhancements to NBD
> 
>  - Netlink configuration interface that doesn't leave a userspace application
>    waiting in kernel space for the device to disconnect.
>  - Netlink reconfigure interface for adding re-connected sockets to replace dead
>    sockets.
>  - A flag to destroy the NBD device on disconnect, much like how mount -o loop
>    works.
>  - A status interface that currently will only report whether a device is
>    connected or not, but can be extended to include whatever in the future.
>  - A netlink multicast notification scheme to notify user space when there are
>    connection issues to allow for seamless reconnects.
>  - Dead link handling.  You can specify a dead link timeout and the NBD device
>    will pause IO for that timeout waiting to see if the connection can be
>    re-established.  This is helpful to allow for things like nbd server upgrades
>    where the whole server disappears for a short period of time.
> 
> These patches have been thoroughly and continuously tested for about a month.
> I've been finding bugs in various places, but this batch has been solid for the
> last few days of testing, which include a constant disconnect/reconnect torture
> test.  Thanks,

Added for 4.12, thanks.

-- 
Jens Axboe

^ permalink raw reply

* Re: [PATCH] nbd: set the max segment size to UINT_MAX
From: Jens Axboe @ 2017-04-17 15:59 UTC (permalink / raw)
  To: Josef Bacik, axboe, nbd-general, linux-block, kernel-team
In-Reply-To: <3ae72da7-95d8-04bf-72ec-f1c1ed4be8c2@fb.com>

On 04/17/2017 09:57 AM, Jens Axboe wrote:
> On 04/17/2017 09:55 AM, Jens Axboe wrote:
>> On 04/17/2017 07:44 AM, Josef Bacik wrote:
>>> NBD doesn't care about limiting the segment size, let the user push the
>>> largest bio's they want.  This allows us to control the request size
>>> solely through max_sectors_kb.
>>
>> Doesn't apply to the 4.12 branch.
> 
> Maybe it does on top of your previous series. I'll check.

Nope, does not.

-- 
Jens Axboe

^ permalink raw reply

* Re: [PATCH 02/25] block: remove the blk_execute_rq return value
From: Jens Axboe @ 2017-04-17 16:01 UTC (permalink / raw)
  To: hch@lst.de, Bart Van Assche
  Cc: linux-block@vger.kernel.org, konrad.wilk@oracle.com,
	roger.pau@citrix.com, linux-scsi@vger.kernel.org,
	linux-nvme@lists.infradead.org, jbacik@fb.com,
	james.smart@broadcom.com, dm-devel@redhat.com
In-Reply-To: <20170414082231.GC24901@lst.de>

On 04/14/2017 02:22 AM, hch@lst.de wrote:
> On Thu, Apr 13, 2017 at 08:03:22PM +0000, Bart Van Assche wrote:
>> That blk_execute_rq() call can only be reached if a few lines above 0 was
>> assigned to the "error" variable. Since nfsd4_scsi_identify_device() returns
>> the value of the "error" variable I think -EIO should be assigned to that
>> variable before the "goto out_put_request" statement is reached.
> 
> You're right!  I'll fix it up.

Are you respinning this series for 4.12?

-- 
Jens Axboe

^ permalink raw reply

* [PATCH v2 0/8] blk-mq debugfs patches for kernel v4.12
From: Bart Van Assche @ 2017-04-17 16:56 UTC (permalink / raw)
  To: Jens Axboe; +Cc: linux-block, Bart Van Assche

Hello Jens,

Please consider the eight patches in this series for kernel v4.12.
These patches improve blk-mq debugfs support.

Thanks,

Bart.

Changes compared to v1:
- Added two patches and replaced patch 1/6 such that debugfs
  attributes are now unregistered before freeing of a blk-mq queue
  starts instead of checking the "dead" queue flag.
- Changed "rq->cmd_flags ^ op" into "rq->cmd_flags & ~REQ_OP_MASK" as
  proposed by Omar.
- A seq_file pointer is now passed to the new queue_rq callback function
  instead of a fixed-size char buffer.

Bart Van Assche (8):
  blk-mq: Register <dev>/queue/mq after having registered <dev>/queue
  blk-mq: Let blk_mq_debugfs_register() look up the queue name
  blk-mq: Unregister debugfs attributes earlier
  blk-mq: Move the "state" debugfs attribute one level down
  blk-mq: Make blk_flags_show() callers append a newline character
  blk-mq: Show operation, cmd_flags and rq_flags names
  blk-mq: Add blk_mq_ops.show_rq()
  scsi: Implement blk_mq_ops.show_rq()

 block/blk-mq-debugfs.c  | 94 +++++++++++++++++++++++++++++++++++++++++--------
 block/blk-mq-sysfs.c    | 60 ++++++++++++++++---------------
 block/blk-mq.h          |  6 ++--
 block/blk-sysfs.c       |  9 +++--
 drivers/scsi/scsi_lib.c | 26 ++++++++++++++
 include/linux/blk-mq.h  |  6 ++++
 6 files changed, 151 insertions(+), 50 deletions(-)

-- 
2.12.2

^ permalink raw reply

* [PATCH v2 1/8] blk-mq: Register <dev>/queue/mq after having registered <dev>/queue
From: Bart Van Assche @ 2017-04-17 16:56 UTC (permalink / raw)
  To: Jens Axboe; +Cc: linux-block, Bart Van Assche, Omar Sandoval, Hannes Reinecke
In-Reply-To: <20170417165651.12693-1-bart.vanassche@sandisk.com>

A later patch in this series will modify blk_mq_debugfs_register()
such that it uses q->kobj.parent to determine the name of a
request queue. Hence make sure that that pointer is initialized
before blk_mq_debugfs_register() is called. To avoid lock inversion,
protect sysfs / debugfs registration with the queue sysfs_lock
instead of the global mutex all_q_mutex.

Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
Cc: Omar Sandoval <osandov@fb.com>
Cc: Hannes Reinecke <hare@suse.com>
---
 block/blk-mq-sysfs.c | 37 ++++++++++++++++++++++++++++++-------
 block/blk-mq.h       |  1 +
 block/blk-sysfs.c    |  6 +++---
 3 files changed, 34 insertions(+), 10 deletions(-)

diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c
index d745ab81033a..dc547369c875 100644
--- a/block/blk-mq-sysfs.c
+++ b/block/blk-mq-sysfs.c
@@ -253,6 +253,8 @@ static void __blk_mq_unregister_dev(struct device *dev, struct request_queue *q)
 	struct blk_mq_hw_ctx *hctx;
 	int i;
 
+	lockdep_assert_held(&q->sysfs_lock);
+
 	queue_for_each_hw_ctx(q, hctx, i)
 		blk_mq_unregister_hctx(hctx);
 
@@ -267,9 +269,9 @@ static void __blk_mq_unregister_dev(struct device *dev, struct request_queue *q)
 
 void blk_mq_unregister_dev(struct device *dev, struct request_queue *q)
 {
-	blk_mq_disable_hotplug();
+	mutex_lock(&q->sysfs_lock);
 	__blk_mq_unregister_dev(dev, q);
-	blk_mq_enable_hotplug();
+	mutex_unlock(&q->sysfs_lock);
 }
 
 void blk_mq_hctx_kobj_init(struct blk_mq_hw_ctx *hctx)
@@ -302,12 +304,13 @@ void blk_mq_sysfs_init(struct request_queue *q)
 	}
 }
 
-int blk_mq_register_dev(struct device *dev, struct request_queue *q)
+int __blk_mq_register_dev(struct device *dev, struct request_queue *q)
 {
 	struct blk_mq_hw_ctx *hctx;
 	int ret, i;
 
-	blk_mq_disable_hotplug();
+	WARN_ON_ONCE(!q->kobj.parent);
+	lockdep_assert_held(&q->sysfs_lock);
 
 	ret = kobject_add(&q->mq_kobj, kobject_get(&dev->kobj), "%s", "mq");
 	if (ret < 0)
@@ -327,8 +330,18 @@ int blk_mq_register_dev(struct device *dev, struct request_queue *q)
 		__blk_mq_unregister_dev(dev, q);
 	else
 		q->mq_sysfs_init_done = true;
+
 out:
-	blk_mq_enable_hotplug();
+	return ret;
+}
+
+int blk_mq_register_dev(struct device *dev, struct request_queue *q)
+{
+	int ret;
+
+	mutex_lock(&q->sysfs_lock);
+	ret = __blk_mq_register_dev(dev, q);
+	mutex_unlock(&q->sysfs_lock);
 
 	return ret;
 }
@@ -339,13 +352,18 @@ void blk_mq_sysfs_unregister(struct request_queue *q)
 	struct blk_mq_hw_ctx *hctx;
 	int i;
 
+	mutex_lock(&q->sysfs_lock);
+
 	if (!q->mq_sysfs_init_done)
-		return;
+		goto unlock;
 
 	blk_mq_debugfs_unregister_hctxs(q);
 
 	queue_for_each_hw_ctx(q, hctx, i)
 		blk_mq_unregister_hctx(hctx);
+
+unlock:
+	mutex_unlock(&q->sysfs_lock);
 }
 
 int blk_mq_sysfs_register(struct request_queue *q)
@@ -353,8 +371,10 @@ int blk_mq_sysfs_register(struct request_queue *q)
 	struct blk_mq_hw_ctx *hctx;
 	int i, ret = 0;
 
+	mutex_lock(&q->sysfs_lock);
+
 	if (!q->mq_sysfs_init_done)
-		return ret;
+		goto unlock;
 
 	blk_mq_debugfs_register_hctxs(q);
 
@@ -364,5 +384,8 @@ int blk_mq_sysfs_register(struct request_queue *q)
 			break;
 	}
 
+unlock:
+	mutex_unlock(&q->sysfs_lock);
+
 	return ret;
 }
diff --git a/block/blk-mq.h b/block/blk-mq.h
index 524f44742816..7d955c756810 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -78,6 +78,7 @@ static inline struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q,
  */
 extern void blk_mq_sysfs_init(struct request_queue *q);
 extern void blk_mq_sysfs_deinit(struct request_queue *q);
+extern int __blk_mq_register_dev(struct device *dev, struct request_queue *q);
 extern int blk_mq_sysfs_register(struct request_queue *q);
 extern void blk_mq_sysfs_unregister(struct request_queue *q);
 extern void blk_mq_hctx_kobj_init(struct blk_mq_hw_ctx *hctx);
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index fc20489f0d2b..726ca28584dc 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -894,9 +894,6 @@ int blk_register_queue(struct gendisk *disk)
 	if (ret)
 		return ret;
 
-	if (q->mq_ops)
-		blk_mq_register_dev(dev, q);
-
 	/* Prevent changes through sysfs until registration is completed. */
 	mutex_lock(&q->sysfs_lock);
 
@@ -906,6 +903,9 @@ int blk_register_queue(struct gendisk *disk)
 		goto unlock;
 	}
 
+	if (q->mq_ops)
+		__blk_mq_register_dev(dev, q);
+
 	kobject_uevent(&q->kobj, KOBJ_ADD);
 
 	blk_wb_init(q);
-- 
2.12.2

^ permalink raw reply related

* [PATCH v2 2/8] blk-mq: Let blk_mq_debugfs_register() look up the queue name
From: Bart Van Assche @ 2017-04-17 16:56 UTC (permalink / raw)
  To: Jens Axboe; +Cc: linux-block, Bart Van Assche, Omar Sandoval, Hannes Reinecke
In-Reply-To: <20170417165651.12693-1-bart.vanassche@sandisk.com>

A later patch will move the call of blk_mq_debugfs_register() to
a function to which the queue name is not passed as an argument.
To avoid having to add a 'name' argument to multiple callers, let
blk_mq_debugfs_register() look up the queue name.

Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
Cc: Omar Sandoval <osandov@fb.com>
Cc: Hannes Reinecke <hare@suse.com>
---
 block/blk-mq-debugfs.c | 5 +++--
 block/blk-mq-sysfs.c   | 2 +-
 block/blk-mq.h         | 5 ++---
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
index df9b688b877c..2a5d6d83d57c 100644
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -782,12 +782,13 @@ static const struct blk_mq_debugfs_attr blk_mq_debugfs_ctx_attrs[] = {
 	{},
 };
 
-int blk_mq_debugfs_register(struct request_queue *q, const char *name)
+int blk_mq_debugfs_register(struct request_queue *q)
 {
 	if (!blk_debugfs_root)
 		return -ENOENT;
 
-	q->debugfs_dir = debugfs_create_dir(name, blk_debugfs_root);
+	q->debugfs_dir = debugfs_create_dir(kobject_name(q->kobj.parent),
+					    blk_debugfs_root);
 	if (!q->debugfs_dir)
 		goto err;
 
diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c
index dc547369c875..34a594470fc5 100644
--- a/block/blk-mq-sysfs.c
+++ b/block/blk-mq-sysfs.c
@@ -318,7 +318,7 @@ int __blk_mq_register_dev(struct device *dev, struct request_queue *q)
 
 	kobject_uevent(&q->mq_kobj, KOBJ_ADD);
 
-	blk_mq_debugfs_register(q, kobject_name(&dev->kobj));
+	blk_mq_debugfs_register(q);
 
 	queue_for_each_hw_ctx(q, hctx, i) {
 		ret = blk_mq_register_hctx(hctx);
diff --git a/block/blk-mq.h b/block/blk-mq.h
index 7d955c756810..9049c0f11505 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -87,13 +87,12 @@ extern void blk_mq_hctx_kobj_init(struct blk_mq_hw_ctx *hctx);
  * debugfs helpers
  */
 #ifdef CONFIG_BLK_DEBUG_FS
-int blk_mq_debugfs_register(struct request_queue *q, const char *name);
+int blk_mq_debugfs_register(struct request_queue *q);
 void blk_mq_debugfs_unregister(struct request_queue *q);
 int blk_mq_debugfs_register_hctxs(struct request_queue *q);
 void blk_mq_debugfs_unregister_hctxs(struct request_queue *q);
 #else
-static inline int blk_mq_debugfs_register(struct request_queue *q,
-					  const char *name)
+static inline int blk_mq_debugfs_register(struct request_queue *q)
 {
 	return 0;
 }
-- 
2.12.2

^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox