From: Ross Zwisler <ross.zwisler@linux.intel.com>
To: Dave Jiang <dave.jiang@intel.com>
Cc: vinod.koul@intel.com, dmaengine@vger.kernel.org,
linux-nvdimm@lists.01.org
Subject: Re: [PATCH 4/5] libnvdimm: Adding blk-mq support to the pmem driver
Date: Tue, 1 Aug 2017 13:02:01 -0600 [thread overview]
Message-ID: <20170801190201.GD20061@linux.intel.com> (raw)
In-Reply-To: <150153988056.49768.18093169977231724535.stgit@djiang5-desk3.ch.intel.com>
On Mon, Jul 31, 2017 at 03:24:40PM -0700, Dave Jiang wrote:
> Adding blk-mq support to the pmem driver in addition to the direct bio
> support. This allows for hardware offloading via DMA engines. By default
> the bio method will be enabled. The blk-mq support can be turned on via
> module parameter queue_mode=1.
>
> Signed-off-by: Dave Jiang <dave.jiang@intel.com>
> ---
> drivers/nvdimm/pmem.c | 131 +++++++++++++++++++++++++++++++++++++++++++++----
> drivers/nvdimm/pmem.h | 3 +
> 2 files changed, 122 insertions(+), 12 deletions(-)
>
> diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
> index c544d46..347835b 100644
> --- a/drivers/nvdimm/pmem.c
> +++ b/drivers/nvdimm/pmem.c
> @@ -31,10 +31,24 @@
> #include <linux/pmem.h>
> #include <linux/dax.h>
> #include <linux/nd.h>
> +#include <linux/blk-mq.h>
> #include "pmem.h"
> #include "pfn.h"
> #include "nd.h"
>
> +enum {
> + PMEM_Q_BIO = 0,
> + PMEM_Q_MQ = 1,
> +};
> +
> +static int queue_mode = PMEM_Q_BIO;
> +module_param(queue_mode, int, 0444);
> +MODULE_PARM_DESC(queue_mode, "Pmem Queue Mode");
Maybe in the parameter description we can give the user a hint as to what the
settings are? i.e. "PMEM queue mode (0=BIO, 1=MQ+DMA)" or something?
> +
> +struct pmem_cmd {
> + struct request *rq;
> +};
> +
> static struct device *to_dev(struct pmem_device *pmem)
> {
> /*
> @@ -239,9 +253,12 @@ static const struct dax_operations pmem_dax_ops = {
> .direct_access = pmem_dax_direct_access,
> };
>
> -static void pmem_release_queue(void *q)
> +static void pmem_release_queue(void *pmem)
> {
> - blk_cleanup_queue(q);
> +
Unneeded whitespace. Also, a nit, can we maybe add a local pmem with type
"struct pmem_device *" so we don't have to cast twice?
> + blk_cleanup_queue(((struct pmem_device *)pmem)->q);
> + if (queue_mode == PMEM_Q_MQ)
> + blk_mq_free_tag_set(&((struct pmem_device *)pmem)->tag_set);
> }
>
> static void pmem_freeze_queue(void *q)
> @@ -259,6 +276,65 @@ static void pmem_release_disk(void *__pmem)
> put_disk(pmem->disk);
> }
>
> +static int pmem_handle_cmd(struct pmem_cmd *cmd)
> +{
> + struct request *req = cmd->rq;
> + struct request_queue *q = req->q;
> + struct pmem_device *pmem = q->queuedata;
> + struct nd_region *nd_region = to_region(pmem);
> + struct bio_vec bvec;
> + struct req_iterator iter;
> + int rc = 0;
> +
> + if (req->cmd_flags & REQ_FLUSH)
> + nvdimm_flush(nd_region);
> +
> + rq_for_each_segment(bvec, req, iter) {
> + rc = pmem_do_bvec(pmem, bvec.bv_page, bvec.bv_len,
> + bvec.bv_offset, op_is_write(req_op(req)),
> + iter.iter.bi_sector);
> + if (rc < 0)
> + break;
> + }
> +
> + if (req->cmd_flags & REQ_FUA)
> + nvdimm_flush(nd_region);
> +
> + blk_mq_end_request(cmd->rq, rc);
> +
> + return 0;
return rc;
?
> +}
> +
> +static int pmem_queue_rq(struct blk_mq_hw_ctx *hctx,
> + const struct blk_mq_queue_data *bd)
> +{
> + struct pmem_cmd *cmd = blk_mq_rq_to_pdu(bd->rq);
> + int rc;
> +
> + cmd->rq = bd->rq;
> +
> + blk_mq_start_request(bd->rq);
> +
> + rc = pmem_handle_cmd(cmd);
> + if (rc < 0)
> + rc = BLK_MQ_RQ_QUEUE_ERROR;
> + else
> + rc = BLK_MQ_RQ_QUEUE_OK;
> +
> + return rc;
> +}
You can get rid of 'rc' altogether and simplify this to:
static int pmem_queue_rq(struct blk_mq_hw_ctx *hctx,
const struct blk_mq_queue_data *bd)
{
struct pmem_cmd *cmd = blk_mq_rq_to_pdu(bd->rq);
cmd->rq = bd->rq;
blk_mq_start_request(bd->rq);
if (pmem_handle_cmd(cmd) < 0)
return BLK_MQ_RQ_QUEUE_ERROR;
else
return BLK_MQ_RQ_QUEUE_OK;
}
> +static int pmem_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
> + unsigned int index)
> +{
> + return 0;
> +}
I'm pretty sure you don't need a dummy implementation of .init_hctx. All
callers check to make sure it's defined before calling, so if you just leave
the OP NULL it'll do the same thing. scsi_mq_ops for example does this.
> @@ -303,17 +380,47 @@ static int pmem_attach_disk(struct device *dev,
> return -EBUSY;
> }
>
> - q = blk_alloc_queue_node(GFP_KERNEL, dev_to_node(dev));
> - if (!q)
> - return -ENOMEM;
> + if (queue_mode == PMEM_Q_MQ) {
> + pmem->tag_set.ops = &pmem_mq_ops;
> + pmem->tag_set.nr_hw_queues = nr_online_nodes;
> + pmem->tag_set.queue_depth = 64;
> + pmem->tag_set.numa_node = dev_to_node(dev);
> + pmem->tag_set.cmd_size = sizeof(struct pmem_cmd);
> + pmem->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
> + pmem->tag_set.driver_data = pmem;
> +
> + rc = blk_mq_alloc_tag_set(&pmem->tag_set);
> + if (rc < 0)
> + return rc;
> +
> + pmem->q = blk_mq_init_queue(&pmem->tag_set);
> + if (IS_ERR(pmem->q)) {
> + blk_mq_free_tag_set(&pmem->tag_set);
> + return -ENOMEM;
> + }
>
> - if (devm_add_action_or_reset(dev, pmem_release_queue, q))
> - return -ENOMEM;
> + if (devm_add_action_or_reset(dev, pmem_release_queue, pmem)) {
> + blk_mq_free_tag_set(&pmem->tag_set);
> + return -ENOMEM;
> + }
> + } else if (queue_mode == PMEM_Q_BIO) {
> + pmem->q = blk_alloc_queue_node(GFP_KERNEL, dev_to_node(dev));
> + if (!pmem->q)
> + return -ENOMEM;
> +
> + if (devm_add_action_or_reset(dev, pmem_release_queue, pmem))
> + return -ENOMEM;
> +
> + blk_queue_make_request(pmem->q, pmem_make_request);
Later on in this function there is a big block that is still manipulating 'q'
instead of pmem->q:
blk_queue_write_cache(q, true, true);
blk_queue_make_request(q, pmem_make_request);
blk_queue_physical_block_size(q, PAGE_SIZE);
blk_queue_max_hw_sectors(q, UINT_MAX);
blk_queue_bounce_limit(q, BLK_BOUNCE_ANY);
queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q);
queue_flag_set_unlocked(QUEUE_FLAG_DAX, q);
q->queuedata = pmem;
I'm pretty sure this all needs to be looking at pmem->q instead, and we may
just end up killing the local 'q'.
Also, this block duplicates the blk_queue_make_request() call, which we
manually make happen in the PMEM_Q_BIO case above. Not sure if this needs to
be common for both that and the PMEM_Q_MQ case, but we probably don't want it
in both places.
_______________________________________________
Linux-nvdimm mailing list
Linux-nvdimm@lists.01.org
https://lists.01.org/mailman/listinfo/linux-nvdimm
next prev parent reply other threads:[~2017-08-01 19:00 UTC|newest]
Thread overview: 20+ messages / expand[flat|nested] mbox.gz Atom feed top
2017-07-31 22:24 [PATCH 0/5] Adding blk-mq and DMA support to pmem block driver Dave Jiang
2017-07-31 22:24 ` [PATCH 1/5] dmaengine: ioatdma: revert 7618d035 to allow sharing of DMA channels Dave Jiang
2017-07-31 22:24 ` [PATCH 2/5] dmaengine: ioatdma: dma_prep_memcpy_to/from_sg support Dave Jiang
2017-08-01 2:14 ` Dan Williams
2017-08-01 16:39 ` Dave Jiang
2017-08-02 4:57 ` Vinod Koul
2017-07-31 22:24 ` [PATCH 3/5] dmaengine: add SG support to dmaengine_unmap Dave Jiang
2017-07-31 22:24 ` [PATCH 4/5] libnvdimm: Adding blk-mq support to the pmem driver Dave Jiang
2017-08-01 19:02 ` Ross Zwisler [this message]
2017-07-31 22:24 ` [PATCH 5/5] libnvdimm: add DMA support for pmem blk-mq Dave Jiang
2017-08-01 7:34 ` Johannes Thumshirn
2017-08-01 16:40 ` Dave Jiang
2017-08-01 17:43 ` Dan Williams
2017-08-03 8:06 ` Johannes Thumshirn
2017-08-03 15:41 ` Dan Williams
2017-08-03 16:12 ` Dave Jiang
2017-08-03 16:15 ` Dan Williams
2017-08-04 6:07 ` Johannes Thumshirn
2017-08-04 15:47 ` Dan Williams
2017-08-01 20:42 ` Ross Zwisler
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20170801190201.GD20061@linux.intel.com \
--to=ross.zwisler@linux.intel.com \
--cc=dave.jiang@intel.com \
--cc=dmaengine@vger.kernel.org \
--cc=linux-nvdimm@lists.01.org \
--cc=vinod.koul@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.