linux-nvme.lists.infradead.org archive mirror
 help / color / mirror / Atom feed
From: hch@lst.de (Christoph Hellwig)
Subject: [PATCH] nvme: take node locality into account when selecting a path
Date: Wed, 3 Oct 2018 14:43:48 +0200	[thread overview]
Message-ID: <20181003124348.GA10981@lst.de> (raw)
In-Reply-To: <a033c3a2-b825-4d9a-e844-adddea2e155f@suse.de>

Yes, something like that.  Let me know when this passes some basic
FC and RDMA testing and we can merge it.

On Wed, Oct 03, 2018 at 10:56:12AM +0200, Hannes Reinecke wrote:
> On 10/2/18 7:39 PM, Christoph Hellwig wrote:
>> On Tue, Oct 02, 2018 at 07:30:02PM +0200, Hannes Reinecke wrote:
>>>> Fair enough... I can follow up on that.
>>>>
>>> Something like this?
>>
>> As said I'd rather avoid the indirect call if at all possible.
>>
>> Please either add a numa_id field to struct nvme_ctrl, or a
>> locality_dev or something.
>>
> Ah. So that should be more like it.
>
> Cheers,
>
> Hannes
>
>

> From 478db61eab3f7a178a0c1f2e5c88c742cf5006ab Mon Sep 17 00:00:00 2001
> From: Hannes Reinecke <hare at suse.com>
> Date: Wed, 3 Oct 2018 10:53:05 +0200
> Subject: [PATCH] nvme: NUMA locality information for fabrics
> 
> Add a new field 'node_id' to the nvme_ctrl structure to hold the
> NUMA locality information of the underlying hardware.
> With that we can allocate the memory structures on the same NUMA
> node as the underlying hardware.
> 
> Signed-off-by: Hannes Reinecke <hare at suse.com>
> ---
>  drivers/nvme/host/core.c      | 2 +-
>  drivers/nvme/host/fc.c        | 5 +++--
>  drivers/nvme/host/multipath.c | 4 ++--
>  drivers/nvme/host/nvme.h      | 1 +
>  drivers/nvme/host/pci.c       | 1 +
>  drivers/nvme/host/rdma.c      | 6 +++---
>  6 files changed, 11 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
> index 2db33a752e2b..0ec56e4916ea 100644
> --- a/drivers/nvme/host/core.c
> +++ b/drivers/nvme/host/core.c
> @@ -3055,7 +3055,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
>  	struct gendisk *disk;
>  	struct nvme_id_ns *id;
>  	char disk_name[DISK_NAME_LEN];
> -	int node = dev_to_node(ctrl->dev), flags = GENHD_FL_EXT_DEVT;
> +	int node = ctrl->node_id, flags = GENHD_FL_EXT_DEVT;
>  
>  	ns = kzalloc_node(sizeof(*ns), GFP_KERNEL, node);
>  	if (!ns)
> diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
> index 9d201b35397d..7f246dd04bc5 100644
> --- a/drivers/nvme/host/fc.c
> +++ b/drivers/nvme/host/fc.c
> @@ -2422,7 +2422,7 @@ nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl)
>  	ctrl->tag_set.ops = &nvme_fc_mq_ops;
>  	ctrl->tag_set.queue_depth = ctrl->ctrl.opts->queue_size;
>  	ctrl->tag_set.reserved_tags = 1; /* fabric connect */
> -	ctrl->tag_set.numa_node = NUMA_NO_NODE;
> +	ctrl->tag_set.numa_node = ctrl->ctrl.node_id;
>  	ctrl->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
>  	ctrl->tag_set.cmd_size = sizeof(struct nvme_fc_fcp_op) +
>  					(SG_CHUNK_SIZE *
> @@ -2990,6 +2990,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
>  
>  	ctrl->ctrl.opts = opts;
>  	ctrl->ctrl.nr_reconnects = 0;
> +	ctrl->ctrl.node_id = dev_to_node(lport->dev);
>  	INIT_LIST_HEAD(&ctrl->ctrl_list);
>  	ctrl->lport = lport;
>  	ctrl->rport = rport;
> @@ -3028,7 +3029,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
>  	ctrl->admin_tag_set.ops = &nvme_fc_admin_mq_ops;
>  	ctrl->admin_tag_set.queue_depth = NVME_AQ_MQ_TAG_DEPTH;
>  	ctrl->admin_tag_set.reserved_tags = 2; /* fabric connect + Keep-Alive */
> -	ctrl->admin_tag_set.numa_node = NUMA_NO_NODE;
> +	ctrl->admin_tag_set.numa_node = ctrl->ctrl.node_id;
>  	ctrl->admin_tag_set.cmd_size = sizeof(struct nvme_fc_fcp_op) +
>  					(SG_CHUNK_SIZE *
>  						sizeof(struct scatterlist)) +
> diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
> index 31f29f97374b..2616251b3236 100644
> --- a/drivers/nvme/host/multipath.c
> +++ b/drivers/nvme/host/multipath.c
> @@ -141,7 +141,7 @@ static struct nvme_ns *__nvme_find_path(struct nvme_ns_head *head, int node)
>  		    test_bit(NVME_NS_ANA_PENDING, &ns->flags))
>  			continue;
>  
> -		distance = node_distance(node, dev_to_node(ns->ctrl->dev));
> +		distance = node_distance(node, ns->ctrl->node_id);
>  
>  		switch (ns->ana_state) {
>  		case NVME_ANA_OPTIMIZED:
> @@ -276,7 +276,7 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head)
>  	if (!(ctrl->subsys->cmic & (1 << 1)) || !multipath)
>  		return 0;
>  
> -	q = blk_alloc_queue_node(GFP_KERNEL, NUMA_NO_NODE, NULL);
> +	q = blk_alloc_queue_node(GFP_KERNEL, ctrl->node_id, NULL);
>  	if (!q)
>  		goto out;
>  	q->queuedata = head;
> diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
> index 9fefba039d1e..55347a547d84 100644
> --- a/drivers/nvme/host/nvme.h
> +++ b/drivers/nvme/host/nvme.h
> @@ -153,6 +153,7 @@ struct nvme_ctrl {
>  	struct request_queue *connect_q;
>  	struct device *dev;
>  	int instance;
> +	int node_id;
>  	struct blk_mq_tag_set *tagset;
>  	struct blk_mq_tag_set *admin_tagset;
>  	struct list_head namespaces;
> diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
> index d668682f91df..b5d37aacf212 100644
> --- a/drivers/nvme/host/pci.c
> +++ b/drivers/nvme/host/pci.c
> @@ -2517,6 +2517,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
>  
>  	dev_info(dev->ctrl.device, "pci function %s\n", dev_name(&pdev->dev));
>  
> +	dev->ctrl.node_id = node;
>  	nvme_get_ctrl(&dev->ctrl);
>  	async_schedule(nvme_async_probe, dev);
>  
> diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
> index dc042017c293..a64b02c13934 100644
> --- a/drivers/nvme/host/rdma.c
> +++ b/drivers/nvme/host/rdma.c
> @@ -686,7 +686,7 @@ static struct blk_mq_tag_set *nvme_rdma_alloc_tagset(struct nvme_ctrl *nctrl,
>  		set->ops = &nvme_rdma_admin_mq_ops;
>  		set->queue_depth = NVME_AQ_MQ_TAG_DEPTH;
>  		set->reserved_tags = 2; /* connect + keep-alive */
> -		set->numa_node = NUMA_NO_NODE;
> +		set->numa_node = nctrl->node_id;
>  		set->cmd_size = sizeof(struct nvme_rdma_request) +
>  			SG_CHUNK_SIZE * sizeof(struct scatterlist);
>  		set->driver_data = ctrl;
> @@ -699,7 +699,7 @@ static struct blk_mq_tag_set *nvme_rdma_alloc_tagset(struct nvme_ctrl *nctrl,
>  		set->ops = &nvme_rdma_mq_ops;
>  		set->queue_depth = nctrl->sqsize + 1;
>  		set->reserved_tags = 1; /* fabric connect */
> -		set->numa_node = NUMA_NO_NODE;
> +		set->numa_node = nctrl->node_id;
>  		set->flags = BLK_MQ_F_SHOULD_MERGE;
>  		set->cmd_size = sizeof(struct nvme_rdma_request) +
>  			SG_CHUNK_SIZE * sizeof(struct scatterlist);
> @@ -1975,7 +1975,7 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
>  	ctrl->ctrl.queue_count = opts->nr_io_queues + 1; /* +1 for admin queue */
>  	ctrl->ctrl.sqsize = opts->queue_size - 1;
>  	ctrl->ctrl.kato = opts->kato;
> -
> +	ctrl->ctrl.node_id = NUMA_NO_NODE;
>  	ret = -ENOMEM;
>  	ctrl->queues = kcalloc(ctrl->ctrl.queue_count, sizeof(*ctrl->queues),
>  				GFP_KERNEL);
> -- 
> 2.13.7
> 

---end quoted text---

  reply	other threads:[~2018-10-03 12:43 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-09-27 23:05 [PATCH] nvme: take node locality into account when selecting a path Christoph Hellwig
2018-09-28 14:12 ` Keith Busch
2018-09-28 22:31 ` Sagi Grimberg
2018-09-30 23:01   ` Christoph Hellwig
2018-10-01 19:45     ` Sagi Grimberg
2018-10-02 17:30       ` Hannes Reinecke
2018-10-02 17:39         ` Christoph Hellwig
2018-10-03  8:56           ` Hannes Reinecke
2018-10-03 12:43             ` Christoph Hellwig [this message]
2018-10-04  1:30             ` Sagi Grimberg
2018-10-04 15:40               ` Hannes Reinecke
2018-09-29 12:09 ` Hannes Reinecke
2018-09-30 22:59   ` Christoph Hellwig

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20181003124348.GA10981@lst.de \
    --to=hch@lst.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).