All of lore.kernel.org
 help / color / mirror / Atom feed
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
To: linux-kernel@vger.kernel.org
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
	stable@vger.kernel.org,
	Nicolas Morey-Chaisemartin <nmoreychaisemartin@suse.com>,
	Bart Van Assche <bvanassche@acm.org>,
	Jason Gunthorpe <jgg@nvidia.com>, Yi Zhang <yi.zhang@redhat.com>
Subject: [PATCH 5.11 26/31] RDMA/srp: Fix support for unpopulated and unbalanced NUMA nodes
Date: Fri, 19 Mar 2021 13:19:20 +0100	[thread overview]
Message-ID: <20210319121748.047018321@linuxfoundation.org> (raw)
In-Reply-To: <20210319121747.203523570@linuxfoundation.org>

From: Nicolas Morey-Chaisemartin <nmoreychaisemartin@suse.com>

commit 2b5715fc17386a6223490d5b8f08d031999b0c0b upstream.

The current code computes a number of channels per SRP target and spreads
them equally across all online NUMA nodes.  Each channel is then assigned
a CPU within this node.

In the case of unbalanced, or even unpopulated, nodes, some channels are
never associated with a CPU and thus never get connected.  This causes the
SRP connection to fail.

This patch solves the issue by rewriting channel computation and
allocation:

- Drop the channel-to-node/CPU association, as it had no real effect on
  locality but added unnecessary complexity.

- Tweak the number of channels allocated to reduce CPU contention when
  possible:
  - Up to one channel per CPU (instead of up to 4 per node)
  - At least 4 channels per node, unless ch_count module parameter is
    used.

Link: https://lore.kernel.org/r/9cb4d9d3-30ad-2276-7eff-e85f7ddfb411@suse.com
Signed-off-by: Nicolas Morey-Chaisemartin <nmoreychaisemartin@suse.com>
Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Cc: Yi Zhang <yi.zhang@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/infiniband/ulp/srp/ib_srp.c |  116 ++++++++++++++----------------------
 1 file changed, 48 insertions(+), 68 deletions(-)

--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -3628,7 +3628,7 @@ static ssize_t srp_create_target(struct
 	struct srp_rdma_ch *ch;
 	struct srp_device *srp_dev = host->srp_dev;
 	struct ib_device *ibdev = srp_dev->dev;
-	int ret, node_idx, node, cpu, i;
+	int ret, i, ch_idx;
 	unsigned int max_sectors_per_mr, mr_per_cmd = 0;
 	bool multich = false;
 	uint32_t max_iu_len;
@@ -3753,81 +3753,61 @@ static ssize_t srp_create_target(struct
 		goto out;
 
 	ret = -ENOMEM;
-	if (target->ch_count == 0)
+	if (target->ch_count == 0) {
 		target->ch_count =
-			max_t(unsigned int, num_online_nodes(),
-			      min(ch_count ?:
-					  min(4 * num_online_nodes(),
-					      ibdev->num_comp_vectors),
-				  num_online_cpus()));
+			min(ch_count ?:
+				max(4 * num_online_nodes(),
+				    ibdev->num_comp_vectors),
+				num_online_cpus());
+	}
+
 	target->ch = kcalloc(target->ch_count, sizeof(*target->ch),
 			     GFP_KERNEL);
 	if (!target->ch)
 		goto out;
 
-	node_idx = 0;
-	for_each_online_node(node) {
-		const int ch_start = (node_idx * target->ch_count /
-				      num_online_nodes());
-		const int ch_end = ((node_idx + 1) * target->ch_count /
-				    num_online_nodes());
-		const int cv_start = node_idx * ibdev->num_comp_vectors /
-				     num_online_nodes();
-		const int cv_end = (node_idx + 1) * ibdev->num_comp_vectors /
-				   num_online_nodes();
-		int cpu_idx = 0;
-
-		for_each_online_cpu(cpu) {
-			if (cpu_to_node(cpu) != node)
-				continue;
-			if (ch_start + cpu_idx >= ch_end)
-				continue;
-			ch = &target->ch[ch_start + cpu_idx];
-			ch->target = target;
-			ch->comp_vector = cv_start == cv_end ? cv_start :
-				cv_start + cpu_idx % (cv_end - cv_start);
-			spin_lock_init(&ch->lock);
-			INIT_LIST_HEAD(&ch->free_tx);
-			ret = srp_new_cm_id(ch);
-			if (ret)
-				goto err_disconnect;
-
-			ret = srp_create_ch_ib(ch);
-			if (ret)
-				goto err_disconnect;
-
-			ret = srp_alloc_req_data(ch);
-			if (ret)
-				goto err_disconnect;
-
-			ret = srp_connect_ch(ch, max_iu_len, multich);
-			if (ret) {
-				char dst[64];
-
-				if (target->using_rdma_cm)
-					snprintf(dst, sizeof(dst), "%pIS",
-						 &target->rdma_cm.dst);
-				else
-					snprintf(dst, sizeof(dst), "%pI6",
-						 target->ib_cm.orig_dgid.raw);
-				shost_printk(KERN_ERR, target->scsi_host,
-					     PFX "Connection %d/%d to %s failed\n",
-					     ch_start + cpu_idx,
-					     target->ch_count, dst);
-				if (node_idx == 0 && cpu_idx == 0) {
-					goto free_ch;
-				} else {
-					srp_free_ch_ib(target, ch);
-					srp_free_req_data(target, ch);
-					target->ch_count = ch - target->ch;
-					goto connected;
-				}
+	for (ch_idx = 0; ch_idx < target->ch_count; ++ch_idx) {
+		ch = &target->ch[ch_idx];
+		ch->target = target;
+		ch->comp_vector = ch_idx % ibdev->num_comp_vectors;
+		spin_lock_init(&ch->lock);
+		INIT_LIST_HEAD(&ch->free_tx);
+		ret = srp_new_cm_id(ch);
+		if (ret)
+			goto err_disconnect;
+
+		ret = srp_create_ch_ib(ch);
+		if (ret)
+			goto err_disconnect;
+
+		ret = srp_alloc_req_data(ch);
+		if (ret)
+			goto err_disconnect;
+
+		ret = srp_connect_ch(ch, max_iu_len, multich);
+		if (ret) {
+			char dst[64];
+
+			if (target->using_rdma_cm)
+				snprintf(dst, sizeof(dst), "%pIS",
+					&target->rdma_cm.dst);
+			else
+				snprintf(dst, sizeof(dst), "%pI6",
+					target->ib_cm.orig_dgid.raw);
+			shost_printk(KERN_ERR, target->scsi_host,
+				PFX "Connection %d/%d to %s failed\n",
+				ch_idx,
+				target->ch_count, dst);
+			if (ch_idx == 0) {
+				goto free_ch;
+			} else {
+				srp_free_ch_ib(target, ch);
+				srp_free_req_data(target, ch);
+				target->ch_count = ch - target->ch;
+				goto connected;
 			}
-
-			multich = true;
-			cpu_idx++;
 		}
-		node_idx++;
+		multich = true;
 	}
 
 connected:



  parent reply	other threads:[~2021-03-19 12:21 UTC|newest]

Thread overview: 43+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-03-19 12:18 [PATCH 5.11 00/31] 5.11.8-rc1 review Greg Kroah-Hartman
2021-03-19 12:18 ` [PATCH 5.11 01/31] io_uring: dont attempt IO reissue from the ring exit path Greg Kroah-Hartman
2021-03-19 12:18 ` [PATCH 5.11 02/31] KVM: x86/mmu: Expand on the comment in kvm_vcpu_ad_need_write_protect() Greg Kroah-Hartman
2021-03-19 12:18 ` [PATCH 5.11 03/31] KVM: x86/mmu: Set SPTE_AD_WRPROT_ONLY_MASK if and only if PML is enabled Greg Kroah-Hartman
2021-03-19 12:18 ` [PATCH 5.11 04/31] mptcp: send ack for every add_addr Greg Kroah-Hartman
2021-03-19 12:18 ` [PATCH 5.11 05/31] mptcp: pm: add lockdep assertions Greg Kroah-Hartman
2021-03-19 12:19 ` [PATCH 5.11 06/31] mptcp: dispose initial struct socket when its subflow is closed Greg Kroah-Hartman
2021-03-19 12:19 ` [PATCH 5.11 07/31] io_uring: refactor scheduling in io_cqring_wait Greg Kroah-Hartman
2021-03-19 12:19 ` [PATCH 5.11 08/31] io_uring: refactor io_cqring_wait Greg Kroah-Hartman
2021-03-19 12:19 ` [PATCH 5.11 09/31] io_uring: dont keep looping for more events if we cant flush overflow Greg Kroah-Hartman
2021-03-19 12:19 ` [PATCH 5.11 10/31] io_uring: simplify do_read return parsing Greg Kroah-Hartman
2021-03-19 12:19 ` [PATCH 5.11 11/31] io_uring: clear IOCB_WAITQ for non -EIOCBQUEUED return Greg Kroah-Hartman
2021-03-19 12:19 ` [PATCH 5.11 12/31] gpiolib: Read "gpio-line-names" from a firmware node Greg Kroah-Hartman
2021-03-19 12:27   ` Marek Vasut
2021-03-19 12:36     ` Greg Kroah-Hartman
2021-03-19 12:45       ` Marek Vasut
2021-03-19 12:19 ` [PATCH 5.11 13/31] net: bonding: fix error return code of bond_neigh_init() Greg Kroah-Hartman
2021-03-19 14:12   ` Jiri Kosina
2021-03-19 14:24     ` Jiri Kosina
2021-03-19 14:29       ` Greg Kroah-Hartman
2021-03-19 14:25     ` Greg Kroah-Hartman
2021-03-19 15:14       ` Jiri Kosina
2021-03-19 12:19 ` [PATCH 5.11 14/31] regulator: pca9450: Add SD_VSEL GPIO for LDO5 Greg Kroah-Hartman
2021-03-19 12:19 ` [PATCH 5.11 15/31] regulator: pca9450: Enable system reset on WDOG_B assertion Greg Kroah-Hartman
2021-03-19 12:19 ` [PATCH 5.11 16/31] regulator: pca9450: Clear PRESET_EN bit to fix BUCK1/2/3 voltage setting Greg Kroah-Hartman
2021-03-19 12:19 ` [PATCH 5.11 17/31] gfs2: Add common helper for holding and releasing the freeze glock Greg Kroah-Hartman
2021-03-19 12:19 ` [PATCH 5.11 18/31] gfs2: move freeze glock outside the make_fs_rw and _ro functions Greg Kroah-Hartman
2021-03-19 12:19 ` [PATCH 5.11 19/31] gfs2: bypass signal_our_withdraw if no journal Greg Kroah-Hartman
2021-03-19 12:19 ` [PATCH 5.11 20/31] bpf: Prohibit alu ops for pointer types not defining ptr_limit Greg Kroah-Hartman
2021-03-19 12:19 ` [PATCH 5.11 21/31] bpf: Fix off-by-one for area size in creating mask to left Greg Kroah-Hartman
2021-03-19 12:19 ` [PATCH 5.11 22/31] bpf: Simplify alu_limit masking for pointer arithmetic Greg Kroah-Hartman
2021-03-19 12:19 ` [PATCH 5.11 23/31] bpf: Add sanity check for upper ptr_limit Greg Kroah-Hartman
2021-03-19 12:19 ` [PATCH 5.11 24/31] bpf, selftests: Fix up some test_verifier cases for unprivileged Greg Kroah-Hartman
2021-03-19 12:19 ` [PATCH 5.11 25/31] arm64: Unconditionally set virtual cpu id registers Greg Kroah-Hartman
2021-03-19 12:19 ` Greg Kroah-Hartman [this message]
2021-03-19 12:19 ` [PATCH 5.11 27/31] fuse: fix live lock in fuse_iget() Greg Kroah-Hartman
2021-03-19 12:19 ` [PATCH 5.11 28/31] Revert "nfsd4: remove check_conflicting_opens warning" Greg Kroah-Hartman
2021-03-19 12:19 ` [PATCH 5.11 29/31] Revert "nfsd4: a clients own opens neednt prevent delegations" Greg Kroah-Hartman
2021-03-19 12:19 ` [PATCH 5.11 30/31] net: dsa: b53: Support setting learning on port Greg Kroah-Hartman
2021-03-19 12:19 ` [PATCH 5.11 31/31] crypto: x86/aes-ni-xts - use direct calls to and 4-way stride Greg Kroah-Hartman
2021-03-19 19:38 ` [PATCH 5.11 00/31] 5.11.8-rc1 review Naresh Kamboju
2021-03-20  9:52   ` Greg Kroah-Hartman
2021-03-19 21:23 ` Guenter Roeck

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210319121748.047018321@linuxfoundation.org \
    --to=gregkh@linuxfoundation.org \
    --cc=bvanassche@acm.org \
    --cc=jgg@nvidia.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=nmoreychaisemartin@suse.com \
    --cc=stable@vger.kernel.org \
    --cc=yi.zhang@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.