From: James Simmons <jsimmons@infradead.org>
To: Andreas Dilger <adilger@whamcloud.com>,
Oleg Drokin <green@whamcloud.com>, NeilBrown <neilb@suse.de>
Cc: Serguei Smirnov <ssmirnov@whamcloud.com>,
Amir Shehata <ashehata@whamcloud.com>,
Lustre Development List <lustre-devel@lists.lustre.org>
Subject: [lustre-devel] [PATCH 10/41] lnet: Select NI/peer NI with highest prio
Date: Sun, 4 Apr 2021 20:50:39 -0400 [thread overview]
Message-ID: <1617583870-32029-11-git-send-email-jsimmons@infradead.org> (raw)
In-Reply-To: <1617583870-32029-1-git-send-email-jsimmons@infradead.org>
From: Amir Shehata <ashehata@whamcloud.com>
Modify the selection algorithm to select the highest-priority
local and peer NI. Health always trumps all other selection
criteria.
WC-bug-id: https://jira.whamcloud.com/browse/LU-9121
Lustre-commit: 374fcb2caea3ca0 ("LU-9121 lnet: Select NI/peer NI with highest prio")
Signed-off-by: Amir Shehata <ashehata@whamcloud.com>
Signed-off-by: Serguei Smirnov <ssmirnov@whamcloud.com>
Reviewed-on: https://review.whamcloud.com/34351
Reviewed-by: Chris Horn <chris.horn@hpe.com>
Signed-off-by: James Simmons <jsimmons@infradead.org>
---
net/lnet/lnet/lib-move.c | 148 ++++++++++++++++++++++++++++++-----------------
1 file changed, 95 insertions(+), 53 deletions(-)
diff --git a/net/lnet/lnet/lib-move.c b/net/lnet/lnet/lib-move.c
index 8763c3f..166ebcc 100644
--- a/net/lnet/lnet/lib-move.c
+++ b/net/lnet/lnet/lib-move.c
@@ -1112,65 +1112,91 @@ void lnet_usr_translate_stats(struct lnet_ioctl_element_msg_stats *msg_stats,
*/
struct lnet_peer_ni *lpni = NULL;
int best_lpni_credits = (best_lpni) ? best_lpni->lpni_txcredits :
- INT_MIN;
+ INT_MIN;
int best_lpni_healthv = (best_lpni) ?
atomic_read(&best_lpni->lpni_healthv) : 0;
- bool preferred = false;
- bool ni_is_pref;
+ bool best_lpni_is_preferred = false;
+ bool lpni_is_preferred;
int lpni_healthv;
+ u32 lpni_sel_prio;
+ u32 best_sel_prio = LNET_MAX_SELECTION_PRIORITY;
while ((lpni = lnet_get_next_peer_ni_locked(peer, peer_net, lpni))) {
/* if the best_ni we've chosen aleady has this lpni
* preferred, then let's use it
*/
if (best_ni) {
- ni_is_pref = lnet_peer_is_pref_nid_locked(lpni,
- best_ni->ni_nid);
- CDEBUG(D_NET, "%s ni_is_pref = %d\n",
- libcfs_nid2str(best_ni->ni_nid), ni_is_pref);
+ lpni_is_preferred = lnet_peer_is_pref_nid_locked(lpni,
+ best_ni->ni_nid);
+ CDEBUG(D_NET, "%s lpni_is_preferred = %d\n",
+ libcfs_nid2str(best_ni->ni_nid),
+ lpni_is_preferred);
} else {
- ni_is_pref = false;
+ lpni_is_preferred = false;
}
lpni_healthv = atomic_read(&lpni->lpni_healthv);
+ lpni_sel_prio = lpni->lpni_sel_priority;
if (best_lpni)
- CDEBUG(D_NET, "%s c:[%d, %d], s:[%d, %d]\n",
+ CDEBUG(D_NET,
+ "n:[%s, %s] h:[%d, %d] p:[%d, %d] c:[%d, %d] s:[%d, %d]\n",
libcfs_nid2str(lpni->lpni_nid),
+ libcfs_nid2str(best_lpni->lpni_nid),
+ lpni_healthv, best_lpni_healthv,
+ lpni_sel_prio, best_sel_prio,
lpni->lpni_txcredits, best_lpni_credits,
lpni->lpni_seq, best_lpni->lpni_seq);
+ else
+ goto select_lpni;
/* pick the healthiest peer ni */
if (lpni_healthv < best_lpni_healthv) {
continue;
} else if (lpni_healthv > best_lpni_healthv) {
- best_lpni_healthv = lpni_healthv;
+ if (best_lpni_is_preferred)
+ best_lpni_is_preferred = false;
+ goto select_lpni;
+ }
+
+ if (lpni_sel_prio > best_sel_prio) {
+ continue;
+ } else if (lpni_sel_prio < best_sel_prio) {
+ if (best_lpni_is_preferred)
+ best_lpni_is_preferred = false;
+ goto select_lpni;
+ }
+
/* if this is a preferred peer use it */
- } else if (!preferred && ni_is_pref) {
- preferred = true;
- } else if (preferred && !ni_is_pref) {
+ if (!best_lpni_is_preferred && lpni_is_preferred) {
+ best_lpni_is_preferred = true;
+ goto select_lpni;
+ } else if (best_lpni_is_preferred && !lpni_is_preferred) {
/* this is not the preferred peer so let's ignore
* it.
*/
continue;
- } else if (lpni->lpni_txcredits < best_lpni_credits) {
+ }
+
+ if (lpni->lpni_txcredits < best_lpni_credits)
/* We already have a peer that has more credits
* available than this one. No need to consider
* this peer further.
*/
continue;
- } else if (lpni->lpni_txcredits == best_lpni_credits) {
- /* The best peer found so far and the current peer
- * have the same number of available credits let's
- * make sure to select between them using Round
- * Robin
- */
- if (best_lpni) {
- if (best_lpni->lpni_seq <= lpni->lpni_seq)
- continue;
- }
- }
+ else if (lpni->lpni_txcredits > best_lpni_credits)
+ goto select_lpni;
+ /* The best peer found so far and the current peer
+ * have the same number of available credits let's
+ * make sure to select between them using Round Robin
+ */
+ if (best_lpni && best_lpni->lpni_seq <= lpni->lpni_seq)
+ continue;
+select_lpni:
+ best_lpni_is_preferred = lpni_is_preferred;
+ best_lpni_healthv = lpni_healthv;
+ best_sel_prio = lpni_sel_prio;
best_lpni = lpni;
best_lpni_credits = lpni->lpni_txcredits;
}
@@ -1178,7 +1204,7 @@ void lnet_usr_translate_stats(struct lnet_ioctl_element_msg_stats *msg_stats,
/* if we still can't find a peer ni then we can't reach it */
if (!best_lpni) {
u32 net_id = (peer_net) ? peer_net->lpn_net_id :
- LNET_NIDNET(dst_nid);
+ LNET_NIDNET(dst_nid);
CDEBUG(D_NET, "no peer_ni found on peer net %s\n",
libcfs_net2str(net_id));
return NULL;
@@ -1396,6 +1422,7 @@ void lnet_usr_translate_stats(struct lnet_ioctl_element_msg_stats *msg_stats,
unsigned int shortest_distance;
int best_credits;
int best_healthv;
+ u32 best_sel_prio;
/* If there is no peer_ni that we can send to on this network,
* then there is no point in looking for a new best_ni here.
@@ -1404,6 +1431,7 @@ void lnet_usr_translate_stats(struct lnet_ioctl_element_msg_stats *msg_stats,
return best_ni;
if (!best_ni) {
+ best_sel_prio = LNET_MAX_SELECTION_PRIORITY;
shortest_distance = UINT_MAX;
best_credits = INT_MIN;
best_healthv = 0;
@@ -1412,6 +1440,7 @@ void lnet_usr_translate_stats(struct lnet_ioctl_element_msg_stats *msg_stats,
best_ni->ni_dev_cpt);
best_credits = atomic_read(&best_ni->ni_tx_credits);
best_healthv = atomic_read(&best_ni->ni_healthv);
+ best_sel_prio = best_ni->ni_sel_priority;
}
while ((ni = lnet_get_next_ni_locked(local_net, ni))) {
@@ -1419,10 +1448,12 @@ void lnet_usr_translate_stats(struct lnet_ioctl_element_msg_stats *msg_stats,
int ni_credits;
int ni_healthv;
int ni_fatal;
+ u32 ni_sel_prio;
ni_credits = atomic_read(&ni->ni_tx_credits);
ni_healthv = atomic_read(&ni->ni_healthv);
ni_fatal = atomic_read(&ni->ni_fatal_error_on);
+ ni_sel_prio = ni->ni_sel_priority;
/*
* calculate the distance from the CPT on which
@@ -1433,13 +1464,6 @@ void lnet_usr_translate_stats(struct lnet_ioctl_element_msg_stats *msg_stats,
md_cpt,
ni->ni_dev_cpt);
- CDEBUG(D_NET,
- "compare ni %s [c:%d, d:%d, s:%d] with best_ni %s [c:%d, d:%d, s:%d]\n",
- libcfs_nid2str(ni->ni_nid), ni_credits, distance,
- ni->ni_seq, (best_ni) ? libcfs_nid2str(best_ni->ni_nid)
- : "not seleced", best_credits, shortest_distance,
- (best_ni) ? best_ni->ni_seq : 0);
-
/*
* All distances smaller than the NUMA range
* are treated equally.
@@ -1451,30 +1475,48 @@ void lnet_usr_translate_stats(struct lnet_ioctl_element_msg_stats *msg_stats,
* Select on health, shorter distance, available
* credits, then round-robin.
*/
- if (ni_fatal) {
+ if (ni_fatal)
continue;
- } else if (ni_healthv < best_healthv) {
+
+ if (best_ni)
+ CDEBUG(D_NET,
+ "compare ni %s [c:%d, d:%d, s:%d, p:%u] with best_ni %s [c:%d, d:%d, s:%d, p:%u]\n",
+ libcfs_nid2str(ni->ni_nid), ni_credits, distance,
+ ni->ni_seq, ni_sel_prio,
+ (best_ni) ? libcfs_nid2str(best_ni->ni_nid)
+ : "not selected", best_credits, shortest_distance,
+ (best_ni) ? best_ni->ni_seq : 0,
+ best_sel_prio);
+ else
+ goto select_ni;
+
+ if (ni_healthv < best_healthv)
continue;
- } else if (ni_healthv > best_healthv) {
- best_healthv = ni_healthv;
- /* If we're going to prefer this ni because it's
- * the healthiest, then we should set the
- * shortest_distance in the algorithm in case
- * there are multiple NIs with the same health but
- * different distances.
- */
- if (distance < shortest_distance)
- shortest_distance = distance;
- } else if (distance > shortest_distance) {
+ else if (ni_healthv > best_healthv)
+ goto select_ni;
+
+ if (ni_sel_prio > best_sel_prio)
continue;
- } else if (distance < shortest_distance) {
- shortest_distance = distance;
- } else if (ni_credits < best_credits) {
+ else if (ni_sel_prio < best_sel_prio)
+ goto select_ni;
+
+ if (distance > shortest_distance)
continue;
- } else if (ni_credits == best_credits) {
- if (best_ni && best_ni->ni_seq <= ni->ni_seq)
- continue;
- }
+ else if (distance < shortest_distance)
+ goto select_ni;
+
+ if (ni_credits < best_credits)
+ continue;
+ else if (ni_credits > best_credits)
+ goto select_ni;
+
+ if (best_ni && best_ni->ni_seq <= ni->ni_seq)
+ continue;
+
+select_ni:
+ best_sel_prio = ni_sel_prio;
+ shortest_distance = distance;
+ best_healthv = ni_healthv;
best_ni = ni;
best_credits = ni_credits;
}
--
1.8.3.1
_______________________________________________
lustre-devel mailing list
lustre-devel@lists.lustre.org
http://lists.lustre.org/listinfo.cgi/lustre-devel-lustre.org
next prev parent reply other threads:[~2021-04-05 0:51 UTC|newest]
Thread overview: 42+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-04-05 0:50 [lustre-devel] [PATCH 00/41] lustre: sync to OpenSFS branch as of March 1 James Simmons
2021-04-05 0:50 ` [lustre-devel] [PATCH 01/41] lustre: llite: data corruption due to RPC reordering James Simmons
2021-04-05 0:50 ` [lustre-devel] [PATCH 02/41] lustre: llite: make readahead aware of hints James Simmons
2021-04-05 0:50 ` [lustre-devel] [PATCH 03/41] lustre: lov: avoid NULL dereference in cleanup James Simmons
2021-04-05 0:50 ` [lustre-devel] [PATCH 04/41] lustre: llite: quiet spurious ioctl warning James Simmons
2021-04-05 0:50 ` [lustre-devel] [PATCH 05/41] lustre: ptlrpc: do not output error when imp_sec is freed James Simmons
2021-04-05 0:50 ` [lustre-devel] [PATCH 06/41] lustre: update version to 2.14.0 James Simmons
2021-04-05 0:50 ` [lustre-devel] [PATCH 07/41] lnet: UDSP storage and marshalled structs James Simmons
2021-04-05 0:50 ` [lustre-devel] [PATCH 08/41] lnet: foundation patch for selection mod James Simmons
2021-04-05 0:50 ` [lustre-devel] [PATCH 09/41] lnet: Preferred gateway selection James Simmons
2021-04-05 0:50 ` James Simmons [this message]
2021-04-05 0:50 ` [lustre-devel] [PATCH 11/41] lnet: select best peer and local net James Simmons
2021-04-05 0:50 ` [lustre-devel] [PATCH 12/41] lnet: UDSP handling James Simmons
2021-04-05 0:50 ` [lustre-devel] [PATCH 13/41] lnet: Apply UDSP on local and remote NIs James Simmons
2021-04-05 0:50 ` [lustre-devel] [PATCH 14/41] lnet: Add the kernel level Marshalling API James Simmons
2021-04-05 0:50 ` [lustre-devel] [PATCH 15/41] lnet: Add the kernel level De-Marshalling API James Simmons
2021-04-05 0:50 ` [lustre-devel] [PATCH 16/41] lnet: Add the ioctl handler for "add policy" James Simmons
2021-04-05 0:50 ` [lustre-devel] [PATCH 17/41] lnet: ioctl handler for "delete policy" James Simmons
2021-04-05 0:50 ` [lustre-devel] [PATCH 18/41] lnet: ioctl handler for get policy info James Simmons
2021-04-05 0:50 ` [lustre-devel] [PATCH 19/41] lustre: update version to 2.14.50 James Simmons
2021-04-05 0:50 ` [lustre-devel] [PATCH 20/41] lustre: gss: handle empty reqmsg in sptlrpc_req_ctx_switch James Simmons
2021-04-05 0:50 ` [lustre-devel] [PATCH 21/41] lustre: sec: file ioctls to handle encryption policies James Simmons
2021-04-05 0:50 ` [lustre-devel] [PATCH 22/41] lustre: obdclass: try to skip corrupted llog records James Simmons
2021-04-05 0:50 ` [lustre-devel] [PATCH 23/41] lustre: lov: fix layout generation inc for mirror split James Simmons
2021-04-05 0:50 ` [lustre-devel] [PATCH 24/41] lnet: modify assertion in lnet_post_send_locked James Simmons
2021-04-05 0:50 ` [lustre-devel] [PATCH 25/41] lustre: lov: fixes bitfield in lod qos code James Simmons
2021-04-05 0:50 ` [lustre-devel] [PATCH 26/41] lustre: lov: grant deadlock if same OSC in two components James Simmons
2021-04-05 0:50 ` [lustre-devel] [PATCH 27/41] lustre: change EWOULDBLOCK to EAGAIN James Simmons
2021-04-05 0:50 ` [lustre-devel] [PATCH 28/41] lsutre: ldlm: return error from ldlm_namespace_new() James Simmons
2021-04-05 0:50 ` [lustre-devel] [PATCH 29/41] lustre: llite: remove unused ll_teardown_mmaps() James Simmons
2021-04-05 0:50 ` [lustre-devel] [PATCH 30/41] lustre: lov: style cleanups in lov_set_osc_active() James Simmons
2021-04-05 0:51 ` [lustre-devel] [PATCH 31/41] lustre: change various operations structs to const James Simmons
2021-04-05 0:51 ` [lustre-devel] [PATCH 32/41] lustre: mark strings in char arrays as const James Simmons
2021-04-05 0:51 ` [lustre-devel] [PATCH 33/41] lustre: convert snprintf to scnprintf as appropriate James Simmons
2021-04-05 0:51 ` [lustre-devel] [PATCH 34/41] lustre: remove non-static 'inline' markings James Simmons
2021-04-05 0:51 ` [lustre-devel] [PATCH 35/41] lustre: llite: use is_root_inode() James Simmons
2021-04-05 0:51 ` [lustre-devel] [PATCH 36/41] lnet: libcfs: discard cfs_firststr James Simmons
2021-04-05 0:51 ` [lustre-devel] [PATCH 37/41] lnet: place wire protocol data int own headers James Simmons
2021-04-05 0:51 ` [lustre-devel] [PATCH 38/41] lnet: libcfs: use wait_event_timeout() in tracefiled() James Simmons
2021-04-05 0:51 ` [lustre-devel] [PATCH 39/41] lnet: use init_wait() rather than init_waitqueue_entry() James Simmons
2021-04-05 0:51 ` [lustre-devel] [PATCH 40/41] lnet: discard LNET_MD_PHYS James Simmons
2021-04-05 0:51 ` [lustre-devel] [PATCH 41/41] lnet: o2iblnd: convert peers hash table to hashtable.h James Simmons
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1617583870-32029-11-git-send-email-jsimmons@infradead.org \
--to=jsimmons@infradead.org \
--cc=adilger@whamcloud.com \
--cc=ashehata@whamcloud.com \
--cc=green@whamcloud.com \
--cc=lustre-devel@lists.lustre.org \
--cc=neilb@suse.de \
--cc=ssmirnov@whamcloud.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).