lustre-devel-lustre.org archive mirror
 help / color / mirror / Atom feed
From: James Simmons <jsimmons@infradead.org>
To: Andreas Dilger <adilger@whamcloud.com>,
	Oleg Drokin <green@whamcloud.com>, NeilBrown <neilb@suse.de>
Cc: Serguei Smirnov <ssmirnov@whamcloud.com>,
	Lustre Development List <lustre-devel@lists.lustre.org>
Subject: [lustre-devel] [PATCH 10/27] lnet: use discovered ni status to set initial health
Date: Mon, 17 Apr 2023 09:47:06 -0400	[thread overview]
Message-ID: <1681739243-29375-11-git-send-email-jsimmons@infradead.org> (raw)
In-Reply-To: <1681739243-29375-1-git-send-email-jsimmons@infradead.org>

From: Serguei Smirnov <ssmirnov@whamcloud.com>

If not routing, track the local NI status in the ping buffer
so that a locally recognized "down" state (for example, one
caused by a downed network interface or link) is available
to any discovering peer.
If an NI's 'fatal' status changes, push the update to peers.

On the active side of discovery, check the peer NI status: if the
NI is down, decrement its health score and queue it for recovery.

WC-bug-id: https://jira.whamcloud.com/browse/LU-16563
Lustre-commit: da230373bd14306cb ("LU-16563 lnet: use discovered ni status to set initial health")
Signed-off-by: Serguei Smirnov <ssmirnov@whamcloud.com>
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/50027
Reviewed-by: Chris Horn <chris.horn@hpe.com>
Reviewed-by: Cyril Bordage <cbordage@whamcloud.com>
Reviewed-by: Frank Sehr <fsehr@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
Signed-off-by: James Simmons <jsimmons@infradead.org>
---
 include/linux/lnet/lib-lnet.h    |  3 ++-
 net/lnet/klnds/o2iblnd/o2iblnd.c | 51 ++++++++++++++++++++++++++++++----------
 net/lnet/klnds/socklnd/socklnd.c | 38 +++++++++++++++++++++++-------
 net/lnet/lnet/api-ni.c           | 20 ++++++++++++++++
 net/lnet/lnet/peer.c             | 14 +++++++++++
 5 files changed, 104 insertions(+), 22 deletions(-)

diff --git a/include/linux/lnet/lib-lnet.h b/include/linux/lnet/lib-lnet.h
index e26e150..f9f4815 100644
--- a/include/linux/lnet/lib-lnet.h
+++ b/include/linux/lnet/lib-lnet.h
@@ -127,7 +127,7 @@
 		return LNET_NI_STATUS_UP;
 	else if (atomic_read(&ni->ni_fatal_error_on))
 		return LNET_NI_STATUS_DOWN;
-	else if (ni->ni_status)
+	else if (the_lnet.ln_routing && ni->ni_status)
 		return *ni->ni_status;
 	else
 		return LNET_NI_STATUS_UP;
@@ -1216,4 +1216,5 @@ void lnet_usr_translate_stats(struct lnet_ioctl_element_msg_stats *msg_stats,
 		       old ? "up" : "down",
 		       alive ? "up" : "down");
 }
+void lnet_update_ping_buffer(void);
 #endif
diff --git a/net/lnet/klnds/o2iblnd/o2iblnd.c b/net/lnet/klnds/o2iblnd/o2iblnd.c
index a7a3c79..fc59f88 100644
--- a/net/lnet/klnds/o2iblnd/o2iblnd.c
+++ b/net/lnet/klnds/o2iblnd/o2iblnd.c
@@ -2382,15 +2382,23 @@ static int kiblnd_port_get_attr(struct kib_hca_dev *hdev)
 static inline void
 kiblnd_set_ni_fatal_on(struct kib_hca_dev *hdev, int val)
 {
-	struct kib_net  *net;
+	struct kib_net *net;
+	u32 ni_state_before;
+	bool update_ping_buf = false;
 
 	/* for health check */
 	list_for_each_entry(net, &hdev->ibh_dev->ibd_nets, ibn_list) {
 		if (val)
 			CDEBUG(D_NETERROR, "Fatal device error for NI %s\n",
 			       libcfs_nidstr(&net->ibn_ni->ni_nid));
-		atomic_set(&net->ibn_ni->ni_fatal_error_on, val);
+		ni_state_before = atomic_xchg(&net->ibn_ni->ni_fatal_error_on,
+					      val);
+		if (!update_ping_buf && val != ni_state_before)
+			update_ping_buf = true;
 	}
+
+	if (update_ping_buf)
+		lnet_update_ping_buffer();
 }
 
 void
@@ -2748,6 +2756,8 @@ void kiblnd_destroy_dev(struct kib_dev *dev)
 	bool link_down = !(operstate == IF_OPER_UP);
 	struct in_device *in_dev;
 	bool found_ip = false;
+	u32 ni_state_before;
+	bool update_ping_buf = false;
 	const struct in_ifaddr *ifa;
 
 	event_kibdev = kiblnd_dev_search(dev->name);
@@ -2757,7 +2767,6 @@ void kiblnd_destroy_dev(struct kib_dev *dev)
 
 	list_for_each_entry_safe(net, cnxt, &event_kibdev->ibd_nets, ibn_list) {
 		found_ip = false;
-
 		ni = net->ibn_ni;
 
 		in_dev = __in_dev_get_rtnl(dev);
@@ -2766,8 +2775,9 @@ void kiblnd_destroy_dev(struct kib_dev *dev)
 			       dev->name);
 			CDEBUG(D_NET, "%s: set link fatal state to 1\n",
 			       libcfs_nidstr(&net->ibn_ni->ni_nid));
-			atomic_set(&ni->ni_fatal_error_on, 1);
-			continue;
+			ni_state_before = atomic_xchg(&ni->ni_fatal_error_on,
+						      1);
+			goto ni_done;
 		}
 		in_dev_for_each_ifa_rtnl(ifa, in_dev) {
 			if (htonl(event_kibdev->ibd_ifip) == ifa->ifa_local)
@@ -2779,22 +2789,31 @@ void kiblnd_destroy_dev(struct kib_dev *dev)
 			       dev->name);
 			CDEBUG(D_NET, "%s: set link fatal state to 1\n",
 			       libcfs_nidstr(&net->ibn_ni->ni_nid));
-			atomic_set(&ni->ni_fatal_error_on, 1);
-			continue;
+			ni_state_before = atomic_xchg(&ni->ni_fatal_error_on,
+						      1);
+			goto ni_done;
 		}
 
 		if (link_down) {
 			CDEBUG(D_NET, "%s: set link fatal state to 1\n",
 			       libcfs_nidstr(&net->ibn_ni->ni_nid));
-			atomic_set(&ni->ni_fatal_error_on, link_down);
+			ni_state_before = atomic_xchg(&ni->ni_fatal_error_on,
+						      link_down);
 		} else {
 			CDEBUG(D_NET, "%s: set link fatal state to %u\n",
 			       libcfs_nidstr(&net->ibn_ni->ni_nid),
 			       (kiblnd_get_link_status(dev) == 0));
-			atomic_set(&ni->ni_fatal_error_on,
-				   (kiblnd_get_link_status(dev) == 0));
+			ni_state_before = atomic_xchg(&ni->ni_fatal_error_on,
+						      (kiblnd_get_link_status(dev) == 0));
 		}
+ni_done:
+		if (!update_ping_buf &&
+		    (atomic_read(&ni->ni_fatal_error_on) != ni_state_before))
+			update_ping_buf = true;
 	}
+
+	if (update_ping_buf)
+		lnet_update_ping_buffer();
 out:
 	return 0;
 }
@@ -2806,6 +2825,8 @@ void kiblnd_destroy_dev(struct kib_dev *dev)
 	struct kib_net *net;
 	struct kib_net *cnxt;
 	struct net_device *event_netdev = ifa->ifa_dev->dev;
+	u32 ni_state_before;
+	bool update_ping_buf = false;
 
 	event_kibdev = kiblnd_dev_search(event_netdev->name);
 
@@ -2820,9 +2841,15 @@ void kiblnd_destroy_dev(struct kib_dev *dev)
 		CDEBUG(D_NET, "%s: set link fatal state to %u\n",
 		       libcfs_nidstr(&net->ibn_ni->ni_nid),
 		       (event == NETDEV_DOWN));
-		atomic_set(&net->ibn_ni->ni_fatal_error_on,
-			   (event == NETDEV_DOWN));
+		ni_state_before = atomic_xchg(&net->ibn_ni->ni_fatal_error_on,
+					      (event == NETDEV_DOWN));
+		if (!update_ping_buf &&
+		    ((event == NETDEV_DOWN) != ni_state_before))
+			update_ping_buf = true;
 	}
+
+	if (update_ping_buf)
+		lnet_update_ping_buffer();
 out:
 	return 0;
 }
diff --git a/net/lnet/klnds/socklnd/socklnd.c b/net/lnet/klnds/socklnd/socklnd.c
index b8d6e28..435762f 100644
--- a/net/lnet/klnds/socklnd/socklnd.c
+++ b/net/lnet/klnds/socklnd/socklnd.c
@@ -2000,6 +2000,8 @@ static int ksocknal_get_link_status(struct net_device *dev)
 	bool found_ip = false;
 	struct ksock_interface *ksi = NULL;
 	struct sockaddr_in *sa;
+	u32 ni_state_before;
+	bool update_ping_buf = false;
 	const struct in_ifaddr *ifa;
 
 	ifindex = dev->ifindex;
@@ -2045,8 +2047,9 @@ static int ksocknal_get_link_status(struct net_device *dev)
 			CDEBUG(D_NET, "Interface %s has no IPv4 status.\n",
 			       dev->name);
 			CDEBUG(D_NET, "set link fatal state to 1\n");
-			atomic_set(&ni->ni_fatal_error_on, 1);
-			continue;
+			ni_state_before = atomic_xchg(&ni->ni_fatal_error_on,
+						      1);
+			goto ni_done;
 		}
 		in_dev_for_each_ifa_rtnl(ifa, in_dev) {
 			if (sa->sin_addr.s_addr == ifa->ifa_local)
@@ -2057,20 +2060,29 @@ static int ksocknal_get_link_status(struct net_device *dev)
 			CDEBUG(D_NET, "Interface %s has no matching ip\n",
 			       dev->name);
 			CDEBUG(D_NET, "set link fatal state to 1\n");
-			atomic_set(&ni->ni_fatal_error_on, 1);
-			continue;
+			ni_state_before = atomic_xchg(&ni->ni_fatal_error_on,
+						      1);
+			goto ni_done;
 		}
 
 		if (link_down) {
 			CDEBUG(D_NET, "set link fatal state to 1\n");
-			atomic_set(&ni->ni_fatal_error_on, link_down);
+			ni_state_before = atomic_xchg(&ni->ni_fatal_error_on,
+						      1);
 		} else {
 			CDEBUG(D_NET, "set link fatal state to %u\n",
 			       (ksocknal_get_link_status(dev) == 0));
-			atomic_set(&ni->ni_fatal_error_on,
-				   (ksocknal_get_link_status(dev) == 0));
+			ni_state_before = atomic_xchg(&ni->ni_fatal_error_on,
+						      (ksocknal_get_link_status(dev) == 0));
 		}
+ni_done:
+		if (!update_ping_buf &&
+		    (atomic_read(&ni->ni_fatal_error_on) != ni_state_before))
+			update_ping_buf = true;
 	}
+
+	if (update_ping_buf)
+		lnet_update_ping_buffer();
 out:
 	return 0;
 }
@@ -2086,6 +2098,8 @@ static int ksocknal_get_link_status(struct net_device *dev)
 	int ifindex;
 	struct ksock_interface *ksi = NULL;
 	struct sockaddr_in *sa;
+	u32 ni_state_before;
+	bool update_ping_buf = false;
 
 	if (!ksocknal_data.ksnd_nnets)
 		goto out;
@@ -2106,10 +2120,16 @@ static int ksocknal_get_link_status(struct net_device *dev)
 			CDEBUG(D_NET, "set link fatal state to %u\n",
 			       (event == NETDEV_DOWN));
 			ni = net->ksnn_ni;
-			atomic_set(&ni->ni_fatal_error_on,
-				   (event == NETDEV_DOWN));
+			ni_state_before = atomic_xchg(&ni->ni_fatal_error_on,
+						      (event == NETDEV_DOWN));
+			if (!update_ping_buf &&
+			    ((event == NETDEV_DOWN) != ni_state_before))
+				update_ping_buf = true;
 		}
 	}
+
+	if (update_ping_buf)
+		lnet_update_ping_buffer();
 out:
 	return 0;
 }
diff --git a/net/lnet/lnet/api-ni.c b/net/lnet/lnet/api-ni.c
index 8b0ab53..9f01dbe 100644
--- a/net/lnet/lnet/api-ni.c
+++ b/net/lnet/lnet/api-ni.c
@@ -3841,6 +3841,26 @@ int lnet_dyn_del_ni(struct lnet_nid *nid)
 	return rc;
 }
 
+void lnet_update_ping_buffer(void)
+{
+	struct lnet_ping_buffer *pbuf;
+	struct lnet_handle_md ping_mdh;
+
+	if (the_lnet.ln_routing)
+		return;
+
+	mutex_lock(&the_lnet.ln_api_mutex);
+
+	if (!lnet_ping_target_setup(&pbuf, &ping_mdh,
+				    LNET_PING_INFO_HDR_SIZE +
+				    lnet_get_ni_bytes(),
+				    false))
+		lnet_ping_target_update(pbuf, ping_mdh);
+
+	mutex_unlock(&the_lnet.ln_api_mutex);
+}
+EXPORT_SYMBOL(lnet_update_ping_buffer);
+
 void lnet_incr_dlc_seq(void)
 {
 	atomic_inc(&lnet_dlc_seq_no);
diff --git a/net/lnet/lnet/peer.c b/net/lnet/lnet/peer.c
index 619973b..ef924ce 100644
--- a/net/lnet/lnet/peer.c
+++ b/net/lnet/lnet/peer.c
@@ -3079,6 +3079,15 @@ int ping_info_count_entries(struct lnet_ping_buffer *pbuf)
 	return nnis;
 }
 
+static inline void handle_disc_lpni_health(struct lnet_peer_ni *lpni)
+{
+	if (lpni->lpni_ns_status == LNET_NI_STATUS_DOWN)
+		lnet_handle_remote_failure_locked(lpni);
+	else if (lpni->lpni_ns_status == LNET_NI_STATUS_UP &&
+		 !lpni->lpni_last_alive)
+		atomic_set(&lpni->lpni_healthv, LNET_MAX_HEALTH_VALUE);
+}
+
 /*
  * Build a peer from incoming data.
  *
@@ -3118,6 +3127,7 @@ static int lnet_peer_merge_data(struct lnet_peer *lp,
 	int i;
 	int j;
 	int rc;
+	u32 old_st;
 
 	flags = LNET_PEER_DISCOVERED;
 	if (pbuf->pb_info.pi_features & LNET_PING_FEAT_MULTI_RAIL)
@@ -3194,7 +3204,10 @@ static int lnet_peer_merge_data(struct lnet_peer *lp,
 				 */
 				lpni = lnet_peer_ni_find_locked(&curnis[i]);
 				if (lpni) {
+					old_st = lpni->lpni_ns_status;
 					lpni->lpni_ns_status = *stp;
+					if (old_st != lpni->lpni_ns_status)
+						handle_disc_lpni_health(lpni);
 					lnet_peer_ni_decref_locked(lpni);
 				}
 				break;
@@ -3224,6 +3237,7 @@ static int lnet_peer_merge_data(struct lnet_peer *lp,
 		lpni = lnet_peer_ni_find_locked(&addnis[i].ns_nid);
 		if (lpni) {
 			lpni->lpni_ns_status = addnis[i].ns_status;
+			handle_disc_lpni_health(lpni);
 			lnet_peer_ni_decref_locked(lpni);
 		}
 	}
-- 
1.8.3.1

_______________________________________________
lustre-devel mailing list
lustre-devel@lists.lustre.org
http://lists.lustre.org/listinfo.cgi/lustre-devel-lustre.org

  parent reply	other threads:[~2023-04-17 13:59 UTC|newest]

Thread overview: 28+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-04-17 13:46 [lustre-devel] [PATCH 00/27] lustre: sync to OpenSFS branch April 17, 2023 James Simmons
2023-04-17 13:46 ` [lustre-devel] [PATCH 01/27] lustre: llite: fix the wrong beyond read end calculation James Simmons
2023-04-17 13:46 ` [lustre-devel] [PATCH 02/27] lustre: lov: continue fsync on other OST objs even on -ENOENT James Simmons
2023-04-17 13:46 ` [lustre-devel] [PATCH 03/27] lustre: llite: protect cp_state with vmpage lock James Simmons
2023-04-17 13:47 ` [lustre-devel] [PATCH 04/27] lustre: llite: restart clio for AIO if necessary James Simmons
2023-04-17 13:47 ` [lustre-devel] [PATCH 05/27] lustre: protocol: add OBD_BRW_COMPRESSED James Simmons
2023-04-17 13:47 ` [lustre-devel] [PATCH 06/27] lustre: llite: call truncate_inode_pages() under inode lock James Simmons
2023-04-17 13:47 ` [lustre-devel] [PATCH 07/27] lustre: fid: reduce LUSTRE_DATA_SEQ_MAX_WIDTH James Simmons
2023-04-17 13:47 ` [lustre-devel] [PATCH 08/27] lnet: handle multi-rail setups James Simmons
2023-04-17 13:47 ` [lustre-devel] [PATCH 09/27] lustre: readahead: clip readahead with kms James Simmons
2023-04-17 13:47 ` James Simmons [this message]
2023-04-17 13:47 ` [lustre-devel] [PATCH 11/27] lnet: add 'lock_prim_nid" lnet module parameter James Simmons
2023-04-17 13:47 ` [lustre-devel] [PATCH 12/27] lustre: obdclass: fix rpc slot leakage James Simmons
2023-04-17 13:47 ` [lustre-devel] [PATCH 13/27] lnet: libcfs: cleanup console messages James Simmons
2023-04-17 13:47 ` [lustre-devel] [PATCH 14/27] lustre: ldlm: clear lock converting flag on resource cleanup James Simmons
2023-04-17 13:47 ` [lustre-devel] [PATCH 15/27] lustre: statahead: statahead thread doesn't stop James Simmons
2023-04-17 13:47 ` [lustre-devel] [PATCH 16/27] lustre: uapi: fix unused function errors James Simmons
2023-04-17 13:47 ` [lustre-devel] [PATCH 17/27] lnet: Health logging improvements James Simmons
2023-04-17 13:47 ` [lustre-devel] [PATCH 18/27] lustre: update version to 2.15.54 James Simmons
2023-04-17 13:47 ` [lustre-devel] [PATCH 19/27] lustre: misc: remove unnecessary ioctl typecasts James Simmons
2023-04-17 13:47 ` [lustre-devel] [PATCH 20/27] lustre: llite: move common ioctl code to ll_iocontrol() James Simmons
2023-04-17 13:47 ` [lustre-devel] [PATCH 21/27] lnet: change LNetAddPeer() to take struct lnet_nid James Simmons
2023-04-17 13:47 ` [lustre-devel] [PATCH 22/27] lustre: obdclass: change class_add/check_uuid to large nid James Simmons
2023-04-17 13:47 ` [lustre-devel] [PATCH 23/27] lustre: obdclass: rename class_parse_nid to class_parse_nid4 James Simmons
2023-04-17 13:47 ` [lustre-devel] [PATCH 24/27] lustre: llite: only first sync to MDS matter James Simmons
2023-04-17 13:47 ` [lustre-devel] [PATCH 25/27] lustre: statahead: batched statahead processing James Simmons
2023-04-17 13:47 ` [lustre-devel] [PATCH 26/27] lustre: llite: fix LSOM blocks for ftruncate and close James Simmons
2023-04-17 13:47 ` [lustre-devel] [PATCH 27/27] lnet: fix clang build errors James Simmons

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1681739243-29375-11-git-send-email-jsimmons@infradead.org \
    --to=jsimmons@infradead.org \
    --cc=adilger@whamcloud.com \
    --cc=green@whamcloud.com \
    --cc=lustre-devel@lists.lustre.org \
    --cc=neilb@suse.de \
    --cc=ssmirnov@whamcloud.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).