All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v3 10/20] multipathd: adjust when mpp is synced with the kernel
  2024-07-19 19:40 [PATCH v3 00/20] path checker refactor and misc fixes Benjamin Marzinski
@ 2024-07-19 19:40 ` Benjamin Marzinski
  0 siblings, 0 replies; 6+ messages in thread
From: Benjamin Marzinski @ 2024-07-19 19:40 UTC (permalink / raw)
  To: Christophe Varoqui; +Cc: device-mapper development, Martin Wilck

Move the code to sync the mpp device state into a helper function and
add a counter to to make sure that the device is synced at least once
every max_checkint secs. This makes sure that multipath devices with no
paths will still get synced with the kernel.  Also, if multiple paths
are checked in the same loop, the multipath device will only be synced
with the kernel once, since every time the mpp is synced in any code
path, mpp->sync_tick is reset.

The code still syncs the mpp before updating the path state for two
main reasons.

1. Sometimes multipathd leaves the mpp with a garbage state. Future
   patches will fix most of these cases, but the code intentially
   does not remove the mpp is resyncing fails while checking paths.
   But this does leave the mpp with a garbage state.

2. The kernel chages the multipath state independently of multipathd. If
   the kernel fails a path, a uevent will arrive shortly. But the kernel
   doesn't provide any notification when it switches the active
   path group or if it ends up picking a different one than multipathd
   selected. Multipathd needs to know the actual current pathgroup to
   know when it should be switching them.

Signed-off-by: Benjamin Marzinski <bmarzins@redhat.com>
---
 libmultipath/configure.c   |  1 +
 libmultipath/structs.h     |  2 ++
 libmultipath/structs_vec.c |  5 +++
 multipathd/main.c          | 64 +++++++++++++++++++++++++-------------
 4 files changed, 50 insertions(+), 22 deletions(-)

diff --git a/libmultipath/configure.c b/libmultipath/configure.c
index b4de863c..34158e31 100644
--- a/libmultipath/configure.c
+++ b/libmultipath/configure.c
@@ -358,6 +358,7 @@ int setup_map(struct multipath *mpp, char **params, struct vectors *vecs)
 
 	sysfs_set_scsi_tmo(conf, mpp);
 	marginal_pathgroups = conf->marginal_pathgroups;
+	mpp->sync_tick = conf->max_checkint;
 	pthread_cleanup_pop(1);
 
 	if (!mpp->features || !mpp->hwhandler || !mpp->selector) {
diff --git a/libmultipath/structs.h b/libmultipath/structs.h
index 3b91e39c..002eeae1 100644
--- a/libmultipath/structs.h
+++ b/libmultipath/structs.h
@@ -453,6 +453,8 @@ struct multipath {
 	int ghost_delay;
 	int ghost_delay_tick;
 	int queue_mode;
+	unsigned int sync_tick;
+	bool is_checked;
 	uid_t uid;
 	gid_t gid;
 	mode_t mode;
diff --git a/libmultipath/structs_vec.c b/libmultipath/structs_vec.c
index d58ef5a7..7f267ba0 100644
--- a/libmultipath/structs_vec.c
+++ b/libmultipath/structs_vec.c
@@ -505,11 +505,16 @@ update_multipath_table (struct multipath *mpp, vector pathvec, int flags)
 	char __attribute__((cleanup(cleanup_charp))) *params = NULL;
 	char __attribute__((cleanup(cleanup_charp))) *status = NULL;
 	unsigned long long size;
+	struct config *conf;
 
 	if (!mpp)
 		return r;
 
 	size = mpp->size;
+	conf = get_multipath_config();
+	mpp->sync_tick = conf->max_checkint;
+	put_multipath_config(conf);
+
 	r = libmp_mapinfo(DM_MAP_BY_NAME | MAPINFO_MPATH_ONLY,
 			  (mapid_t) { .str = mpp->alias },
 			  (mapinfo_t) {
diff --git a/multipathd/main.c b/multipathd/main.c
index 86f7ab1f..ae40f599 100644
--- a/multipathd/main.c
+++ b/multipathd/main.c
@@ -2342,6 +2342,37 @@ check_path_state(struct path *pp)
 	return newstate;
 }
 
+static void
+do_sync_mpp(struct vectors * vecs, struct multipath *mpp)
+{
+	int i, ret;
+	struct path *pp;
+
+	mpp->is_checked = true;
+	ret = update_multipath_strings(mpp, vecs->pathvec);
+	if (ret != DMP_OK) {
+		condlog(1, "%s: %s", mpp->alias, ret == DMP_NOT_FOUND ?
+			"device not found" :
+			"couldn't synchronize with kernel state");
+		vector_foreach_slot (mpp->paths, pp, i)
+			pp->dmstate = PSTATE_UNDEF;
+		return;
+	}
+	set_no_path_retry(mpp);
+}
+
+static void
+sync_mpp(struct vectors * vecs, struct multipath *mpp, unsigned int ticks)
+{
+	if (mpp->sync_tick)
+		mpp->sync_tick -= (mpp->sync_tick > ticks) ? ticks :
+				  mpp->sync_tick;
+	if (mpp->sync_tick)
+		return;
+
+	do_sync_mpp(vecs, mpp);
+}
+
 /*
  * Returns '1' if the path has been checked and '0' otherwise
  */
@@ -2356,7 +2387,6 @@ check_path (struct vectors * vecs, struct path * pp, unsigned int ticks)
 	unsigned int checkint, max_checkint;
 	struct config *conf;
 	int marginal_pathgroups, marginal_changed = 0;
-	int ret;
 	bool need_reload;
 
 	if (pp->initialized == INIT_REMOVED)
@@ -2395,26 +2425,6 @@ check_path (struct vectors * vecs, struct path * pp, unsigned int ticks)
 		pp->tick = 1;
 		return 0;
 	}
-	/*
-	 * Synchronize with kernel state
-	 */
-	ret = update_multipath_strings(pp->mpp, vecs->pathvec);
-	if (ret != DMP_OK) {
-		if (ret == DMP_NOT_FOUND) {
-			/* multipath device missing. Likely removed */
-			condlog(1, "%s: multipath device '%s' not found",
-				pp->dev, pp->mpp ? pp->mpp->alias : "");
-			return 0;
-		} else
-			condlog(1, "%s: Couldn't synchronize with kernel state",
-				pp->dev);
-		pp->dmstate = PSTATE_UNDEF;
-	}
-	/* if update_multipath_strings orphaned the path, quit early */
-	if (!pp->mpp)
-		return 0;
-	set_no_path_retry(pp->mpp);
-
 	if (pp->recheck_wwid == RECHECK_WWID_ON &&
 	    (newstate == PATH_UP || newstate == PATH_GHOST) &&
 	    ((pp->state != PATH_UP && pp->state != PATH_GHOST) ||
@@ -2424,7 +2434,12 @@ check_path (struct vectors * vecs, struct path * pp, unsigned int ticks)
 		handle_path_wwid_change(pp, vecs);
 		return 0;
 	}
-
+	if (!pp->mpp->is_checked) {
+		do_sync_mpp(vecs, pp->mpp);
+		/* if update_multipath_strings orphaned the path, quit early */
+		if (!pp->mpp)
+			return 0;
+	}
 	if ((newstate != PATH_UP && newstate != PATH_GHOST &&
 	     newstate != PATH_PENDING) && (pp->state == PATH_DELAYED)) {
 		/* If path state become failed again cancel path delay state */
@@ -2752,12 +2767,17 @@ checkerloop (void *ap)
 		while (checker_state != CHECKER_FINISHED) {
 			unsigned int paths_checked = 0, i;
 			struct timespec chk_start_time;
+			struct multipath *mpp;
 
 			pthread_cleanup_push(cleanup_lock, &vecs->lock);
 			lock(&vecs->lock);
 			pthread_testcancel();
+			vector_foreach_slot(vecs->mpvec, mpp, i)
+				mpp->is_checked = false;
 			get_monotonic_time(&chk_start_time);
 			if (checker_state == CHECKER_STARTING) {
+				vector_foreach_slot(vecs->mpvec, mpp, i)
+					sync_mpp(vecs, mpp, ticks);
 				vector_foreach_slot(vecs->pathvec, pp, i)
 					pp->is_checked = false;
 				checker_state = CHECKER_RUNNING;
-- 
2.45.0


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH v3 00/20] path checker refactor and misc fixes
@ 2024-07-22 23:23 Benjamin Marzinski
  2024-07-22 23:23 ` [PATCH v3 10/20] multipathd: adjust when mpp is synced with the kernel Benjamin Marzinski
                   ` (2 more replies)
  0 siblings, 3 replies; 6+ messages in thread
From: Benjamin Marzinski @ 2024-07-22 23:23 UTC (permalink / raw)
  To: Christophe Varoqui; +Cc: device-mapper development, Martin Wilck

his patchset is based on discussions I had with Martin Wilck about my
last, partially applied patchset. It's based on top of his
mwilck/coverity branch, 27053732 fixup '"libmultipath: use
libmp_pathinfo() in update_multipath_table()"'.

The first two patches are reposts of patches from my earlier patchset,
redone to work with the new libmp_mapinfo() API. They make it possible
to add maps by WWID with "multipathd add map".

The rest of the patches are work and bugfixes related to refactoring
checkerloop so that

1. multipath devices get resynced with the kernel occasionally, even if
they have not paths

2. If multiple paths from a multipath device are checked in the same
loop, the multipath device will only get resynced once.

3. All the paths of a multipath device will converge to being checked at
the same time (at least as much as their differing checkint values will
allow).

4. The times for checking the paths of each multipath device will spread
out as much as possible so multipathd doesn't do all of it's checking in
a burst.

5. path checking is done my multipath device (for initialized paths,
the uninitialized paths are handled after all the adopted paths are
handled).

Items 1 & 2 are handled by patch 10 and preceding patches.

Items 3 & 4 and handled by patch 17 and preceding patches.

Item 5 is handled by patch 18 and 19.

Changes in v3 (only posting the patches that have changed):
10: adapt to 27053732 'fixup "libmultipath: use libmp_pathinfo() in
    update_multipath_table()"'. This is identical to 1d564df4
    ("multipathd: adjust when mpp is synced with the kernel") on
    mwilck/tip.
11: fix commit message (Martin)
17: use symbolic return code (Martin)


Changes in v2 (old patch number in quotes):
01: change from returning minor number to dm info (Martin)
02: adapt to change in patch 01
05 (05-07): squash into one commit (Martin)
08 (10): fix commit message (Matin)
10 (12): rename do_check_mpp amd check_mpp
11 (13): move the code that saved the path name to right before it was
         removed (Martin)
14 (16): use symbolic return codes (Martin)
15 (17): set path state to PATH_UNCHECKED when orphaned, and skip paths
         in INIT_REMOVED in sync_map_state() (Martin)
17 (19): clear pp->pending_ticks in check_path() if the path is delayed
         instead of pending. Use a different equation to check if we
         need to modify the ticks, that can adjust the ticks on every
         check. adapt to change in patch 14
18 (20): adapt to change in patch 14
19 (21): adapt to change in patch 10 and 14
20 (22): Add fixes trailer


Benjamin Marzinski (20):
  libmultipath: rename dm_map_present_by_wwid() and add outputs
  multipathd: make cli_add_map() handle adding maps by WWID correctly
  multipathd: remove checker restart optimization
  multipathd: refactor path state getting code into a helper
  multipathd: handle uninitialized paths in new function
  multipathd: check paths immediately after failing udev initialization
  multipathd: set pp->tick = max_checkint in handle_uninitialized_path
  multipathd: return 0 from check_path() if that path wasn't checked
  multipathd: reorder path state checks
  multipathd: adjust when mpp is synced with the kernel
  multipathd: resync map after setup_map in ev_remove_path
  multipathd: resync map after setup_map in resize_map
  multipathd: always resync map in reload_and_sync_map
  multipathd: correctly handle paths removed for a wwid change
  multipathd: handle changed wwid when adding partial path
  multipathd: don't read conf->checkint twice in check_path
  multipathd: make multipath devices manage their path check times
  multipathd: factor out actual path checking loop from checkerloop
  multipathd: check paths in order by mpp
  multipathd: clean up the correct wwid in check_path_wwid_change

 libmultipath/config.c             |  12 +
 libmultipath/config.h             |   1 +
 libmultipath/configure.c          |   1 +
 libmultipath/devmapper.c          |   5 +-
 libmultipath/devmapper.h          |   2 +-
 libmultipath/libmultipath.version |   5 +
 libmultipath/structs.c            |   2 +
 libmultipath/structs.h            |   3 +
 libmultipath/structs_vec.c        |   9 +-
 libmultipath/valid.c              |   2 +-
 multipathd/cli_handlers.c         |  66 ++--
 multipathd/main.c                 | 582 +++++++++++++++++++-----------
 multipathd/main.h                 |   2 +-
 tests/valid.c                     |  13 +-
 14 files changed, 432 insertions(+), 273 deletions(-)

-- 
2.45.0


^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH v3 10/20] multipathd: adjust when mpp is synced with the kernel
  2024-07-22 23:23 [PATCH v3 00/20] path checker refactor and misc fixes Benjamin Marzinski
@ 2024-07-22 23:23 ` Benjamin Marzinski
  2024-07-22 23:35   ` Benjamin Marzinski
  2024-07-22 23:23 ` [PATCH v3 11/20] multipathd: resync map after setup_map in ev_remove_path Benjamin Marzinski
  2024-07-22 23:23 ` [PATCH v3 17/20] multipathd: make multipath devices manage their path check times Benjamin Marzinski
  2 siblings, 1 reply; 6+ messages in thread
From: Benjamin Marzinski @ 2024-07-22 23:23 UTC (permalink / raw)
  To: Christophe Varoqui; +Cc: device-mapper development, Martin Wilck

Move the code to sync the mpp device state into a helper function and
add a counter to to make sure that the device is synced at least once
every max_checkint secs. This makes sure that multipath devices with no
paths will still get synced with the kernel.  Also, if multiple paths
are checked in the same loop, the multipath device will only be synced
with the kernel once, since every time the mpp is synced in any code
path, mpp->sync_tick is reset.

The code still syncs the mpp before updating the path state for two
main reasons.

1. Sometimes multipathd leaves the mpp with a garbage state. Future
   patches will fix most of these cases, but the code intentially
   does not remove the mpp is resyncing fails while checking paths.
   But this does leave the mpp with a garbage state.

2. The kernel chages the multipath state independently of multipathd. If
   the kernel fails a path, a uevent will arrive shortly. But the kernel
   doesn't provide any notification when it switches the active
   path group or if it ends up picking a different one than multipathd
   selected. Multipathd needs to know the actual current pathgroup to
   know when it should be switching them.

Signed-off-by: Benjamin Marzinski <bmarzins@redhat.com>
---
 libmultipath/configure.c   |  1 +
 libmultipath/structs.h     |  2 ++
 libmultipath/structs_vec.c |  5 +++
 multipathd/main.c          | 64 +++++++++++++++++++++++++-------------
 4 files changed, 50 insertions(+), 22 deletions(-)

diff --git a/libmultipath/configure.c b/libmultipath/configure.c
index b4de863c..34158e31 100644
--- a/libmultipath/configure.c
+++ b/libmultipath/configure.c
@@ -358,6 +358,7 @@ int setup_map(struct multipath *mpp, char **params, struct vectors *vecs)
 
 	sysfs_set_scsi_tmo(conf, mpp);
 	marginal_pathgroups = conf->marginal_pathgroups;
+	mpp->sync_tick = conf->max_checkint;
 	pthread_cleanup_pop(1);
 
 	if (!mpp->features || !mpp->hwhandler || !mpp->selector) {
diff --git a/libmultipath/structs.h b/libmultipath/structs.h
index 3b91e39c..002eeae1 100644
--- a/libmultipath/structs.h
+++ b/libmultipath/structs.h
@@ -453,6 +453,8 @@ struct multipath {
 	int ghost_delay;
 	int ghost_delay_tick;
 	int queue_mode;
+	unsigned int sync_tick;
+	bool is_checked;
 	uid_t uid;
 	gid_t gid;
 	mode_t mode;
diff --git a/libmultipath/structs_vec.c b/libmultipath/structs_vec.c
index d58ef5a7..7f267ba0 100644
--- a/libmultipath/structs_vec.c
+++ b/libmultipath/structs_vec.c
@@ -505,11 +505,16 @@ update_multipath_table (struct multipath *mpp, vector pathvec, int flags)
 	char __attribute__((cleanup(cleanup_charp))) *params = NULL;
 	char __attribute__((cleanup(cleanup_charp))) *status = NULL;
 	unsigned long long size;
+	struct config *conf;
 
 	if (!mpp)
 		return r;
 
 	size = mpp->size;
+	conf = get_multipath_config();
+	mpp->sync_tick = conf->max_checkint;
+	put_multipath_config(conf);
+
 	r = libmp_mapinfo(DM_MAP_BY_NAME | MAPINFO_MPATH_ONLY,
 			  (mapid_t) { .str = mpp->alias },
 			  (mapinfo_t) {
diff --git a/multipathd/main.c b/multipathd/main.c
index 86f7ab1f..ae40f599 100644
--- a/multipathd/main.c
+++ b/multipathd/main.c
@@ -2342,6 +2342,37 @@ check_path_state(struct path *pp)
 	return newstate;
 }
 
+static void
+do_sync_mpp(struct vectors * vecs, struct multipath *mpp)
+{
+	int i, ret;
+	struct path *pp;
+
+	mpp->is_checked = true;
+	ret = update_multipath_strings(mpp, vecs->pathvec);
+	if (ret != DMP_OK) {
+		condlog(1, "%s: %s", mpp->alias, ret == DMP_NOT_FOUND ?
+			"device not found" :
+			"couldn't synchronize with kernel state");
+		vector_foreach_slot (mpp->paths, pp, i)
+			pp->dmstate = PSTATE_UNDEF;
+		return;
+	}
+	set_no_path_retry(mpp);
+}
+
+static void
+sync_mpp(struct vectors * vecs, struct multipath *mpp, unsigned int ticks)
+{
+	if (mpp->sync_tick)
+		mpp->sync_tick -= (mpp->sync_tick > ticks) ? ticks :
+				  mpp->sync_tick;
+	if (mpp->sync_tick)
+		return;
+
+	do_sync_mpp(vecs, mpp);
+}
+
 /*
  * Returns '1' if the path has been checked and '0' otherwise
  */
@@ -2356,7 +2387,6 @@ check_path (struct vectors * vecs, struct path * pp, unsigned int ticks)
 	unsigned int checkint, max_checkint;
 	struct config *conf;
 	int marginal_pathgroups, marginal_changed = 0;
-	int ret;
 	bool need_reload;
 
 	if (pp->initialized == INIT_REMOVED)
@@ -2395,26 +2425,6 @@ check_path (struct vectors * vecs, struct path * pp, unsigned int ticks)
 		pp->tick = 1;
 		return 0;
 	}
-	/*
-	 * Synchronize with kernel state
-	 */
-	ret = update_multipath_strings(pp->mpp, vecs->pathvec);
-	if (ret != DMP_OK) {
-		if (ret == DMP_NOT_FOUND) {
-			/* multipath device missing. Likely removed */
-			condlog(1, "%s: multipath device '%s' not found",
-				pp->dev, pp->mpp ? pp->mpp->alias : "");
-			return 0;
-		} else
-			condlog(1, "%s: Couldn't synchronize with kernel state",
-				pp->dev);
-		pp->dmstate = PSTATE_UNDEF;
-	}
-	/* if update_multipath_strings orphaned the path, quit early */
-	if (!pp->mpp)
-		return 0;
-	set_no_path_retry(pp->mpp);
-
 	if (pp->recheck_wwid == RECHECK_WWID_ON &&
 	    (newstate == PATH_UP || newstate == PATH_GHOST) &&
 	    ((pp->state != PATH_UP && pp->state != PATH_GHOST) ||
@@ -2424,7 +2434,12 @@ check_path (struct vectors * vecs, struct path * pp, unsigned int ticks)
 		handle_path_wwid_change(pp, vecs);
 		return 0;
 	}
-
+	if (!pp->mpp->is_checked) {
+		do_sync_mpp(vecs, pp->mpp);
+		/* if update_multipath_strings orphaned the path, quit early */
+		if (!pp->mpp)
+			return 0;
+	}
 	if ((newstate != PATH_UP && newstate != PATH_GHOST &&
 	     newstate != PATH_PENDING) && (pp->state == PATH_DELAYED)) {
 		/* If path state become failed again cancel path delay state */
@@ -2752,12 +2767,17 @@ checkerloop (void *ap)
 		while (checker_state != CHECKER_FINISHED) {
 			unsigned int paths_checked = 0, i;
 			struct timespec chk_start_time;
+			struct multipath *mpp;
 
 			pthread_cleanup_push(cleanup_lock, &vecs->lock);
 			lock(&vecs->lock);
 			pthread_testcancel();
+			vector_foreach_slot(vecs->mpvec, mpp, i)
+				mpp->is_checked = false;
 			get_monotonic_time(&chk_start_time);
 			if (checker_state == CHECKER_STARTING) {
+				vector_foreach_slot(vecs->mpvec, mpp, i)
+					sync_mpp(vecs, mpp, ticks);
 				vector_foreach_slot(vecs->pathvec, pp, i)
 					pp->is_checked = false;
 				checker_state = CHECKER_RUNNING;
-- 
2.45.0


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH v3 11/20] multipathd: resync map after setup_map in ev_remove_path
  2024-07-22 23:23 [PATCH v3 00/20] path checker refactor and misc fixes Benjamin Marzinski
  2024-07-22 23:23 ` [PATCH v3 10/20] multipathd: adjust when mpp is synced with the kernel Benjamin Marzinski
@ 2024-07-22 23:23 ` Benjamin Marzinski
  2024-07-22 23:23 ` [PATCH v3 17/20] multipathd: make multipath devices manage their path check times Benjamin Marzinski
  2 siblings, 0 replies; 6+ messages in thread
From: Benjamin Marzinski @ 2024-07-22 23:23 UTC (permalink / raw)
  To: Christophe Varoqui; +Cc: device-mapper development, Martin Wilck

In ev_remove_path() it was possible to exit after calling setup_map()
without resyncing the mpp state with the kernel. This meant that the
mpp state in multipathd might not match with the kernel state at all.

It's safe to exit before calling setup_map() if either wait_for_udev is
set or need_do_map is not set. In both cases, setup_map() will later be
called, either by a uevent or by the calling function.

Once setup_map() has been called, setup_multipath() and sync_map_state()
are now always called, to make sure the mpp matches the kernel state.

Signed-off-by: Benjamin Marzinski <bmarzins@redhat.com>
---
 multipathd/main.c | 40 +++++++++++++++++++---------------------
 1 file changed, 19 insertions(+), 21 deletions(-)

diff --git a/multipathd/main.c b/multipathd/main.c
index ae40f599..6a027213 100644
--- a/multipathd/main.c
+++ b/multipathd/main.c
@@ -1395,6 +1395,8 @@ ev_remove_path (struct path *pp, struct vectors * vecs, int need_do_map)
 	 * avoid referring to the map of an orphaned path
 	 */
 	if ((mpp = pp->mpp)) {
+		char devt[BLK_DEV_SIZE];
+
 		/*
 		 * Mark the path as removed. In case of success, we
 		 * will delete it for good. Otherwise, it will be deleted
@@ -1428,12 +1430,6 @@ ev_remove_path (struct path *pp, struct vectors * vecs, int need_do_map)
 		    flush_map_nopaths(mpp, vecs))
 			goto out;
 
-		if (setup_map(mpp, &params, vecs)) {
-			condlog(0, "%s: failed to setup map for"
-				" removal of path %s", mpp->alias, pp->dev);
-			goto fail;
-		}
-
 		if (mpp->wait_for_udev) {
 			mpp->wait_for_udev = 2;
 			retval = REMOVE_PATH_DELAY;
@@ -1444,6 +1440,12 @@ ev_remove_path (struct path *pp, struct vectors * vecs, int need_do_map)
 			retval = REMOVE_PATH_DELAY;
 			goto out;
 		}
+
+		if (setup_map(mpp, &params, vecs)) {
+			condlog(0, "%s: failed to setup map for"
+				" removal of path %s", mpp->alias, pp->dev);
+			goto fail;
+		}
 		/*
 		 * reload the map
 		 */
@@ -1453,24 +1455,20 @@ ev_remove_path (struct path *pp, struct vectors * vecs, int need_do_map)
 				"removal of path %s",
 				mpp->alias, pp->dev);
 			retval = REMOVE_PATH_FAILURE;
-		} else {
-			/*
-			 * update our state from kernel
-			 */
-			char devt[BLK_DEV_SIZE];
-
-			strlcpy(devt, pp->dev_t, sizeof(devt));
-
-			/* setup_multipath will free the path
-			 * regardless of whether it succeeds or
-			 * fails */
-			if (setup_multipath(vecs, mpp))
-				return REMOVE_PATH_MAP_ERROR;
-			sync_map_state(mpp);
+		}
+		/*
+		 * update mpp state from kernel even if domap failed.
+		 * If the path was removed from the mpp, setup_multipath will
+		 * free the path regardless of whether it succeeds or fails
+		 */
+		strlcpy(devt, pp->dev_t, sizeof(devt));
+		if (setup_multipath(vecs, mpp))
+			return REMOVE_PATH_MAP_ERROR;
+		sync_map_state(mpp);
 
+		if (retval == REMOVE_PATH_SUCCESS)
 			condlog(2, "%s: path removed from map %s",
 				devt, mpp->alias);
-		}
 	} else {
 		/* mpp == NULL */
 		if ((i = find_slot(vecs->pathvec, (void *)pp)) != -1)
-- 
2.45.0


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH v3 17/20] multipathd: make multipath devices manage their path check times
  2024-07-22 23:23 [PATCH v3 00/20] path checker refactor and misc fixes Benjamin Marzinski
  2024-07-22 23:23 ` [PATCH v3 10/20] multipathd: adjust when mpp is synced with the kernel Benjamin Marzinski
  2024-07-22 23:23 ` [PATCH v3 11/20] multipathd: resync map after setup_map in ev_remove_path Benjamin Marzinski
@ 2024-07-22 23:23 ` Benjamin Marzinski
  2 siblings, 0 replies; 6+ messages in thread
From: Benjamin Marzinski @ 2024-07-22 23:23 UTC (permalink / raw)
  To: Christophe Varoqui; +Cc: device-mapper development, Martin Wilck

multipathd's path checking can be very bursty, with all the paths
running their path checkers at the same time, and then all doing nothing
while waiting for the next check time. Alternatively, the paths in a
multipath device might all run their path checkers at a different time,
which can keep the multipath device from having a coherent view of the
state of all of its paths.

This patch makes all the paths of a multipath device converge to running
their checkers at the same time, and spreads out when this time is for
the different multipath devices.

To do this, the checking time is divided into adjustment intervals
(conf->adjust_int), so that the checkers run at some index within this
interval. conf->adjust_int is chosen so that it is a multiple of all
possible pp->checkint values. This means that regardless of
pp->checkint, the path should always be checked on the same indexes,
each adjustement interval.

Each multipath device has a goal index. These are evenly spread out
between 0 and conf->max_checkint. Every conf->adjust_int seconds, each
multipath device should try to check all of its paths on its goal index.
If the multipath device's check times are not aligned with the goal
index, then pp->tick for the next check will be decremented by one, to
align it over time.

In order for the path checkers to run every pp->checkint seconds,
multipathd needs to track how long a path check has been pending for,
and subtract that time from the number of ticks till the checker is run
again. If the checker has been pending for more that pp->checkint,
the path will be rechecked on the next tick after the checker returns.

Signed-off-by: Benjamin Marzinski <bmarzins@redhat.com>
---
 libmultipath/config.c  | 12 ++++++
 libmultipath/config.h  |  1 +
 libmultipath/structs.c |  1 +
 libmultipath/structs.h |  1 +
 multipathd/main.c      | 91 ++++++++++++++++++++++++++++++++++--------
 5 files changed, 89 insertions(+), 17 deletions(-)

diff --git a/libmultipath/config.c b/libmultipath/config.c
index 83fa7369..a59533b5 100644
--- a/libmultipath/config.c
+++ b/libmultipath/config.c
@@ -982,6 +982,18 @@ int _init_config (const char *file, struct config *conf)
 		conf->checkint = conf->max_checkint;
 	condlog(3, "polling interval: %d, max: %d",
 		conf->checkint, conf->max_checkint);
+	/*
+	 * make sure that that adjust_int is a multiple of all possible values
+	 * of pp->checkint.
+	 */
+	if (conf->max_checkint % conf->checkint == 0) {
+		conf->adjust_int = conf->max_checkint;
+	} else {
+		conf->adjust_int = conf->checkint;
+		while (2 * conf->adjust_int < conf->max_checkint)
+			conf->adjust_int *= 2;
+		conf->adjust_int *= conf->max_checkint;
+	}
 
 	if (conf->blist_devnode == NULL) {
 		conf->blist_devnode = vector_alloc();
diff --git a/libmultipath/config.h b/libmultipath/config.h
index 384193ab..800c0ca9 100644
--- a/libmultipath/config.h
+++ b/libmultipath/config.h
@@ -147,6 +147,7 @@ struct config {
 	int minio_rq;
 	unsigned int checkint;
 	unsigned int max_checkint;
+	unsigned int adjust_int;
 	bool use_watchdog;
 	int pgfailback;
 	int rr_weight;
diff --git a/libmultipath/structs.c b/libmultipath/structs.c
index 0a26096a..232b4230 100644
--- a/libmultipath/structs.c
+++ b/libmultipath/structs.c
@@ -149,6 +149,7 @@ uninitialize_path(struct path *pp)
 	pp->state = PATH_UNCHECKED;
 	pp->uid_attribute = NULL;
 	pp->checker_timeout = 0;
+	pp->pending_ticks = 0;
 
 	if (checker_selected(&pp->checker))
 		checker_put(&pp->checker);
diff --git a/libmultipath/structs.h b/libmultipath/structs.h
index 002eeae1..457d7836 100644
--- a/libmultipath/structs.h
+++ b/libmultipath/structs.h
@@ -360,6 +360,7 @@ struct path {
 	unsigned long long size;
 	unsigned int checkint;
 	unsigned int tick;
+	unsigned int pending_ticks;
 	int bus;
 	int offline;
 	int state;
diff --git a/multipathd/main.c b/multipathd/main.c
index 36e54587..b075b057 100644
--- a/multipathd/main.c
+++ b/multipathd/main.c
@@ -2388,7 +2388,7 @@ enum check_path_return {
 };
 
 static int
-check_path (struct vectors * vecs, struct path * pp, unsigned int ticks)
+do_check_path (struct vectors * vecs, struct path * pp)
 {
 	int newstate;
 	int new_path_up = 0;
@@ -2400,14 +2400,6 @@ check_path (struct vectors * vecs, struct path * pp, unsigned int ticks)
 	int marginal_pathgroups, marginal_changed = 0;
 	bool need_reload;
 
-	if (pp->initialized == INIT_REMOVED)
-		return CHECK_PATH_SKIPPED;
-
-	if (pp->tick)
-		pp->tick -= (pp->tick > ticks) ? ticks : pp->tick;
-	if (pp->tick)
-		return CHECK_PATH_SKIPPED;
-
 	conf = get_multipath_config();
 	checkint = conf->checkint;
 	max_checkint = conf->max_checkint;
@@ -2419,12 +2411,6 @@ check_path (struct vectors * vecs, struct path * pp, unsigned int ticks)
 		pp->checkint = checkint;
 	};
 
-	/*
-	 * provision a next check soonest,
-	 * in case we exit abnormally from here
-	 */
-	pp->tick = checkint;
-
 	newstate = check_path_state(pp);
 	if (newstate == PATH_WILD || newstate == PATH_UNCHECKED)
 		return CHECK_PATH_SKIPPED;
@@ -2587,7 +2573,6 @@ check_path (struct vectors * vecs, struct path * pp, unsigned int ticks)
 				condlog(4, "%s: delay next check %is",
 					pp->dev_t, pp->checkint);
 			}
-			pp->tick = pp->checkint;
 		}
 	}
 	else if (newstate != PATH_UP && newstate != PATH_GHOST) {
@@ -2640,6 +2625,77 @@ check_path (struct vectors * vecs, struct path * pp, unsigned int ticks)
 	return CHECK_PATH_CHECKED;
 }
 
+static int
+check_path (struct vectors * vecs, struct path * pp, unsigned int ticks,
+	    time_t start_secs)
+{
+	int r;
+	unsigned int adjust_int, max_checkint;
+	struct config *conf;
+	time_t next_idx, goal_idx;
+
+	if (pp->initialized == INIT_REMOVED)
+		return CHECK_PATH_SKIPPED;
+
+	if (pp->tick)
+		pp->tick -= (pp->tick > ticks) ? ticks : pp->tick;
+	if (pp->tick)
+		return CHECK_PATH_SKIPPED;
+
+	conf = get_multipath_config();
+	max_checkint = conf->max_checkint;
+	adjust_int = conf->adjust_int;
+	put_multipath_config(conf);
+
+	r = do_check_path(vecs, pp);
+
+	/*
+	 * do_check_path() removed or orphaned the path.
+	 */
+	if (r == CHECK_PATH_REMOVED || !pp->mpp)
+		return r;
+
+	if (pp->tick != 0) {
+		/* the path checker is pending */
+		if (pp->state != PATH_DELAYED)
+			pp->pending_ticks++;
+		else
+			pp->pending_ticks = 0;
+		return r;
+	}
+
+	/* schedule the next check */
+	pp->tick = pp->checkint;
+	if (pp->pending_ticks >= pp->tick)
+		pp->tick = 1;
+	else
+		pp->tick -= pp->pending_ticks;
+	pp->pending_ticks = 0;
+
+	if (pp->tick == 1)
+		return r;
+
+	/*
+	 * every mpp has a goal_idx in the range of
+	 * 0 <= goal_idx < conf->max_checkint
+	 *
+	 * The next check has an index, next_idx, in the range of
+	 * 0 <= next_idx < conf->adjust_int
+	 *
+	 * If the difference between the goal index and the next check index
+	 * is not a multiple of pp->checkint, then the device is not checking
+	 * the paths at its goal index, and pp->tick will be decremented by
+	 * one, to align it over time.
+	 */
+	goal_idx = (find_slot(vecs->mpvec, pp->mpp)) *
+		   max_checkint / VECTOR_SIZE(vecs->mpvec);
+	next_idx = (start_secs + pp->tick) % adjust_int;
+	if ((goal_idx - next_idx) % pp->checkint != 0)
+		pp->tick--;
+
+	return r;
+}
+
 static int
 handle_uninitialized_path(struct vectors * vecs, struct path * pp,
 			  unsigned int ticks)
@@ -2799,7 +2855,8 @@ checkerloop (void *ap)
 					continue;
 				pp->is_checked = true;
 				if (pp->mpp)
-					rc = check_path(vecs, pp, ticks);
+					rc = check_path(vecs, pp, ticks,
+							chk_start_time.tv_sec);
 				else
 					rc = handle_uninitialized_path(vecs, pp,
 								       ticks);
-- 
2.45.0


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH v3 10/20] multipathd: adjust when mpp is synced with the kernel
  2024-07-22 23:23 ` [PATCH v3 10/20] multipathd: adjust when mpp is synced with the kernel Benjamin Marzinski
@ 2024-07-22 23:35   ` Benjamin Marzinski
  0 siblings, 0 replies; 6+ messages in thread
From: Benjamin Marzinski @ 2024-07-22 23:35 UTC (permalink / raw)
  To: Christophe Varoqui; +Cc: device-mapper development, Martin Wilck

These patches are identical to the ones I sent before. Please ignore the
resend.

My apologies
-Ben


On Mon, Jul 22, 2024 at 07:23:06PM -0400, Benjamin Marzinski wrote:
> Move the code to sync the mpp device state into a helper function and
> add a counter to to make sure that the device is synced at least once
> every max_checkint secs. This makes sure that multipath devices with no
> paths will still get synced with the kernel.  Also, if multiple paths
> are checked in the same loop, the multipath device will only be synced
> with the kernel once, since every time the mpp is synced in any code
> path, mpp->sync_tick is reset.
> 
> The code still syncs the mpp before updating the path state for two
> main reasons.
> 
> 1. Sometimes multipathd leaves the mpp with a garbage state. Future
>    patches will fix most of these cases, but the code intentially
>    does not remove the mpp is resyncing fails while checking paths.
>    But this does leave the mpp with a garbage state.
> 
> 2. The kernel chages the multipath state independently of multipathd. If
>    the kernel fails a path, a uevent will arrive shortly. But the kernel
>    doesn't provide any notification when it switches the active
>    path group or if it ends up picking a different one than multipathd
>    selected. Multipathd needs to know the actual current pathgroup to
>    know when it should be switching them.
> 
> Signed-off-by: Benjamin Marzinski <bmarzins@redhat.com>
> ---
>  libmultipath/configure.c   |  1 +
>  libmultipath/structs.h     |  2 ++
>  libmultipath/structs_vec.c |  5 +++
>  multipathd/main.c          | 64 +++++++++++++++++++++++++-------------
>  4 files changed, 50 insertions(+), 22 deletions(-)
> 
> diff --git a/libmultipath/configure.c b/libmultipath/configure.c
> index b4de863c..34158e31 100644
> --- a/libmultipath/configure.c
> +++ b/libmultipath/configure.c
> @@ -358,6 +358,7 @@ int setup_map(struct multipath *mpp, char **params, struct vectors *vecs)
>  
>  	sysfs_set_scsi_tmo(conf, mpp);
>  	marginal_pathgroups = conf->marginal_pathgroups;
> +	mpp->sync_tick = conf->max_checkint;
>  	pthread_cleanup_pop(1);
>  
>  	if (!mpp->features || !mpp->hwhandler || !mpp->selector) {
> diff --git a/libmultipath/structs.h b/libmultipath/structs.h
> index 3b91e39c..002eeae1 100644
> --- a/libmultipath/structs.h
> +++ b/libmultipath/structs.h
> @@ -453,6 +453,8 @@ struct multipath {
>  	int ghost_delay;
>  	int ghost_delay_tick;
>  	int queue_mode;
> +	unsigned int sync_tick;
> +	bool is_checked;
>  	uid_t uid;
>  	gid_t gid;
>  	mode_t mode;
> diff --git a/libmultipath/structs_vec.c b/libmultipath/structs_vec.c
> index d58ef5a7..7f267ba0 100644
> --- a/libmultipath/structs_vec.c
> +++ b/libmultipath/structs_vec.c
> @@ -505,11 +505,16 @@ update_multipath_table (struct multipath *mpp, vector pathvec, int flags)
>  	char __attribute__((cleanup(cleanup_charp))) *params = NULL;
>  	char __attribute__((cleanup(cleanup_charp))) *status = NULL;
>  	unsigned long long size;
> +	struct config *conf;
>  
>  	if (!mpp)
>  		return r;
>  
>  	size = mpp->size;
> +	conf = get_multipath_config();
> +	mpp->sync_tick = conf->max_checkint;
> +	put_multipath_config(conf);
> +
>  	r = libmp_mapinfo(DM_MAP_BY_NAME | MAPINFO_MPATH_ONLY,
>  			  (mapid_t) { .str = mpp->alias },
>  			  (mapinfo_t) {
> diff --git a/multipathd/main.c b/multipathd/main.c
> index 86f7ab1f..ae40f599 100644
> --- a/multipathd/main.c
> +++ b/multipathd/main.c
> @@ -2342,6 +2342,37 @@ check_path_state(struct path *pp)
>  	return newstate;
>  }
>  
> +static void
> +do_sync_mpp(struct vectors * vecs, struct multipath *mpp)
> +{
> +	int i, ret;
> +	struct path *pp;
> +
> +	mpp->is_checked = true;
> +	ret = update_multipath_strings(mpp, vecs->pathvec);
> +	if (ret != DMP_OK) {
> +		condlog(1, "%s: %s", mpp->alias, ret == DMP_NOT_FOUND ?
> +			"device not found" :
> +			"couldn't synchronize with kernel state");
> +		vector_foreach_slot (mpp->paths, pp, i)
> +			pp->dmstate = PSTATE_UNDEF;
> +		return;
> +	}
> +	set_no_path_retry(mpp);
> +}
> +
> +static void
> +sync_mpp(struct vectors * vecs, struct multipath *mpp, unsigned int ticks)
> +{
> +	if (mpp->sync_tick)
> +		mpp->sync_tick -= (mpp->sync_tick > ticks) ? ticks :
> +				  mpp->sync_tick;
> +	if (mpp->sync_tick)
> +		return;
> +
> +	do_sync_mpp(vecs, mpp);
> +}
> +
>  /*
>   * Returns '1' if the path has been checked and '0' otherwise
>   */
> @@ -2356,7 +2387,6 @@ check_path (struct vectors * vecs, struct path * pp, unsigned int ticks)
>  	unsigned int checkint, max_checkint;
>  	struct config *conf;
>  	int marginal_pathgroups, marginal_changed = 0;
> -	int ret;
>  	bool need_reload;
>  
>  	if (pp->initialized == INIT_REMOVED)
> @@ -2395,26 +2425,6 @@ check_path (struct vectors * vecs, struct path * pp, unsigned int ticks)
>  		pp->tick = 1;
>  		return 0;
>  	}
> -	/*
> -	 * Synchronize with kernel state
> -	 */
> -	ret = update_multipath_strings(pp->mpp, vecs->pathvec);
> -	if (ret != DMP_OK) {
> -		if (ret == DMP_NOT_FOUND) {
> -			/* multipath device missing. Likely removed */
> -			condlog(1, "%s: multipath device '%s' not found",
> -				pp->dev, pp->mpp ? pp->mpp->alias : "");
> -			return 0;
> -		} else
> -			condlog(1, "%s: Couldn't synchronize with kernel state",
> -				pp->dev);
> -		pp->dmstate = PSTATE_UNDEF;
> -	}
> -	/* if update_multipath_strings orphaned the path, quit early */
> -	if (!pp->mpp)
> -		return 0;
> -	set_no_path_retry(pp->mpp);
> -
>  	if (pp->recheck_wwid == RECHECK_WWID_ON &&
>  	    (newstate == PATH_UP || newstate == PATH_GHOST) &&
>  	    ((pp->state != PATH_UP && pp->state != PATH_GHOST) ||
> @@ -2424,7 +2434,12 @@ check_path (struct vectors * vecs, struct path * pp, unsigned int ticks)
>  		handle_path_wwid_change(pp, vecs);
>  		return 0;
>  	}
> -
> +	if (!pp->mpp->is_checked) {
> +		do_sync_mpp(vecs, pp->mpp);
> +		/* if update_multipath_strings orphaned the path, quit early */
> +		if (!pp->mpp)
> +			return 0;
> +	}
>  	if ((newstate != PATH_UP && newstate != PATH_GHOST &&
>  	     newstate != PATH_PENDING) && (pp->state == PATH_DELAYED)) {
>  		/* If path state become failed again cancel path delay state */
> @@ -2752,12 +2767,17 @@ checkerloop (void *ap)
>  		while (checker_state != CHECKER_FINISHED) {
>  			unsigned int paths_checked = 0, i;
>  			struct timespec chk_start_time;
> +			struct multipath *mpp;
>  
>  			pthread_cleanup_push(cleanup_lock, &vecs->lock);
>  			lock(&vecs->lock);
>  			pthread_testcancel();
> +			vector_foreach_slot(vecs->mpvec, mpp, i)
> +				mpp->is_checked = false;
>  			get_monotonic_time(&chk_start_time);
>  			if (checker_state == CHECKER_STARTING) {
> +				vector_foreach_slot(vecs->mpvec, mpp, i)
> +					sync_mpp(vecs, mpp, ticks);
>  				vector_foreach_slot(vecs->pathvec, pp, i)
>  					pp->is_checked = false;
>  				checker_state = CHECKER_RUNNING;
> -- 
> 2.45.0
> 


^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2024-07-22 23:35 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2024-07-22 23:23 [PATCH v3 00/20] path checker refactor and misc fixes Benjamin Marzinski
2024-07-22 23:23 ` [PATCH v3 10/20] multipathd: adjust when mpp is synced with the kernel Benjamin Marzinski
2024-07-22 23:35   ` Benjamin Marzinski
2024-07-22 23:23 ` [PATCH v3 11/20] multipathd: resync map after setup_map in ev_remove_path Benjamin Marzinski
2024-07-22 23:23 ` [PATCH v3 17/20] multipathd: make multipath devices manage their path check times Benjamin Marzinski
  -- strict thread matches above, loose matches on Subject: below --
2024-07-19 19:40 [PATCH v3 00/20] path checker refactor and misc fixes Benjamin Marzinski
2024-07-19 19:40 ` [PATCH v3 10/20] multipathd: adjust when mpp is synced with the kernel Benjamin Marzinski

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.