From: Benjamin Marzinski <bmarzins@redhat.com>
To: Martin Wilck <martin.wilck@suse.com>
Cc: Christophe Varoqui <christophe.varoqui@opensvc.com>,
Brian Bunker <brian@purestorage.com>,
dm-devel@lists.linux.dev, Martin Wilck <mwilck@suse.com>
Subject: Re: [PATCH 2/4] libmpathutil: add generic implementation for checker thread runners
Date: Tue, 24 Mar 2026 01:44:05 -0400 [thread overview]
Message-ID: <acIkpYcxYE3-VaCf@redhat.com> (raw)
In-Reply-To: <20260319221344.753790-3-mwilck@suse.com>
On Thu, Mar 19, 2026 at 11:13:42PM +0100, Martin Wilck wrote:
> This code adds a generic, abstract implementation of the kind of threads we
> need for checkers: detached threads that may time out. The design is such
> that these threads never access any memory of the calling program, strongly
> simplifying the management of lifetimes of objects.
>
> See the documentation of the API in "runner.h".
>
> Signed-off-by: Martin Wilck <mwilck@suse.com>
> ---
> libmpathutil/Makefile | 2 +-
> libmpathutil/libmpathutil.version | 6 +-
> libmpathutil/runner.c | 205 ++++++++++++++++++++++++++++++
> libmpathutil/runner.h | 84 ++++++++++++
> 4 files changed, 295 insertions(+), 2 deletions(-)
> create mode 100644 libmpathutil/runner.c
> create mode 100644 libmpathutil/runner.h
>
> diff --git a/libmpathutil/Makefile b/libmpathutil/Makefile
> index e1cc2c6..bdd7063 100644
> --- a/libmpathutil/Makefile
> +++ b/libmpathutil/Makefile
> @@ -13,7 +13,7 @@ LIBDEPS += -lpthread -ldl -ludev -L$(mpathcmddir) -lmpathcmd $(SYSTEMD_LIBDEPS)
>
> # other object files
> OBJS := mt-libudev.o parser.o vector.o util.o debug.o time-util.o \
> - uxsock.o log_pthread.o log.o strbuf.o globals.o msort.o
> + uxsock.o log_pthread.o log.o strbuf.o globals.o msort.o runner.o
>
> all: $(DEVLIB)
>
> diff --git a/libmpathutil/libmpathutil.version b/libmpathutil/libmpathutil.version
> index 22ef994..5a7df82 100644
> --- a/libmpathutil/libmpathutil.version
> +++ b/libmpathutil/libmpathutil.version
> @@ -43,7 +43,7 @@ LIBMPATHCOMMON_1.0.0 {
> put_multipath_config;
> };
>
> -LIBMPATHUTIL_5.0 {
> +LIBMPATHUTIL_6.0 {
> global:
> alloc_bitfield;
> alloc_strvec;
> @@ -62,6 +62,8 @@ global:
> cleanup_vector;
> cleanup_vector_free;
> convert_dev;
> + cancel_runner;
> + check_runner;
> dlog;
> filepresent;
> fill_strbuf;
> @@ -72,6 +74,7 @@ global:
> free_strvec;
> get_linux_version_code;
> get_monotonic_time;
> + get_runner;
> get_strbuf_buf__;
> get_next_string;
> get_strbuf_len;
> @@ -162,6 +165,7 @@ global:
> pthread_cond_init_mono;
> recv_packet;
> reset_strbuf;
> + runner_state_name;
> safe_write;
> send_packet;
> set_max_fds;
> diff --git a/libmpathutil/runner.c b/libmpathutil/runner.c
> new file mode 100644
> index 0000000..8a49d92
> --- /dev/null
> +++ b/libmpathutil/runner.c
> @@ -0,0 +1,205 @@
> +// SPDX-License-Identifier: GPL-2.0-or-later
> +// Copyright (c) 2026 SUSE LLC
> +#include <assert.h>
> +#include <time.h>
> +#include <pthread.h>
> +#include <urcu/uatomic.h>
> +#include "util.h"
> +#include "debug.h"
> +#include "time-util.h"
> +#include "runner.h"
> +
> +#define STACK_SIZE (4 * 1024)
> +#define MILLION 1000000
> +
> +const char *runner_state_name(int state)
> +{
> + // clang-format off
> + static const char * const state_name_[] = {
> + [RUNNER_IDLE] = "idle",
> + [RUNNER_RUNNING] = "running",
> + [RUNNER_DONE] = "done",
> + [RUNNER_CANCELLED] = "cancelled"
> + };
> + // clang-format on
> +
> + if (state < RUNNER_IDLE || state > RUNNER_CANCELLED)
> + return "unknown";
> + return state_name_[state];
> +}
> +
> +struct runner_context {
> + int refcount;
> + int status;
> + struct timespec deadline;
> + pthread_t thr;
> + void (*func)(void *data);
> + /* User data will be copied into this area */
The data array is currently pointer aligned. We might want to put a
comment mentioning that it must stay that way, or force an alignment on
it.
> + char data[];
> +};
> +
> +static void release_context(struct runner_context *ctx)
> +{
> + int n;
> +
> + n = uatomic_sub_return(&ctx->refcount, 1);
> + assert(n >= 0);
> +
> + if (n == 0)
> + free(ctx);
> +}
> +
> +static void cleanup_context(struct runner_context **ctx)
> +{
> + if (*ctx)
> + release_context(*ctx);
> +}
> +
> +static void *runner_thread(void *arg)
> +{
> + int st, refcount;
> + /*
> + * The cleanup function makes sure memory is freed if the thread is
> + * cancelled (-fexceptions).
> + */
> + struct runner_context *ctx __attribute__((cleanup(cleanup_context))) = arg;
> +
> + refcount = uatomic_add_return(&ctx->refcount, 1);
> + assert(refcount == 2);
> +
> + st = uatomic_cmpxchg(&ctx->status, RUNNER_IDLE, RUNNER_RUNNING);
> + /*
> + * The thread has already been cancelled before it was even started.
> + * In this case, cancel_runner() doesn't release the context.
> + * Do it here. See comments for RUNNER_IDLE in cancel_runner().
> + */
> + if (st != RUNNER_IDLE) {
> + release_context(ctx);
> + return NULL;
> + }
> +
> + (*ctx->func)(ctx->data);
> + uatomic_cmpxchg(&ctx->status, RUNNER_RUNNING, RUNNER_DONE);
> + return NULL;
> +}
> +
> +void cancel_runner(struct runner_context *ctx)
> +{
> + int st = uatomic_cmpxchg(&ctx->status, RUNNER_RUNNING, RUNNER_CANCELLED);
> + int level = 4, retry = 1;
> +
> +repeat:
> + switch (st) {
> + case RUNNER_IDLE:
> + /*
> + * Race with thread startup.
> + *
> + * If after the following cmpxchg st is still IDLE, the cmpxchg
> + * in runner_thread() will return CANCELLED, and the context
> + * will be relased there. Otherwise, the thread has switched
> + * to RUNNING in the meantime, and we will be able to cancel
> + * it regularly if we retry.
> + */
> + if (retry--) {
> + st = uatomic_cmpxchg(&ctx->status, RUNNER_IDLE,
> + RUNNER_CANCELLED);
> + goto repeat;
> + }
> + level = 3;
> + break;
> + case RUNNER_RUNNING:
> + pthread_cancel(ctx->thr);
> + /* Caller has no interest in the data any more */
> + release_context(ctx);
> + return;
> + case RUNNER_DONE:
> + /* Caller has no interest in the data any more */
> + release_context(ctx);
> + /* fallthrough */
> + default:
> + level = 3;
> + break;
> + }
> + condlog(level, "%s: runner cancelled in state '%s', ctx=%p", __func__,
> + runner_state_name(st), ctx);
> +}
> +
> +struct runner_context *get_runner(runner_func func, void *data,
> + unsigned int size, unsigned long timeout_usec)
> +{
> + static const struct timespec time_zero = {.tv_sec = 0};
> + struct runner_context *ctx;
> + pthread_attr_t attr;
> + int rc;
> +
> + assert(func);
> + assert(data);
> + assert(size > 0);
Why asserts? We already need to deal with this function failing. I'd
prefer error messages and failure, instead of crashing later when a null
value gets dereferenced, for instance. I admit, it's unlikley that these
arguments will be set wrong, but if there is a bug that makes it into
production code, these asserts will be disabled, and a log message
telling me what went wrong would be helpful.
> +
> + ctx = malloc(sizeof(*ctx) + size);
> + if (!ctx)
> + return NULL;
> +
> + ctx->func = func;
> + uatomic_set(&ctx->refcount, 1);
> + uatomic_set(&ctx->status, RUNNER_IDLE);
> + memcpy(ctx->data, data, size);
> +
> + if (timeout_usec) {
> + get_monotonic_time(&ctx->deadline);
> + ctx->deadline.tv_sec += timeout_usec / MILLION;
> + ctx->deadline.tv_nsec += (timeout_usec % MILLION) * 1000;
> + } else
> + ctx->deadline = time_zero;
> +
Don't think we need the smm_smp_wmb(). From
https://pubs.opengroup.org/onlinepubs/9799919799/basedefs/V1_chap04.html#tag_04_15_02
"The following functions synchronize memory with respect to other threads:"
lists pthread_create().
-Ben
> + /*
> + * This pairs with the implicit barrier in uatomic_add_return()
> + * in runner_thread()
> + */
> + cmm_smp_wmb();
> +
> + setup_thread_attr(&attr, STACK_SIZE, 1);
> + rc = pthread_create(&ctx->thr, &attr, runner_thread, ctx);
> + pthread_attr_destroy(&attr);
> +
> + if (rc) {
> + condlog(1, "%s: pthread_create(): %s", __func__, strerror(rc));
> + release_context(ctx);
> + return NULL;
> + }
> + return ctx;
> +}
> +
> +int check_runner(struct runner_context *ctx, void *data, unsigned int size)
> +{
> + int st = uatomic_read(&ctx->status);
> +
> + switch (st) {
> + case RUNNER_DONE:
> + if (data)
> + /* hand back the data to the caller */
> + memcpy(data, ctx->data, size);
> + /* caller is done with this context */
> + release_context(ctx);
> + /* fallthrough */
> + case RUNNER_CANCELLED:
> + return st;
> + case RUNNER_IDLE:
> + case RUNNER_RUNNING:
> + if (ctx->deadline.tv_sec != 0 || ctx->deadline.tv_nsec != 0) {
> + struct timespec now;
> +
> + get_monotonic_time(&now);
> + if (timespeccmp(&ctx->deadline, &now) <= 0) {
> + cancel_runner(ctx);
> + return RUNNER_CANCELLED;
> + }
> + }
> + return RUNNER_RUNNING;
> + default:
> + condlog(1, "%s: runner in impossible state '%s'", __func__,
> + runner_state_name(st));
> + assert(false);
> + return st;
> + }
> +}
> diff --git a/libmpathutil/runner.h b/libmpathutil/runner.h
> new file mode 100644
> index 0000000..0c9cf80
> --- /dev/null
> +++ b/libmpathutil/runner.h
> @@ -0,0 +1,84 @@
> +// SPDX-License-Identifier: GPL-2.0-or-later
> +// Copyright (c) 2026 SUSE LLC
> +#ifndef RUNNER_H_INCLUDED
> +#define RUNNER_H_INCLUDED
> +
> +enum runner_status {
> + RUNNER_IDLE,
> + RUNNER_RUNNING,
> + RUNNER_DONE,
> + RUNNER_CANCELLED,
> +};
> +
> +typedef void (*runner_func)(void *data);
> +struct runner_context;
> +
> +/**
> + * runner_state_name(): helper for printing runner states
> + *
> + * @param state: a valid @runner_status value
> + * @returns: a string describing the status
> + */
> +const char *runner_state_name(int state);
> +
> +/**
> + * get_runner(): start a runner thread
> + *
> + * This function starts a runner thread that calls @func(@data).
> + * The thread is created as detached thread.
> + * @data will be copied to thread-private memory, which will be freed when
> + * the runner terminates.
> + * Output values can be retrieved with @check_runner().
> + *
> + * @param func: the worker function to invoke
> + * This parameter must not be NULL.
> + * @param data: pointer the data to pass to the function (input and output)
> + * This parameter must not be NULL.
> + * @param size: the size (in bytes) of the data passed to the function
> + * This parameter must be positive.
> + * @param timeout_sec: timeout (in seconds) after which to cancel the runner.
> + * If it is 0, the runner will not time out.
> + * @returns: a runner context that must be passed to the functions below.
> + */
> +struct runner_context *get_runner(runner_func func, void *data,
> + unsigned int size, unsigned long timeout_usec);
> +
> +/**
> + * cancel_runner(): cancel a runner thread.
> + *
> + * This function should be called to terminate runners on program exit,
> + * or to terminate runners that have no timeout set in @get_runner,
> + * and haven't completed.
> + * Upon return of this function, @ctx becomes stale and shouldn't accessed
> + * any more.
> + *
> + * @param ctx: the context of the runner to be cancelled.
> + */
> +void cancel_runner(struct runner_context *ctx);
> +
> +/**
> + * check_runner(): query the state of a runner thread and obtain results
> + *
> + * Check the state of a runner previously started with @get_runner. If the
> + * thread function has completed, @RUNNER_DONE will be returned, and the
> + * user data will be copied into the @data argument. If @check_runner returns
> + * anything else, @data contains no valid data. The @size argument
> + * will typically be the same as the @size passed to @get_runner (meaning
> + * that @data represents an object of the same type as the @data argument
> + * passed to @get_runner previously).
> + * If @check_runner returns @RUNNER_CANCELLED or @RUNNER_DONE, the @ctx object
> + * has become stale and must not be used any more.
> + * @RUNNER_CANCELLED is returned if the timeout set in @get_runner has passed
> + * and the worker function hasn't returned yet.
> + *
> + * @param ctx: the context of the runner to be queried
> + * @param data: memory pointer that will receive results of the worker
> + * function. Can be NULL, in which case no data will be copied.
> + * @param size: size of the memory pointed to by data. It must be no bigger
> + * than the size of the memory passed to @get_runner for this runner.
> + * It can be smaller, but no more than @size bytes will be copied.
> + * @returns: @RUNNER_RUNNING, @RUNNER_DONE, or @RUNNER_CANCELLED.
> + */
> +int check_runner(struct runner_context *ctx, void *data, unsigned int size);
> +
> +#endif /* RUNNER_H_INCLUDED */
> --
> 2.53.0
next prev parent reply other threads:[~2026-03-24 5:44 UTC|newest]
Thread overview: 11+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-03-19 22:13 [PATCH 0/4] multipath-tools: generic async threads for TUR checker Martin Wilck
2026-03-19 22:13 ` [PATCH 1/4] multipathd: get_new_state: map PATH_TIMEOUT to PATH_DOWN Martin Wilck
2026-03-24 5:35 ` Benjamin Marzinski
2026-03-19 22:13 ` [PATCH 2/4] libmpathutil: add generic implementation for checker thread runners Martin Wilck
2026-03-24 5:44 ` Benjamin Marzinski [this message]
2026-03-19 22:13 ` [PATCH 3/4] multipath-tools tests: add test program for " Martin Wilck
2026-03-24 5:47 ` Benjamin Marzinski
2026-03-19 22:13 ` [PATCH 4/4] libmultipath: TUR checker: use runner threads Martin Wilck
2026-03-24 6:38 ` Benjamin Marzinski
2026-03-24 12:24 ` Martin Wilck
2026-03-24 14:46 ` Benjamin Marzinski
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=acIkpYcxYE3-VaCf@redhat.com \
--to=bmarzins@redhat.com \
--cc=brian@purestorage.com \
--cc=christophe.varoqui@opensvc.com \
--cc=dm-devel@lists.linux.dev \
--cc=martin.wilck@suse.com \
--cc=mwilck@suse.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.