From: Benjamin Marzinski <bmarzins@redhat.com>
To: Martin Wilck <martin.wilck@suse.com>
Cc: Christophe Varoqui <christophe.varoqui@opensvc.com>,
Brian Bunker <brian@purestorage.com>,
dm-devel@lists.linux.dev, Martin Wilck <mwilck@suse.com>
Subject: Re: [PATCH 2/4] libmpathutil: add generic implementation for checker thread runners
Date: Tue, 24 Mar 2026 01:44:05 -0400 [thread overview]
Message-ID: <acIkpYcxYE3-VaCf@redhat.com> (raw)
In-Reply-To: <20260319221344.753790-3-mwilck@suse.com>
On Thu, Mar 19, 2026 at 11:13:42PM +0100, Martin Wilck wrote:
> This code adds a generic, abstract implementation of the kind of threads we
> need for checkers: detached threads that may time out. The design is such
> that these threads never access any memory of the calling program, strongly
> simplifying the management of lifetimes of objects.
>
> See the documentation of the API in "runner.h".
>
> Signed-off-by: Martin Wilck <mwilck@suse.com>
> ---
> libmpathutil/Makefile | 2 +-
> libmpathutil/libmpathutil.version | 6 +-
> libmpathutil/runner.c | 205 ++++++++++++++++++++++++++++++
> libmpathutil/runner.h | 84 ++++++++++++
> 4 files changed, 295 insertions(+), 2 deletions(-)
> create mode 100644 libmpathutil/runner.c
> create mode 100644 libmpathutil/runner.h
>
> diff --git a/libmpathutil/Makefile b/libmpathutil/Makefile
> index e1cc2c6..bdd7063 100644
> --- a/libmpathutil/Makefile
> +++ b/libmpathutil/Makefile
> @@ -13,7 +13,7 @@ LIBDEPS += -lpthread -ldl -ludev -L$(mpathcmddir) -lmpathcmd $(SYSTEMD_LIBDEPS)
>
> # other object files
> OBJS := mt-libudev.o parser.o vector.o util.o debug.o time-util.o \
> - uxsock.o log_pthread.o log.o strbuf.o globals.o msort.o
> + uxsock.o log_pthread.o log.o strbuf.o globals.o msort.o runner.o
>
> all: $(DEVLIB)
>
> diff --git a/libmpathutil/libmpathutil.version b/libmpathutil/libmpathutil.version
> index 22ef994..5a7df82 100644
> --- a/libmpathutil/libmpathutil.version
> +++ b/libmpathutil/libmpathutil.version
> @@ -43,7 +43,7 @@ LIBMPATHCOMMON_1.0.0 {
> put_multipath_config;
> };
>
> -LIBMPATHUTIL_5.0 {
> +LIBMPATHUTIL_6.0 {
> global:
> alloc_bitfield;
> alloc_strvec;
> @@ -62,6 +62,8 @@ global:
> cleanup_vector;
> cleanup_vector_free;
> convert_dev;
> + cancel_runner;
> + check_runner;
> dlog;
> filepresent;
> fill_strbuf;
> @@ -72,6 +74,7 @@ global:
> free_strvec;
> get_linux_version_code;
> get_monotonic_time;
> + get_runner;
> get_strbuf_buf__;
> get_next_string;
> get_strbuf_len;
> @@ -162,6 +165,7 @@ global:
> pthread_cond_init_mono;
> recv_packet;
> reset_strbuf;
> + runner_state_name;
> safe_write;
> send_packet;
> set_max_fds;
> diff --git a/libmpathutil/runner.c b/libmpathutil/runner.c
> new file mode 100644
> index 0000000..8a49d92
> --- /dev/null
> +++ b/libmpathutil/runner.c
> @@ -0,0 +1,205 @@
> +// SPDX-License-Identifier: GPL-2.0-or-later
> +// Copyright (c) 2026 SUSE LLC
> +#include <assert.h>
> +#include <time.h>
> +#include <pthread.h>
> +#include <urcu/uatomic.h>
> +#include "util.h"
> +#include "debug.h"
> +#include "time-util.h"
> +#include "runner.h"
> +
> +#define STACK_SIZE (4 * 1024)
> +#define MILLION 1000000
> +
> +const char *runner_state_name(int state)
> +{
> + // clang-format off
> + static const char * const state_name_[] = {
> + [RUNNER_IDLE] = "idle",
> + [RUNNER_RUNNING] = "running",
> + [RUNNER_DONE] = "done",
> + [RUNNER_CANCELLED] = "cancelled"
> + };
> + // clang-format on
> +
> + if (state < RUNNER_IDLE || state > RUNNER_CANCELLED)
> + return "unknown";
> + return state_name_[state];
> +}
> +
> +struct runner_context {
> + int refcount;
> + int status;
> + struct timespec deadline;
> + pthread_t thr;
> + void (*func)(void *data);
> + /* User data will be copied into this area */
The data array is currently pointer aligned. We might want to put a
comment mentioning that it must stay that way, or force an alignment on
it.
> + char data[];
> +};
> +
> +static void release_context(struct runner_context *ctx)
> +{
> + int n;
> +
> + n = uatomic_sub_return(&ctx->refcount, 1);
> + assert(n >= 0);
> +
> + if (n == 0)
> + free(ctx);
> +}
> +
> +static void cleanup_context(struct runner_context **ctx)
> +{
> + if (*ctx)
> + release_context(*ctx);
> +}
> +
> +static void *runner_thread(void *arg)
> +{
> + int st, refcount;
> + /*
> + * The cleanup function makes sure memory is freed if the thread is
> + * cancelled (-fexceptions).
> + */
> + struct runner_context *ctx __attribute__((cleanup(cleanup_context))) = arg;
> +
> + refcount = uatomic_add_return(&ctx->refcount, 1);
> + assert(refcount == 2);
> +
> + st = uatomic_cmpxchg(&ctx->status, RUNNER_IDLE, RUNNER_RUNNING);
> + /*
> + * The thread has already been cancelled before it was even started.
> + * In this case, cancel_runner() doesn't release the context.
> + * Do it here. See comments for RUNNER_IDLE in cancel_runner().
> + */
> + if (st != RUNNER_IDLE) {
> + release_context(ctx);
> + return NULL;
> + }
> +
> + (*ctx->func)(ctx->data);
> + uatomic_cmpxchg(&ctx->status, RUNNER_RUNNING, RUNNER_DONE);
> + return NULL;
> +}
> +
> +void cancel_runner(struct runner_context *ctx)
> +{
> + int st = uatomic_cmpxchg(&ctx->status, RUNNER_RUNNING, RUNNER_CANCELLED);
> + int level = 4, retry = 1;
> +
> +repeat:
> + switch (st) {
> + case RUNNER_IDLE:
> + /*
> + * Race with thread startup.
> + *
> + * If after the following cmpxchg st is still IDLE, the cmpxchg
> + * in runner_thread() will return CANCELLED, and the context
> + * will be relased there. Otherwise, the thread has switched
> + * to RUNNING in the meantime, and we will be able to cancel
> + * it regularly if we retry.
> + */
> + if (retry--) {
> + st = uatomic_cmpxchg(&ctx->status, RUNNER_IDLE,
> + RUNNER_CANCELLED);
> + goto repeat;
> + }
> + level = 3;
> + break;
> + case RUNNER_RUNNING:
> + pthread_cancel(ctx->thr);
> + /* Caller has no interest in the data any more */
> + release_context(ctx);
> + return;
> + case RUNNER_DONE:
> + /* Caller has no interest in the data any more */
> + release_context(ctx);
> + /* fallthrough */
> + default:
> + level = 3;
> + break;
> + }
> + condlog(level, "%s: runner cancelled in state '%s', ctx=%p", __func__,
> + runner_state_name(st), ctx);
> +}
> +
> +struct runner_context *get_runner(runner_func func, void *data,
> + unsigned int size, unsigned long timeout_usec)
> +{
> + static const struct timespec time_zero = {.tv_sec = 0};
> + struct runner_context *ctx;
> + pthread_attr_t attr;
> + int rc;
> +
> + assert(func);
> + assert(data);
> + assert(size > 0);
Why asserts? We already need to deal with this function failing. I'd
prefer error messages and failure, instead of crashing later when a null
value gets dereferenced, for instance. I admit, it's unlikley that these
arguments will be set wrong, but if there is a bug that makes it into
production code, these asserts will be disabled, and a log message
telling me what went wrong would be helpful.
> +
> + ctx = malloc(sizeof(*ctx) + size);
> + if (!ctx)
> + return NULL;
> +
> + ctx->func = func;
> + uatomic_set(&ctx->refcount, 1);
> + uatomic_set(&ctx->status, RUNNER_IDLE);
> + memcpy(ctx->data, data, size);
> +
> + if (timeout_usec) {
> + get_monotonic_time(&ctx->deadline);
> + ctx->deadline.tv_sec += timeout_usec / MILLION;
> + ctx->deadline.tv_nsec += (timeout_usec % MILLION) * 1000;
> + } else
> + ctx->deadline = time_zero;
> +
Don't think we need the smm_smp_wmb(). From
https://pubs.opengroup.org/onlinepubs/9799919799/basedefs/V1_chap04.html#tag_04_15_02
"The following functions synchronize memory with respect to other threads:"
lists pthread_create().
-Ben
> + /*
> + * This pairs with the implicit barrier in uatomic_add_return()
> + * in runner_thread()
> + */
> + cmm_smp_wmb();
> +
> + setup_thread_attr(&attr, STACK_SIZE, 1);
> + rc = pthread_create(&ctx->thr, &attr, runner_thread, ctx);
> + pthread_attr_destroy(&attr);
> +
> + if (rc) {
> + condlog(1, "%s: pthread_create(): %s", __func__, strerror(rc));
> + release_context(ctx);
> + return NULL;
> + }
> + return ctx;
> +}
> +
> +int check_runner(struct runner_context *ctx, void *data, unsigned int size)
> +{
> + int st = uatomic_read(&ctx->status);
> +
> + switch (st) {
> + case RUNNER_DONE:
> + if (data)
> + /* hand back the data to the caller */
> + memcpy(data, ctx->data, size);
> + /* caller is done with this context */
> + release_context(ctx);
> + /* fallthrough */
> + case RUNNER_CANCELLED:
> + return st;
> + case RUNNER_IDLE:
> + case RUNNER_RUNNING:
> + if (ctx->deadline.tv_sec != 0 || ctx->deadline.tv_nsec != 0) {
> + struct timespec now;
> +
> + get_monotonic_time(&now);
> + if (timespeccmp(&ctx->deadline, &now) <= 0) {
> + cancel_runner(ctx);
> + return RUNNER_CANCELLED;
> + }
> + }
> + return RUNNER_RUNNING;
> + default:
> + condlog(1, "%s: runner in impossible state '%s'", __func__,
> + runner_state_name(st));
> + assert(false);
> + return st;
> + }
> +}
> diff --git a/libmpathutil/runner.h b/libmpathutil/runner.h
> new file mode 100644
> index 0000000..0c9cf80
> --- /dev/null
> +++ b/libmpathutil/runner.h
> @@ -0,0 +1,84 @@
> +// SPDX-License-Identifier: GPL-2.0-or-later
> +// Copyright (c) 2026 SUSE LLC
> +#ifndef RUNNER_H_INCLUDED
> +#define RUNNER_H_INCLUDED
> +
> +enum runner_status {
> + RUNNER_IDLE,
> + RUNNER_RUNNING,
> + RUNNER_DONE,
> + RUNNER_CANCELLED,
> +};
> +
> +typedef void (*runner_func)(void *data);
> +struct runner_context;
> +
> +/**
> + * runner_state_name(): helper for printing runner states
> + *
> + * @param state: a valid @runner_status value
> + * @returns: a string describing the status
> + */
> +const char *runner_state_name(int state);
> +
> +/**
> + * get_runner(): start a runner thread
> + *
> + * This function starts a runner thread that calls @func(@data).
> + * The thread is created as detached thread.
> + * @data will be copied to thread-private memory, which will be freed when
> + * the runner terminates.
> + * Output values can be retrieved with @check_runner().
> + *
> + * @param func: the worker function to invoke
> + * This parameter must not be NULL.
> + * @param data: pointer the data to pass to the function (input and output)
> + * This parameter must not be NULL.
> + * @param size: the size (in bytes) of the data passed to the function
> + * This parameter must be positive.
> + * @param timeout_sec: timeout (in seconds) after which to cancel the runner.
> + * If it is 0, the runner will not time out.
> + * @returns: a runner context that must be passed to the functions below.
> + */
> +struct runner_context *get_runner(runner_func func, void *data,
> + unsigned int size, unsigned long timeout_usec);
> +
> +/**
> + * cancel_runner(): cancel a runner thread.
> + *
> + * This function should be called to terminate runners on program exit,
> + * or to terminate runners that have no timeout set in @get_runner,
> + * and haven't completed.
> + * Upon return of this function, @ctx becomes stale and shouldn't accessed
> + * any more.
> + *
> + * @param ctx: the context of the runner to be cancelled.
> + */
> +void cancel_runner(struct runner_context *ctx);
> +
> +/**
> + * check_runner(): query the state of a runner thread and obtain results
> + *
> + * Check the state of a runner previously started with @get_runner. If the
> + * thread function has completed, @RUNNER_DONE will be returned, and the
> + * user data will be copied into the @data argument. If @check_runner returns
> + * anything else, @data contains no valid data. The @size argument
> + * will typically be the same as the @size passed to @get_runner (meaning
> + * that @data represents an object of the same type as the @data argument
> + * passed to @get_runner previously).
> + * If @check_runner returns @RUNNER_CANCELLED or @RUNNER_DONE, the @ctx object
> + * has become stale and must not be used any more.
> + * @RUNNER_CANCELLED is returned if the timeout set in @get_runner has passed
> + * and the worker function hasn't returned yet.
> + *
> + * @param ctx: the context of the runner to be queried
> + * @param data: memory pointer that will receive results of the worker
> + * function. Can be NULL, in which case no data will be copied.
> + * @param size: size of the memory pointed to by data. It must be no bigger
> + * than the size of the memory passed to @get_runner for this runner.
> + * It can be smaller, but no more than @size bytes will be copied.
> + * @returns: @RUNNER_RUNNING, @RUNNER_DONE, or @RUNNER_CANCELLED.
> + */
> +int check_runner(struct runner_context *ctx, void *data, unsigned int size);
> +
> +#endif /* RUNNER_H_INCLUDED */
> --
> 2.53.0
next prev parent reply other threads:[~2026-03-24 5:44 UTC|newest]
Thread overview: 11+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-03-19 22:13 [PATCH 0/4] multipath-tools: generic async threads for TUR checker Martin Wilck
2026-03-19 22:13 ` [PATCH 1/4] multipathd: get_new_state: map PATH_TIMEOUT to PATH_DOWN Martin Wilck
2026-03-24 5:35 ` Benjamin Marzinski
2026-03-19 22:13 ` [PATCH 2/4] libmpathutil: add generic implementation for checker thread runners Martin Wilck
2026-03-24 5:44 ` Benjamin Marzinski [this message]
2026-03-19 22:13 ` [PATCH 3/4] multipath-tools tests: add test program for " Martin Wilck
2026-03-24 5:47 ` Benjamin Marzinski
2026-03-19 22:13 ` [PATCH 4/4] libmultipath: TUR checker: use runner threads Martin Wilck
2026-03-24 6:38 ` Benjamin Marzinski
2026-03-24 12:24 ` Martin Wilck
2026-03-24 14:46 ` Benjamin Marzinski
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=acIkpYcxYE3-VaCf@redhat.com \
--to=bmarzins@redhat.com \
--cc=brian@purestorage.com \
--cc=christophe.varoqui@opensvc.com \
--cc=dm-devel@lists.linux.dev \
--cc=martin.wilck@suse.com \
--cc=mwilck@suse.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox