linux-toolchains.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Josh Poimboeuf <jpoimboe@kernel.org>
To: x86@kernel.org
Cc: Peter Zijlstra <peterz@infradead.org>,
	Steven Rostedt <rostedt@goodmis.org>,
	Ingo Molnar <mingo@kernel.org>,
	Arnaldo Carvalho de Melo <acme@kernel.org>,
	linux-kernel@vger.kernel.org,
	Indu Bhagat <indu.bhagat@oracle.com>,
	Mark Rutland <mark.rutland@arm.com>,
	Alexander Shishkin <alexander.shishkin@linux.intel.com>,
	Jiri Olsa <jolsa@kernel.org>, Namhyung Kim <namhyung@kernel.org>,
	Ian Rogers <irogers@google.com>,
	Adrian Hunter <adrian.hunter@intel.com>,
	linux-perf-users@vger.kernel.org, Mark Brown <broonie@kernel.org>,
	linux-toolchains@vger.kernel.org, Jordan Rome <jordalgo@meta.com>,
	Sam James <sam@gentoo.org>,
	linux-trace-kernel@vger.kernel.org,
	Andrii Nakryiko <andrii.nakryiko@gmail.com>,
	Jens Remus <jremus@linux.ibm.com>,
	Mathieu Desnoyers <mathieu.desnoyers@efficios.com>,
	Florian Weimer <fweimer@redhat.com>,
	Andy Lutomirski <luto@kernel.org>
Subject: [PATCH v3 11/19] unwind: Add deferred user space unwinding API
Date: Mon, 28 Oct 2024 14:47:38 -0700	[thread overview]
Message-ID: <a94eb70a80c4a13dedb2655b7848304a992cb1b0.1730150953.git.jpoimboe@kernel.org> (raw)
In-Reply-To: <cover.1730150953.git.jpoimboe@kernel.org>

Add unwind_user_deferred() which allows callers to schedule task work to
unwind the user space stack before returning to user space.  This solves
several problems for its callers:

  - Ensure the unwind happens in task context even if the caller may be
    running in interrupt context.

  - Only do the unwind once, even if called multiple times either by the
    same caller or multiple callers.

  - Create a "context cookie" which allows trace post-processing to
    correlate kernel unwinds/traces with the user unwind.

Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org>
---
 include/linux/entry-common.h |   3 +
 include/linux/sched.h        |   5 +
 include/linux/unwind_user.h  |  56 ++++++++++
 kernel/fork.c                |   4 +
 kernel/unwind/user.c         | 199 +++++++++++++++++++++++++++++++++++
 5 files changed, 267 insertions(+)

diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h
index 1e50cdb83ae5..efbe8f964f31 100644
--- a/include/linux/entry-common.h
+++ b/include/linux/entry-common.h
@@ -12,6 +12,7 @@
 #include <linux/resume_user_mode.h>
 #include <linux/tick.h>
 #include <linux/kmsan.h>
+#include <linux/unwind_user.h>
 
 #include <asm/entry-common.h>
 
@@ -111,6 +112,8 @@ static __always_inline void enter_from_user_mode(struct pt_regs *regs)
 	CT_WARN_ON(__ct_state() != CT_STATE_USER);
 	user_exit_irqoff();
 
+	unwind_enter_from_user_mode();
+
 	instrumentation_begin();
 	kmsan_unpoison_entry_regs(regs);
 	trace_hardirqs_off_finish();
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 5007a8e2d640..31b6f1d763ef 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -47,6 +47,7 @@
 #include <linux/livepatch_sched.h>
 #include <linux/uidgid_types.h>
 #include <asm/kmap_size.h>
+#include <linux/unwind_user.h>
 
 /* task_struct member predeclarations (sorted alphabetically): */
 struct audit_context;
@@ -1592,6 +1593,10 @@ struct task_struct {
 	struct user_event_mm		*user_event_mm;
 #endif
 
+#ifdef CONFIG_UNWIND_USER
+	struct unwind_task_info		unwind_task_info;
+#endif
+
 	/*
 	 * New fields for task_struct should be added above here, so that
 	 * they are included in the randomized portion of task_struct.
diff --git a/include/linux/unwind_user.h b/include/linux/unwind_user.h
index cde0fde4923e..98e236c843b1 100644
--- a/include/linux/unwind_user.h
+++ b/include/linux/unwind_user.h
@@ -3,6 +3,9 @@
 #define _LINUX_UNWIND_USER_H
 
 #include <linux/types.h>
+#include <linux/percpu-defs.h>
+
+#define UNWIND_MAX_CALLBACKS 4
 
 enum unwind_user_type {
 	UNWIND_USER_TYPE_NONE,
@@ -30,6 +33,26 @@ struct unwind_user_state {
 	bool done;
 };
 
+struct unwind_task_info {
+	u64			ctx_cookie;	/* cookie of the scheduled unwind, 0 if none */
+	u32			pending_callbacks;	/* bitmask of callback idx to run */
+	u64			last_cookies[UNWIND_MAX_CALLBACKS];	/* last cookie per idx */
+	void			*privs[UNWIND_MAX_CALLBACKS];	/* @data per callback idx */
+	unsigned long		*entries;	/* unwind buffer, allocated on first use */
+	struct callback_head	work;	/* runs unwind_user_task_work() */
+};
+
+typedef void (*unwind_callback_t)(struct unwind_stacktrace *trace,
+				  u64 ctx_cookie, void *data);
+
+struct unwind_callback {
+	unwind_callback_t		func;
+	int				idx;
+};
+
+
+#ifdef CONFIG_UNWIND_USER
+
 /* Synchronous interfaces: */
 
 int unwind_user_start(struct unwind_user_state *state);
@@ -40,4 +63,37 @@ int unwind_user(struct unwind_stacktrace *trace, unsigned int max_entries);
 #define for_each_user_frame(state) \
 	for (unwind_user_start((state)); !(state)->done; unwind_user_next((state)))
 
+
+/* Asynchronous interfaces: */
+
+void unwind_task_init(struct task_struct *task);
+void unwind_task_free(struct task_struct *task);
+
+int unwind_user_register(struct unwind_callback *callback, unwind_callback_t func);
+int unwind_user_unregister(struct unwind_callback *callback);
+
+int unwind_user_deferred(struct unwind_callback *callback, u64 *ctx_cookie, void *data);
+
+DECLARE_PER_CPU(u64, unwind_ctx_ctr);
+/* Count this CPU's entries from user space; the count feeds the context cookie. */
+static __always_inline void unwind_enter_from_user_mode(void)
+{
+	__this_cpu_inc(unwind_ctx_ctr);
+}
+
+
+#else /* !CONFIG_UNWIND_USER */
+
+static inline void unwind_task_init(struct task_struct *task) {}
+static inline void unwind_task_free(struct task_struct *task) {}
+
+static inline int unwind_user_register(struct unwind_callback *callback, unwind_callback_t func) { return -ENOSYS; }
+static inline int unwind_user_unregister(struct unwind_callback *callback) { return -ENOSYS; }
+
+static inline int unwind_user_deferred(struct unwind_callback *callback, u64 *ctx_cookie, void *data) { return -ENOSYS; }
+
+static inline void unwind_enter_from_user_mode(void) {}
+
+#endif /* !CONFIG_UNWIND_USER */
+
 #endif /* _LINUX_UNWIND_USER_H */
diff --git a/kernel/fork.c b/kernel/fork.c
index 60f14fbab956..d7580067853d 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -105,6 +105,7 @@
 #include <linux/rseq.h>
 #include <uapi/linux/pidfd.h>
 #include <linux/pidfs.h>
+#include <linux/unwind_user.h>
 #include <linux/sframe.h>
 
 #include <asm/pgalloc.h>
@@ -972,6 +973,7 @@ void __put_task_struct(struct task_struct *tsk)
 	WARN_ON(refcount_read(&tsk->usage));
 	WARN_ON(tsk == current);
 
+	unwind_task_free(tsk);
 	sched_ext_free(tsk);
 	io_uring_free(tsk);
 	cgroup_free(tsk);
@@ -2348,6 +2350,8 @@ __latent_entropy struct task_struct *copy_process(
 	p->bpf_ctx = NULL;
 #endif
 
+	unwind_task_init(p);
+
 	/* Perform scheduler related setup. Assign this task to a CPU. */
 	retval = sched_fork(clone_flags, p);
 	if (retval)
diff --git a/kernel/unwind/user.c b/kernel/unwind/user.c
index 8e47c80e3e54..ed7759c56551 100644
--- a/kernel/unwind/user.c
+++ b/kernel/unwind/user.c
@@ -10,6 +10,11 @@
 #include <linux/unwind_user.h>
 #include <linux/sframe.h>
 #include <linux/uaccess.h>
+#include <linux/slab.h>
+#include <linux/task_work.h>
+#include <linux/mm.h>
+
+#define UNWIND_MAX_ENTRIES 512
 
 #ifdef CONFIG_HAVE_UNWIND_USER_FP
 #include <asm/unwind_user.h>
@@ -20,6 +25,12 @@ static struct unwind_user_frame fp_frame = {
 static struct unwind_user_frame fp_frame;
 #endif
 
+static struct unwind_callback *callbacks[UNWIND_MAX_CALLBACKS];
+static DECLARE_RWSEM(callbacks_rwsem);
+
+/* Counter for entries from user space */
+DEFINE_PER_CPU(u64, unwind_ctx_ctr);
+
 int unwind_user_next(struct unwind_user_state *state)
 {
 	struct unwind_user_frame _frame;
@@ -117,3 +128,191 @@ int unwind_user(struct unwind_stacktrace *trace, unsigned int max_entries)
 
 	return 0;
 }
+
+/*
+ * The "context cookie" is a unique identifier which allows post-processing to
+ * correlate kernel trace(s) with user unwinds.  It has the CPU id in the
+ * highest 16 bits and a per-CPU entry counter in the lower 48 bits.
+ */
+static u64 ctx_to_cookie(u64 cpu, u64 ctx)
+{
+	BUILD_BUG_ON(NR_CPUS > 65535);
+	return (ctx & ((1ULL << 48) - 1)) | (cpu << 48);
+}
+
+/*
+ * Schedule a user space unwind to be done in task work before exiting the
+ * kernel.
+ *
+ * The @callback must have previously been registered with
+ * unwind_user_register().  @data is passed through to the callback function.
+ *
+ * The @ctx_cookie output (may be NULL) is a unique identifier which will also
+ * be passed to the callback function.  It can be used to stitch kernel and
+ * user traces together in post-processing.
+ *
+ * If there are multiple calls to this function for a given @callback, the
+ * cookie will usually be the same and the callback will only be called once.
+ *
+ * The only exception is when the task has migrated to another CPU, *and* this
+ * is called while the task work is running (or has already run).  Then a new
+ * cookie will be generated and the callback will be called again for the new
+ * cookie.
+ *
+ * Returns 0 on success or a negative errno if no unwind could be scheduled.
+ */
+int unwind_user_deferred(struct unwind_callback *callback, u64 *ctx_cookie, void *data)
+{
+	struct unwind_task_info *info = &current->unwind_task_info;
+	int idx = callback->idx;
+	u64 cookie;
+
+	if (WARN_ON_ONCE(in_nmi()))
+		return -EINVAL;
+
+	if (WARN_ON_ONCE(!callback->func || idx < 0))
+		return -EINVAL;
+
+	if (!current->mm)
+		return -EINVAL;
+
+	guard(irqsave)();
+
+	/* Read the cookie with IRQs off so an IRQ caller can't race with us. */
+	cookie = info->ctx_cookie;
+	if (cookie && (info->pending_callbacks & (1 << idx)))
+		goto done;
+
+	/*
+	 * If this is the first call from *any* caller since the most recent
+	 * entry from user space, initialize the task context cookie and
+	 * schedule the task work.
+	 */
+	if (!cookie) {
+		u64 ctx_ctr = __this_cpu_read(unwind_ctx_ctr);
+		u64 cpu = raw_smp_processor_id();
+		int ret;
+
+		cookie = ctx_to_cookie(cpu, ctx_ctr);
+
+		/*
+		 * The task work already called back for this cookie and we
+		 * haven't exited to user space (or migrated CPUs) since: skip.
+		 */
+		if (cookie == info->last_cookies[idx])
+			goto done;
+
+		ret = task_work_add(current, &info->work, TWA_RESUME);
+		if (ret)
+			return ret;
+		info->ctx_cookie = cookie;
+	}
+
+	info->pending_callbacks |= (1 << idx);
+	info->privs[idx] = data;
+	info->last_cookies[idx] = cookie;
+
+done:
+	if (ctx_cookie)
+		*ctx_cookie = cookie;
+	return 0;
+}
+/* Task work: unwind current's user stack once, then run each pending callback. */
+static void unwind_user_task_work(struct callback_head *head)
+{
+	struct unwind_task_info *info = container_of(head, struct unwind_task_info, work);
+	struct task_struct *task = container_of(info, struct task_struct, unwind_task_info);
+	void *privs[UNWIND_MAX_CALLBACKS];
+	struct unwind_stacktrace trace;
+	unsigned long pending;
+	u64 cookie = 0;
+	int i;
+
+	BUILD_BUG_ON(UNWIND_MAX_CALLBACKS > 32);
+	/* unwind_user_deferred() only ever queues this work on current. */
+	if (WARN_ON_ONCE(task != current))
+		return;
+
+	if (WARN_ON_ONCE(!info->ctx_cookie || !info->pending_callbacks))
+		return;
+	/* Snapshot and clear with IRQs off: callers may run in interrupt context. */
+	scoped_guard(irqsave) {
+		pending = info->pending_callbacks;
+		cookie = info->ctx_cookie;
+
+		info->pending_callbacks = 0;
+		info->ctx_cookie = 0;
+		memcpy(privs, info->privs, sizeof(void *) * UNWIND_MAX_CALLBACKS);
+	}
+	/* The buffer is allocated on first use; unwind_task_free() releases it. */
+	if (!info->entries) {
+		info->entries = kmalloc(UNWIND_MAX_ENTRIES * sizeof(long),
+					GFP_KERNEL);
+		if (!info->entries)
+			return;
+	}
+
+	trace.entries = info->entries;
+	trace.nr = 0;
+	unwind_user(&trace, UNWIND_MAX_ENTRIES);
+	/* Register/unregister take this rwsem for write; hold them off meanwhile. */
+	guard(rwsem_read)(&callbacks_rwsem);
+
+	for_each_set_bit(i, &pending, UNWIND_MAX_CALLBACKS) {
+		if (callbacks[i])
+			callbacks[i]->func(&trace, cookie, privs[i]);
+	}
+}
+
+/* Claim the first free callbacks[] slot for @callback; -ENOSPC if none. */
+int unwind_user_register(struct unwind_callback *callback, unwind_callback_t func)
+{
+	scoped_guard(rwsem_write, &callbacks_rwsem) {
+		for (int slot = 0; slot < UNWIND_MAX_CALLBACKS; slot++) {
+			if (callbacks[slot])
+				continue;
+			callback->func = func;
+			callback->idx = slot;
+			callbacks[slot] = callback;
+			return 0;
+		}
+	}
+	callback->func = NULL;
+	callback->idx = -1;
+	return -ENOSPC;
+}
+
+/* Release @callback's slot; -EINVAL unless @callback currently owns it. */
+int unwind_user_unregister(struct unwind_callback *callback)
+{
+	if (callback->idx < 0 || callback->idx >= UNWIND_MAX_CALLBACKS)
+		return -EINVAL;
+	guard(rwsem_write)(&callbacks_rwsem);
+	if (callbacks[callback->idx] != callback)
+		return -EINVAL;
+	callbacks[callback->idx] = NULL;
+	callback->func = NULL;
+	callback->idx = -1;
+	return 0;
+}
+
+/* Reset @task's deferred unwind state; copy_process() calls this at fork. */
+void unwind_task_init(struct task_struct *task)
+{
+	struct unwind_task_info *info = &task->unwind_task_info;
+
+	info->entries		= NULL;
+	info->pending_callbacks	= 0;
+	info->ctx_cookie	= 0;
+	/* Size by the arrays themselves: privs[] holds pointers, not u64s. */
+	memset(info->last_cookies, 0, sizeof(info->last_cookies));
+	memset(info->privs,	   0, sizeof(info->privs));
+	init_task_work(&info->work, unwind_user_task_work);
+}
+
+/* Free @task's unwind buffer; it is still NULL if no unwind ever ran. */
+void unwind_task_free(struct task_struct *task)
+{
+	/* kfree(NULL) is a no-op, so no check is needed. */
+	kfree(task->unwind_task_info.entries);
+}
-- 
2.47.0


WARNING: multiple messages have this Message-ID (diff)
From: Josh Poimboeuf <jpoimboe@kernel.org>
To: x86@kernel.org
Cc: Peter Zijlstra <peterz@infradead.org>,
	Steven Rostedt <rostedt@goodmis.org>,
	Ingo Molnar <mingo@kernel.org>,
	Arnaldo Carvalho de Melo <acme@kernel.org>,
	linux-kernel@vger.kernel.org,
	Indu Bhagat <indu.bhagat@oracle.com>,
	Mark Rutland <mark.rutland@arm.com>,
	Alexander Shishkin <alexander.shishkin@linux.intel.com>,
	Jiri Olsa <jolsa@kernel.org>, Namhyung Kim <namhyung@kernel.org>,
	Ian Rogers <irogers@google.com>,
	Adrian Hunter <adrian.hunter@intel.com>,
	linux-perf-users@vger.kernel.org, Mark Brown <broonie@kernel.org>,
	linux-toolchains@vger.kernel.org, Jordan Rome <jordalgo@meta.com>,
	Sam James <sam@gentoo.org>,
	linux-trace-kernel@vger.kernel.org,
	Andrii Nakryiko <andrii.nakryiko@gmail.com>,
	Jens Remus <jremus@linux.ibm.com>,
	Mathieu Desnoyers <mathieu.desnoyers@efficios.com>,
	Florian Weimer <fweimer@redhat.com>,
	Andy Lutomirski <luto@kernel.org>
Subject: [PATCH v3 11/19] unwind: Add deferred user space unwinding API
Date: Mon, 28 Oct 2024 14:47:58 -0700	[thread overview]
Message-ID: <a94eb70a80c4a13dedb2655b7848304a992cb1b0.1730150953.git.jpoimboe@kernel.org> (raw)
Message-ID: <20241028214758.rQAal-b4GPIjzZNUOOuPD3CrNNTz4p6Jd8x91m7sE1U@z> (raw)
In-Reply-To: <cover.1730150953.git.jpoimboe@kernel.org>

Add unwind_user_deferred() which allows callers to schedule task work to
unwind the user space stack before returning to user space.  This solves
several problems for its callers:

  - Ensure the unwind happens in task context even if the caller may be
    running in interrupt context.

  - Only do the unwind once, even if called multiple times either by the
    same caller or multiple callers.

  - Create a "context cookie" which allows trace post-processing to
    correlate kernel unwinds/traces with the user unwind.

Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org>
---
 include/linux/entry-common.h |   3 +
 include/linux/sched.h        |   5 +
 include/linux/unwind_user.h  |  56 ++++++++++
 kernel/fork.c                |   4 +
 kernel/unwind/user.c         | 199 +++++++++++++++++++++++++++++++++++
 5 files changed, 267 insertions(+)

diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h
index 1e50cdb83ae5..efbe8f964f31 100644
--- a/include/linux/entry-common.h
+++ b/include/linux/entry-common.h
@@ -12,6 +12,7 @@
 #include <linux/resume_user_mode.h>
 #include <linux/tick.h>
 #include <linux/kmsan.h>
+#include <linux/unwind_user.h>
 
 #include <asm/entry-common.h>
 
@@ -111,6 +112,8 @@ static __always_inline void enter_from_user_mode(struct pt_regs *regs)
 	CT_WARN_ON(__ct_state() != CT_STATE_USER);
 	user_exit_irqoff();
 
+	unwind_enter_from_user_mode();
+
 	instrumentation_begin();
 	kmsan_unpoison_entry_regs(regs);
 	trace_hardirqs_off_finish();
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 5007a8e2d640..31b6f1d763ef 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -47,6 +47,7 @@
 #include <linux/livepatch_sched.h>
 #include <linux/uidgid_types.h>
 #include <asm/kmap_size.h>
+#include <linux/unwind_user.h>
 
 /* task_struct member predeclarations (sorted alphabetically): */
 struct audit_context;
@@ -1592,6 +1593,10 @@ struct task_struct {
 	struct user_event_mm		*user_event_mm;
 #endif
 
+#ifdef CONFIG_UNWIND_USER
+	struct unwind_task_info		unwind_task_info;
+#endif
+
 	/*
 	 * New fields for task_struct should be added above here, so that
 	 * they are included in the randomized portion of task_struct.
diff --git a/include/linux/unwind_user.h b/include/linux/unwind_user.h
index cde0fde4923e..98e236c843b1 100644
--- a/include/linux/unwind_user.h
+++ b/include/linux/unwind_user.h
@@ -3,6 +3,9 @@
 #define _LINUX_UNWIND_USER_H
 
 #include <linux/types.h>
+#include <linux/percpu-defs.h>
+
+#define UNWIND_MAX_CALLBACKS 4
 
 enum unwind_user_type {
 	UNWIND_USER_TYPE_NONE,
@@ -30,6 +33,26 @@ struct unwind_user_state {
 	bool done;
 };
 
+struct unwind_task_info {
+	u64			ctx_cookie;	/* cookie of the scheduled unwind, 0 if none */
+	u32			pending_callbacks;	/* bitmask of callback idx to run */
+	u64			last_cookies[UNWIND_MAX_CALLBACKS];	/* last cookie per idx */
+	void			*privs[UNWIND_MAX_CALLBACKS];	/* @data per callback idx */
+	unsigned long		*entries;	/* unwind buffer, allocated on first use */
+	struct callback_head	work;	/* runs unwind_user_task_work() */
+};
+
+typedef void (*unwind_callback_t)(struct unwind_stacktrace *trace,
+				  u64 ctx_cookie, void *data);
+
+struct unwind_callback {
+	unwind_callback_t		func;
+	int				idx;
+};
+
+
+#ifdef CONFIG_UNWIND_USER
+
 /* Synchronous interfaces: */
 
 int unwind_user_start(struct unwind_user_state *state);
@@ -40,4 +63,37 @@ int unwind_user(struct unwind_stacktrace *trace, unsigned int max_entries);
 #define for_each_user_frame(state) \
 	for (unwind_user_start((state)); !(state)->done; unwind_user_next((state)))
 
+
+/* Asynchronous interfaces: */
+
+void unwind_task_init(struct task_struct *task);
+void unwind_task_free(struct task_struct *task);
+
+int unwind_user_register(struct unwind_callback *callback, unwind_callback_t func);
+int unwind_user_unregister(struct unwind_callback *callback);
+
+int unwind_user_deferred(struct unwind_callback *callback, u64 *ctx_cookie, void *data);
+
+DECLARE_PER_CPU(u64, unwind_ctx_ctr);
+/* Count this CPU's entries from user space; the count feeds the context cookie. */
+static __always_inline void unwind_enter_from_user_mode(void)
+{
+	__this_cpu_inc(unwind_ctx_ctr);
+}
+
+
+#else /* !CONFIG_UNWIND_USER */
+
+static inline void unwind_task_init(struct task_struct *task) {}
+static inline void unwind_task_free(struct task_struct *task) {}
+
+static inline int unwind_user_register(struct unwind_callback *callback, unwind_callback_t func) { return -ENOSYS; }
+static inline int unwind_user_unregister(struct unwind_callback *callback) { return -ENOSYS; }
+
+static inline int unwind_user_deferred(struct unwind_callback *callback, u64 *ctx_cookie, void *data) { return -ENOSYS; }
+
+static inline void unwind_enter_from_user_mode(void) {}
+
+#endif /* !CONFIG_UNWIND_USER */
+
 #endif /* _LINUX_UNWIND_USER_H */
diff --git a/kernel/fork.c b/kernel/fork.c
index 60f14fbab956..d7580067853d 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -105,6 +105,7 @@
 #include <linux/rseq.h>
 #include <uapi/linux/pidfd.h>
 #include <linux/pidfs.h>
+#include <linux/unwind_user.h>
 #include <linux/sframe.h>
 
 #include <asm/pgalloc.h>
@@ -972,6 +973,7 @@ void __put_task_struct(struct task_struct *tsk)
 	WARN_ON(refcount_read(&tsk->usage));
 	WARN_ON(tsk == current);
 
+	unwind_task_free(tsk);
 	sched_ext_free(tsk);
 	io_uring_free(tsk);
 	cgroup_free(tsk);
@@ -2348,6 +2350,8 @@ __latent_entropy struct task_struct *copy_process(
 	p->bpf_ctx = NULL;
 #endif
 
+	unwind_task_init(p);
+
 	/* Perform scheduler related setup. Assign this task to a CPU. */
 	retval = sched_fork(clone_flags, p);
 	if (retval)
diff --git a/kernel/unwind/user.c b/kernel/unwind/user.c
index 8e47c80e3e54..ed7759c56551 100644
--- a/kernel/unwind/user.c
+++ b/kernel/unwind/user.c
@@ -10,6 +10,11 @@
 #include <linux/unwind_user.h>
 #include <linux/sframe.h>
 #include <linux/uaccess.h>
+#include <linux/slab.h>
+#include <linux/task_work.h>
+#include <linux/mm.h>
+
+#define UNWIND_MAX_ENTRIES 512
 
 #ifdef CONFIG_HAVE_UNWIND_USER_FP
 #include <asm/unwind_user.h>
@@ -20,6 +25,12 @@ static struct unwind_user_frame fp_frame = {
 static struct unwind_user_frame fp_frame;
 #endif
 
+static struct unwind_callback *callbacks[UNWIND_MAX_CALLBACKS];
+static DECLARE_RWSEM(callbacks_rwsem);
+
+/* Counter for entries from user space */
+DEFINE_PER_CPU(u64, unwind_ctx_ctr);
+
 int unwind_user_next(struct unwind_user_state *state)
 {
 	struct unwind_user_frame _frame;
@@ -117,3 +128,191 @@ int unwind_user(struct unwind_stacktrace *trace, unsigned int max_entries)
 
 	return 0;
 }
+
+/*
+ * The "context cookie" is a unique identifier which allows post-processing to
+ * correlate kernel trace(s) with user unwinds.  It has the CPU id in the
+ * highest 16 bits and a per-CPU entry counter in the lower 48 bits.
+ */
+static u64 ctx_to_cookie(u64 cpu, u64 ctx)
+{
+	BUILD_BUG_ON(NR_CPUS > 65535);
+	return (ctx & ((1ULL << 48) - 1)) | (cpu << 48);
+}
+
+/*
+ * Schedule a user space unwind to be done in task work before exiting the
+ * kernel.
+ *
+ * The @callback must have previously been registered with
+ * unwind_user_register().  @data is passed through to the callback function.
+ *
+ * The @ctx_cookie output (may be NULL) is a unique identifier which will also
+ * be passed to the callback function.  It can be used to stitch kernel and
+ * user traces together in post-processing.
+ *
+ * If there are multiple calls to this function for a given @callback, the
+ * cookie will usually be the same and the callback will only be called once.
+ *
+ * The only exception is when the task has migrated to another CPU, *and* this
+ * is called while the task work is running (or has already run).  Then a new
+ * cookie will be generated and the callback will be called again for the new
+ * cookie.
+ *
+ * Returns 0 on success or a negative errno if no unwind could be scheduled.
+ */
+int unwind_user_deferred(struct unwind_callback *callback, u64 *ctx_cookie, void *data)
+{
+	struct unwind_task_info *info = &current->unwind_task_info;
+	int idx = callback->idx;
+	u64 cookie;
+
+	if (WARN_ON_ONCE(in_nmi()))
+		return -EINVAL;
+
+	if (WARN_ON_ONCE(!callback->func || idx < 0))
+		return -EINVAL;
+
+	if (!current->mm)
+		return -EINVAL;
+
+	guard(irqsave)();
+
+	/* Read the cookie with IRQs off so an IRQ caller can't race with us. */
+	cookie = info->ctx_cookie;
+	if (cookie && (info->pending_callbacks & (1 << idx)))
+		goto done;
+
+	/*
+	 * If this is the first call from *any* caller since the most recent
+	 * entry from user space, initialize the task context cookie and
+	 * schedule the task work.
+	 */
+	if (!cookie) {
+		u64 ctx_ctr = __this_cpu_read(unwind_ctx_ctr);
+		u64 cpu = raw_smp_processor_id();
+		int ret;
+
+		cookie = ctx_to_cookie(cpu, ctx_ctr);
+
+		/*
+		 * The task work already called back for this cookie and we
+		 * haven't exited to user space (or migrated CPUs) since: skip.
+		 */
+		if (cookie == info->last_cookies[idx])
+			goto done;
+
+		ret = task_work_add(current, &info->work, TWA_RESUME);
+		if (ret)
+			return ret;
+		info->ctx_cookie = cookie;
+	}
+
+	info->pending_callbacks |= (1 << idx);
+	info->privs[idx] = data;
+	info->last_cookies[idx] = cookie;
+
+done:
+	if (ctx_cookie)
+		*ctx_cookie = cookie;
+	return 0;
+}
+/* Task work: unwind current's user stack once, then run each pending callback. */
+static void unwind_user_task_work(struct callback_head *head)
+{
+	struct unwind_task_info *info = container_of(head, struct unwind_task_info, work);
+	struct task_struct *task = container_of(info, struct task_struct, unwind_task_info);
+	void *privs[UNWIND_MAX_CALLBACKS];
+	struct unwind_stacktrace trace;
+	unsigned long pending;
+	u64 cookie = 0;
+	int i;
+
+	BUILD_BUG_ON(UNWIND_MAX_CALLBACKS > 32);
+	/* unwind_user_deferred() only ever queues this work on current. */
+	if (WARN_ON_ONCE(task != current))
+		return;
+
+	if (WARN_ON_ONCE(!info->ctx_cookie || !info->pending_callbacks))
+		return;
+	/* Snapshot and clear with IRQs off: callers may run in interrupt context. */
+	scoped_guard(irqsave) {
+		pending = info->pending_callbacks;
+		cookie = info->ctx_cookie;
+
+		info->pending_callbacks = 0;
+		info->ctx_cookie = 0;
+		memcpy(privs, info->privs, sizeof(void *) * UNWIND_MAX_CALLBACKS);
+	}
+	/* The buffer is allocated on first use; unwind_task_free() releases it. */
+	if (!info->entries) {
+		info->entries = kmalloc(UNWIND_MAX_ENTRIES * sizeof(long),
+					GFP_KERNEL);
+		if (!info->entries)
+			return;
+	}
+
+	trace.entries = info->entries;
+	trace.nr = 0;
+	unwind_user(&trace, UNWIND_MAX_ENTRIES);
+	/* Register/unregister take this rwsem for write; hold them off meanwhile. */
+	guard(rwsem_read)(&callbacks_rwsem);
+
+	for_each_set_bit(i, &pending, UNWIND_MAX_CALLBACKS) {
+		if (callbacks[i])
+			callbacks[i]->func(&trace, cookie, privs[i]);
+	}
+}
+
+/* Claim the first free callbacks[] slot for @callback; -ENOSPC if none. */
+int unwind_user_register(struct unwind_callback *callback, unwind_callback_t func)
+{
+	scoped_guard(rwsem_write, &callbacks_rwsem) {
+		for (int slot = 0; slot < UNWIND_MAX_CALLBACKS; slot++) {
+			if (callbacks[slot])
+				continue;
+			callback->func = func;
+			callback->idx = slot;
+			callbacks[slot] = callback;
+			return 0;
+		}
+	}
+	callback->func = NULL;
+	callback->idx = -1;
+	return -ENOSPC;
+}
+
+/* Release @callback's slot; -EINVAL unless @callback currently owns it. */
+int unwind_user_unregister(struct unwind_callback *callback)
+{
+	if (callback->idx < 0 || callback->idx >= UNWIND_MAX_CALLBACKS)
+		return -EINVAL;
+	guard(rwsem_write)(&callbacks_rwsem);
+	if (callbacks[callback->idx] != callback)
+		return -EINVAL;
+	callbacks[callback->idx] = NULL;
+	callback->func = NULL;
+	callback->idx = -1;
+	return 0;
+}
+
+/* Reset @task's deferred unwind state; copy_process() calls this at fork. */
+void unwind_task_init(struct task_struct *task)
+{
+	struct unwind_task_info *info = &task->unwind_task_info;
+
+	info->entries		= NULL;
+	info->pending_callbacks	= 0;
+	info->ctx_cookie	= 0;
+	/* Size by the arrays themselves: privs[] holds pointers, not u64s. */
+	memset(info->last_cookies, 0, sizeof(info->last_cookies));
+	memset(info->privs,	   0, sizeof(info->privs));
+	init_task_work(&info->work, unwind_user_task_work);
+}
+
+/* Free @task's unwind buffer; it is still NULL if no unwind ever ran. */
+void unwind_task_free(struct task_struct *task)
+{
+	/* kfree(NULL) is a no-op, so no check is needed. */
+	kfree(task->unwind_task_info.entries);
+}
-- 
2.47.0


  parent reply	other threads:[~2024-10-28 21:48 UTC|newest]

Thread overview: 119+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-10-28 21:47 [PATCH v3 00/19] unwind, perf: sframe user space unwinding Josh Poimboeuf
2024-10-28 21:47 ` [PATCH v3 01/19] x86/vdso: Fix DWARF generation for getrandom() Josh Poimboeuf
2024-10-28 21:47   ` Josh Poimboeuf
2024-10-28 21:47 ` [PATCH v3 02/19] x86/asm: Avoid emitting DWARF CFI for non-VDSO Josh Poimboeuf
2024-10-28 21:47   ` Josh Poimboeuf
2024-10-30 17:19   ` Jens Remus
2024-10-30 17:51     ` Josh Poimboeuf
2024-10-28 21:47 ` [PATCH v3 03/19] x86/asm: Fix VDSO DWARF generation with kernel IBT enabled Josh Poimboeuf
2024-10-28 21:47   ` Josh Poimboeuf
2024-10-28 21:47 ` [PATCH v3 04/19] x86/vdso: Use SYM_FUNC_{START,END} in __kernel_vsyscall() Josh Poimboeuf
2024-10-28 21:47   ` Josh Poimboeuf
2024-10-28 21:47 ` [PATCH v3 05/19] x86/vdso: Use CFI macros in __vdso_sgx_enter_enclave() Josh Poimboeuf
2024-10-28 21:47   ` Josh Poimboeuf
2024-10-28 21:47 ` [PATCH v3 06/19] x86/vdso: Enable sframe generation in VDSO Josh Poimboeuf
2024-10-28 21:47   ` Josh Poimboeuf
2024-10-30 18:20   ` Jens Remus
2024-10-30 19:17     ` Josh Poimboeuf
2024-10-28 21:47 ` [PATCH v3 07/19] unwind: Add user space unwinding API Josh Poimboeuf
2024-10-28 21:47   ` Josh Poimboeuf
2024-12-06 10:29   ` Jens Remus
2024-12-09 20:54     ` Josh Poimboeuf
2024-12-11 14:53       ` Jens Remus
2024-12-11 17:48         ` Josh Poimboeuf
2024-10-28 21:47 ` [PATCH v3 08/19] unwind/x86: Enable CONFIG_HAVE_UNWIND_USER_FP Josh Poimboeuf
2024-10-28 21:47   ` Josh Poimboeuf
2024-10-29 13:13   ` Peter Zijlstra
2024-10-29 16:31     ` Josh Poimboeuf
2024-10-29 18:08       ` Peter Zijlstra
2024-10-28 21:47 ` [PATCH v3 09/19] unwind: Introduce sframe user space unwinding Josh Poimboeuf
2024-10-28 21:47   ` Josh Poimboeuf
2024-10-29 13:27   ` Peter Zijlstra
2024-10-29 16:50     ` Josh Poimboeuf
2024-10-29 18:10       ` Peter Zijlstra
2024-10-29 23:32   ` Andrii Nakryiko
2024-10-30  5:53     ` Josh Poimboeuf
2024-10-31 20:57       ` Andrii Nakryiko
2024-10-31 21:00         ` Nick Desaulniers
2024-10-31 21:38         ` Indu Bhagat
2024-11-01 18:38           ` Andrii Nakryiko
2024-11-01 18:47             ` Steven Rostedt
2024-11-01 18:54               ` Andrii Nakryiko
2024-11-03  0:07             ` Indu Bhagat
2024-10-31 23:03         ` Josh Poimboeuf
2024-11-01 18:34           ` Andrii Nakryiko
2024-11-01 19:29             ` Josh Poimboeuf
2024-11-01 19:44               ` Andrii Nakryiko
2024-11-01 19:46                 ` Andrii Nakryiko
2024-11-01 19:51                   ` Josh Poimboeuf
2024-11-01 19:09           ` Segher Boessenkool
2024-11-01 19:33             ` Josh Poimboeuf
2024-11-01 19:35               ` Josh Poimboeuf
2024-11-01 19:48                 ` Josh Poimboeuf
2024-11-01 21:35                   ` Segher Boessenkool
2024-11-05 17:40   ` Steven Rostedt
2024-11-05 17:45     ` Steven Rostedt
2024-11-06 17:04   ` Jens Remus
2024-11-07  8:25   ` Weinan Liu
2024-11-07 16:59   ` Jens Remus
2024-11-13 20:50     ` Steven Rostedt
2024-11-13 21:15       ` Josh Poimboeuf
2024-11-13 22:13         ` Steven Rostedt
2024-11-13 22:21           ` Steven Rostedt
2024-11-13 22:25             ` Steven Rostedt
2024-11-14  9:57           ` Jens Remus
2024-11-13 15:56   ` Jens Remus
2024-11-13 20:50     ` Steven Rostedt
2024-11-13 21:13       ` Josh Poimboeuf
2024-10-28 21:47 ` [PATCH v3 10/19] unwind/x86: Enable CONFIG_HAVE_UNWIND_USER_SFRAME Josh Poimboeuf
2024-10-28 21:47   ` Josh Poimboeuf
2024-10-29 13:14   ` Peter Zijlstra
2024-10-28 21:47 ` Josh Poimboeuf [this message]
2024-10-28 21:47   ` [PATCH v3 11/19] unwind: Add deferred user space unwinding API Josh Poimboeuf
2024-10-29 13:48   ` Peter Zijlstra
2024-10-29 16:51     ` Josh Poimboeuf
2024-10-29 13:49   ` Peter Zijlstra
2024-10-29 17:05     ` Josh Poimboeuf
2024-10-29 18:11       ` Peter Zijlstra
2024-10-29 13:56   ` Peter Zijlstra
2024-10-29 17:17     ` Josh Poimboeuf
2024-10-29 17:47       ` Mathieu Desnoyers
2024-10-29 18:20         ` Peter Zijlstra
2024-10-30  6:17           ` Steven Rostedt
2024-10-30 14:03             ` Peter Zijlstra
2024-10-30 19:58               ` Steven Rostedt
2024-10-30 20:48                 ` Josh Poimboeuf
2024-10-29 18:34         ` Josh Poimboeuf
2024-10-30 13:44           ` Mathieu Desnoyers
2024-10-30 17:47             ` Josh Poimboeuf
2024-10-30 17:55               ` Josh Poimboeuf
2024-10-30 18:25               ` Josh Poimboeuf
2024-10-29 23:32   ` Andrii Nakryiko
2024-10-30  6:10     ` Josh Poimboeuf
2024-10-31 21:22       ` Andrii Nakryiko
2024-10-31 23:13         ` Josh Poimboeuf
2024-10-31 23:28           ` Andrii Nakryiko
2024-11-01 17:41             ` Josh Poimboeuf
2024-11-01 18:05               ` Andrii Nakryiko
2024-10-28 21:47 ` [PATCH v3 12/19] perf: Remove get_perf_callchain() 'init_nr' argument Josh Poimboeuf
2024-10-28 21:47   ` Josh Poimboeuf
2024-10-28 21:47 ` [PATCH v3 13/19] perf: Remove get_perf_callchain() 'crosstask' argument Josh Poimboeuf
2024-10-28 21:48   ` Josh Poimboeuf
2024-10-28 21:47 ` [PATCH v3 14/19] perf: Simplify get_perf_callchain() user logic Josh Poimboeuf
2024-10-28 21:48   ` Josh Poimboeuf
2024-10-28 21:47 ` [PATCH v3 15/19] perf: Add deferred user callchains Josh Poimboeuf
2024-10-28 21:48   ` Josh Poimboeuf
2024-10-29 14:06   ` Peter Zijlstra
2024-11-06  9:45   ` Jens Remus
2024-10-28 21:47 ` [PATCH v3 16/19] perf tools: Minimal CALLCHAIN_DEFERRED support Josh Poimboeuf
2024-10-28 21:48   ` Josh Poimboeuf
2024-10-28 21:47 ` [PATCH v3 17/19] perf record: Enable defer_callchain for user callchains Josh Poimboeuf
2024-10-28 21:48   ` Josh Poimboeuf
2024-10-28 21:47 ` [PATCH v3 18/19] perf script: Display PERF_RECORD_CALLCHAIN_DEFERRED Josh Poimboeuf
2024-10-28 21:48   ` Josh Poimboeuf
2024-10-28 21:47 ` [PATCH v3 19/19] perf tools: Merge deferred user callchains Josh Poimboeuf
2024-10-28 21:48   ` Josh Poimboeuf
2024-10-28 21:47 ` [PATCH v3 00/19] unwind, perf: sframe user space unwinding Josh Poimboeuf
2024-10-28 21:54 ` Josh Poimboeuf
2024-10-28 23:55 ` Josh Poimboeuf
2024-10-29 14:08 ` Peter Zijlstra

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=a94eb70a80c4a13dedb2655b7848304a992cb1b0.1730150953.git.jpoimboe@kernel.org \
    --to=jpoimboe@kernel.org \
    --cc=acme@kernel.org \
    --cc=adrian.hunter@intel.com \
    --cc=alexander.shishkin@linux.intel.com \
    --cc=andrii.nakryiko@gmail.com \
    --cc=broonie@kernel.org \
    --cc=fweimer@redhat.com \
    --cc=indu.bhagat@oracle.com \
    --cc=irogers@google.com \
    --cc=jolsa@kernel.org \
    --cc=jordalgo@meta.com \
    --cc=jremus@linux.ibm.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-perf-users@vger.kernel.org \
    --cc=linux-toolchains@vger.kernel.org \
    --cc=linux-trace-kernel@vger.kernel.org \
    --cc=luto@kernel.org \
    --cc=mark.rutland@arm.com \
    --cc=mathieu.desnoyers@efficios.com \
    --cc=mingo@kernel.org \
    --cc=namhyung@kernel.org \
    --cc=peterz@infradead.org \
    --cc=rostedt@goodmis.org \
    --cc=sam@gentoo.org \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).