Linux DTrace development list
 help / color / mirror / Atom feed
* [PATCH] rawfbt: new provider
@ 2024-12-05 19:42 Kris Van Hees
  2024-12-05 23:23 ` Eugene Loh
  2024-12-06 10:35 ` Alan Maguire
  0 siblings, 2 replies; 7+ messages in thread
From: Kris Van Hees @ 2024-12-05 19:42 UTC (permalink / raw)
  To: dtrace, dtrace-devel

This provider provides access to all kprobe-based probes that are
available on the system.  This includes any compiler-generated
optimized variants of functions, named <func>.<suffix>.

Signed-off-by: Kris Van Hees <kris.van.hees@oracle.com>
---
 libdtrace/Build            |   2 +
 libdtrace/dt_prov_rawfbt.c | 330 +++++++++++++++++++++++++++++++++++++
 libdtrace/dt_provider.c    |   1 +
 libdtrace/dt_provider.h    |   1 +
 test/utils/clean_probes.sh |   2 +-
 5 files changed, 335 insertions(+), 1 deletion(-)
 create mode 100644 libdtrace/dt_prov_rawfbt.c

diff --git a/libdtrace/Build b/libdtrace/Build
index 8d398221..72235159 100644
--- a/libdtrace/Build
+++ b/libdtrace/Build
@@ -55,6 +55,7 @@ libdtrace-build_SOURCES = dt_aggregate.c \
 			  dt_prov_lockstat.c \
 			  dt_prov_proc.c \
 			  dt_prov_profile.c \
+			  dt_prov_rawfbt.c \
 			  dt_prov_rawtp.c \
 			  dt_prov_sched.c \
 			  dt_prov_sdt.c \
@@ -112,6 +113,7 @@ dt_prov_ip.c_CFLAGS := -Wno-pedantic
 dt_prov_lockstat.c_CFLAGS := -Wno-pedantic
 dt_prov_proc.c_CFLAGS := -Wno-pedantic
 dt_prov_profile.c_CFLAGS := -Wno-pedantic
+dt_prov_rawfbt.c_CFLAGS := -Wno-pedantic
 dt_prov_rawtp.c_CFLAGS := -Wno-pedantic
 dt_prov_sched.c_CFLAGS := -Wno-pedantic
 dt_prov_sdt.c_CFLAGS := -Wno-pedantic
diff --git a/libdtrace/dt_prov_rawfbt.c b/libdtrace/dt_prov_rawfbt.c
new file mode 100644
index 00000000..edfd36b4
--- /dev/null
+++ b/libdtrace/dt_prov_rawfbt.c
@@ -0,0 +1,330 @@
+/*
+ * Oracle Linux DTrace.
+ * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
+ * Licensed under the Universal Permissive License v 1.0 as shown at
+ * http://oss.oracle.com/licenses/upl.
+ *
+ * The Raw Function Boundary Tracing provider for DTrace.
+ *
+ * The kernel provides kprobes to trace specific symbols.  They are listed in
+ * the TRACEFS/available_filter_functions file.  Kprobes may be associated with
+ * a symbol in the core kernel or with a symbol in a specific kernel module.
+ * Whereas the fbt provider supports tracing regular symbols only, the rawfbt
+ * provider also provides access to synthetic symbols, i.e. symbols created by
+ * compiler optimizations.
+ *
+ * Mapping from event name to DTrace probe name:
+ *
+ *	<name>					rawfbt:vmlinux:<name>:entry
+ *						rawfbt:vmlinux:<name>:return
+ *   or
+ *	<name> [<modname>]			rawfbt:<modname>:<name>:entry
+ *						rawfbt:<modname>:<name>:return
+ */
+#include <assert.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <linux/bpf.h>
+#include <linux/btf.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include <bpf_asm.h>
+
+#include "dt_btf.h"
+#include "dt_dctx.h"
+#include "dt_cg.h"
+#include "dt_module.h"
+#include "dt_provider_tp.h"
+#include "dt_probe.h"
+#include "dt_pt_regs.h"
+
+static const char		prvname[] = "rawfbt";
+static const char		modname[] = "vmlinux";
+
+#define KPROBE_EVENTS		TRACEFS "kprobe_events"
+#define PROBE_LIST		TRACEFS "available_filter_functions"
+
+#define FBT_GROUP_FMT		GROUP_FMT "_%s"
+#define FBT_GROUP_DATA		GROUP_DATA, prp->desc->prb
+
+static const dtrace_pattr_t	pattr = {
+{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
+{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
+{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
+{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
+{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
+};
+
+/*
+ * Scan the PROBE_LIST file and add entry and return probes for every function
+ * that is listed.
+ */
+static int populate(dtrace_hdl_t *dtp)
+{
+	dt_provider_t		*prv;
+	FILE			*f;
+	char			*buf = NULL;
+	size_t			len  = 0;
+	size_t			n = 0;
+	dtrace_syminfo_t	sip;
+	dtrace_probedesc_t	pd;
+
+	prv = dt_provider_create(dtp, prvname, &dt_rawfbt, &pattr, NULL);
+	if (prv == NULL)
+		return -1;			/* errno already set */
+
+	f = fopen(PROBE_LIST, "r");
+	if (f == NULL)
+		return 0;
+
+	while (getline(&buf, &len, f) >= 0) {
+		char		*p, *q;
+		const char	*mod = modname;
+		dt_probe_t	*prp;
+
+		/*
+		 * Here buf is either "funcname\n" or "funcname [modname]\n".
+		 * The last line may not have a linefeed.
+		 */
+		p = strchr(buf, '\n');
+		if (p) {
+			*p = '\0';
+			if (p > buf && *(--p) == ']')
+				*p = '\0';
+		}
+
+		/*
+		 * Now buf is either "funcname" or "funcname [modname".  If
+		 * there is no module name provided, we will use the default.
+		 */
+		p = strchr(buf, ' ');
+		if (p) {
+			*p++ = '\0';
+			if (*p == '[')
+				p++;
+		}
+
+#define strstarts(var, x) (strncmp(var, x, strlen (x)) == 0)
+		/* Weed out __ftrace_invalid_address___* entries. */
+		if (strstarts(buf, "__ftrace_invalid_address__") ||
+		    strstarts(buf, "__probestub_") ||
+		    strstarts(buf, "__traceiter_"))
+			continue;
+#undef strstarts
+
+		/*
+		 * If we did not see a module name, perform a symbol lookup to
+		 * try to determine the module name.
+		 */
+		if (!p) {
+			/*
+			 * For synthetic symbol names (those containing '.'),
+			 * we need to use the base name (before the '.') for
+			 * module name lookup, because the synthetic forms are
+			 * not recorded in kallsyms information.
+			 *
+			 * We replace the first '.' with a 0 to terminate the
+			 * string, and after the lookup, we put it back.
+			 */
+			q = strchr(buf, '.');
+			if (q != NULL)
+				*q = '\0';
+
+			if (dtrace_lookup_by_name(dtp, DTRACE_OBJ_KMODS, buf,
+						  NULL, &sip) == 0)
+				mod = sip.object;
+
+			if (q != NULL)
+				*q = '.';
+		} else
+			mod = p;
+
+		/*
+		 * Due to the lack of module names in
+		 * TRACEFS/available_filter_functions, there are some duplicate
+		 * function names.  The kernel does not let us trace functions
+		 * that have duplicates, so we need to remove the existing one.
+		 */
+		pd.id = DTRACE_IDNONE;
+		pd.prv = prvname;
+		pd.mod = mod;
+		pd.fun = buf;
+		pd.prb = "entry";
+		prp = dt_probe_lookup(dtp, &pd);
+		if (prp != NULL) {
+			dt_probe_destroy(prp);
+			continue;
+		}
+
+		if (dt_tp_probe_insert(dtp, prv, prvname, mod, buf, "entry"))
+			n++;
+		if (dt_tp_probe_insert(dtp, prv, prvname, mod, buf, "return"))
+			n++;
+	}
+
+	free(buf);
+	fclose(f);
+
+	return n;
+}
+
+/*
+ * Generate a BPF trampoline for a FBT probe.
+ *
+ * The trampoline function is called when a FBT probe triggers, and it must
+ * satisfy the following prototype:
+ *
+ *	int dt_fbt(dt_pt_regs *regs)
+ *
+ * The trampoline will populate a dt_dctx_t struct and then call the function
+ * that implements the compiled D clause.  It returns 0 to the caller.
+ */
+static int trampoline(dt_pcb_t *pcb, uint_t exitlbl)
+{
+	dt_cg_tramp_prologue(pcb);
+
+	/*
+	 * After the dt_cg_tramp_prologue() call, we have:
+	 *				//     (%r7 = dctx->mst)
+	 *				//     (%r8 = dctx->ctx)
+	 */
+	dt_cg_tramp_copy_regs(pcb);
+	if (strcmp(pcb->pcb_probe->desc->prb, "return") == 0) {
+		dt_irlist_t	*dlp = &pcb->pcb_ir;
+
+		dt_cg_tramp_copy_rval_from_regs(pcb);
+
+		/*
+		 * fbt:::return arg0 should be the function offset for
+		 * return instruction.  Since we use kretprobes, however,
+		 * which do not fire until the function has returned to
+		 * its caller, information about the returning instruction
+		 * in the callee has been lost.
+		 *
+		 * Set arg0=-1 to indicate that we do not know the value.
+		 */
+		dt_cg_xsetx(dlp, NULL, DT_LBL_NONE, BPF_REG_0, -1);
+		emit(dlp,  BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(0), BPF_REG_0));
+	} else
+		dt_cg_tramp_copy_args_from_regs(pcb, 1);
+	dt_cg_tramp_epilogue(pcb);
+
+	return 0;
+}
+
+static int attach(dtrace_hdl_t *dtp, const dt_probe_t *prp, int bpf_fd)
+{
+	if (!dt_tp_probe_has_info(prp)) {
+		char	*fn, *prb, *p;
+		FILE	*f;
+		size_t	len;
+		int	fd, rc = -1;
+
+		/*
+		 * The tracepoint event we will be creating needs to have a
+		 * valid name.  We use a copy of the probe name, with . -> _
+		 * conversion.
+		 */
+		prb = strdup(prp->desc->fun);
+		for (p = prb; *p; p++) {
+			if (*p == '.')
+				*p = '_';
+		}
+
+		/*
+		 * Register the kprobe with the tracing subsystem.  This will
+		 * create a tracepoint event.
+		 */
+		fd = open(KPROBE_EVENTS, O_WRONLY | O_APPEND);
+		if (fd == -1)
+			return -ENOENT;
+
+		rc = dprintf(fd, "%c:" FBT_GROUP_FMT "/%s %s\n",
+			     prp->desc->prb[0] == 'e' ? 'p' : 'r',
+			     FBT_GROUP_DATA, prb, prp->desc->fun);
+		close(fd);
+		if (rc == -1)
+			return -ENOENT;
+
+		/* create format file name */
+		len = snprintf(NULL, 0, "%s" FBT_GROUP_FMT "/%s/format",
+			       EVENTSFS, FBT_GROUP_DATA, prb) + 1;
+		fn = dt_alloc(dtp, len);
+		if (fn == NULL)
+			return -ENOENT;
+
+		snprintf(fn, len, "%s" FBT_GROUP_FMT "/%s/format", EVENTSFS,
+			 FBT_GROUP_DATA, prb);
+
+		/* open format file */
+		f = fopen(fn, "r");
+		dt_free(dtp, fn);
+		if (f == NULL)
+			return -ENOENT;
+
+		/* read event id from format file */
+		rc = dt_tp_probe_info(dtp, f, 0, prp, NULL, NULL);
+		fclose(f);
+
+		if (rc < 0)
+			return -ENOENT;
+	}
+
+	/* attach BPF program to the probe */
+	return dt_tp_probe_attach(dtp, prp, bpf_fd);
+}
+
+static int probe_info(dtrace_hdl_t *dtp, const dt_probe_t *prp, int *argcp,
+		      dt_argdesc_t **argvp)
+{
+	*argcp = 0;			/* no arguments by default */
+	*argvp = NULL;
+
+	return 0;
+}
+
+/*
+ * Try to clean up system resources that may have been allocated for this
+ * probe.
+ *
+ * If there is an event FD, we close it.
+ *
+ * We also try to remove any kprobe that may have been created for the probe.
+ * This is harmless for probes that didn't get created.  If the removal fails
+ * for some reason we are out of luck - fortunately it is not harmful to the
+ * system as a whole.
+ */
+static void detach(dtrace_hdl_t *dtp, const dt_probe_t *prp)
+{
+	int	fd;
+
+	if (!dt_tp_probe_has_info(prp))
+		return;
+
+	dt_tp_probe_detach(dtp, prp);
+
+	fd = open(KPROBE_EVENTS, O_WRONLY | O_APPEND);
+	if (fd == -1)
+		return;
+
+	dprintf(fd, "-:" FBT_GROUP_FMT "/%s\n", FBT_GROUP_DATA,
+		prp->desc->fun);
+	close(fd);
+}
+
+dt_provimpl_t	dt_rawfbt = {
+	.name		= "rawfbt",
+	.prog_type	= BPF_PROG_TYPE_KPROBE,
+	.populate	= &populate,
+	.load_prog	= &dt_bpf_prog_load,
+	.trampoline	= &trampoline,
+	.attach		= &attach,
+	.probe_info	= &probe_info,
+	.detach		= &detach,
+	.probe_destroy	= &dt_tp_probe_destroy,
+};
diff --git a/libdtrace/dt_provider.c b/libdtrace/dt_provider.c
index 1e2e844e..0c621197 100644
--- a/libdtrace/dt_provider.c
+++ b/libdtrace/dt_provider.c
@@ -36,6 +36,7 @@ const dt_provimpl_t *dt_providers[] = {
 	&dt_lockstat,
 	&dt_proc,
 	&dt_profile,
+	&dt_rawfbt,
 	&dt_rawtp,
 	&dt_sched,
 	&dt_sdt,
diff --git a/libdtrace/dt_provider.h b/libdtrace/dt_provider.h
index f62137de..59a8d62e 100644
--- a/libdtrace/dt_provider.h
+++ b/libdtrace/dt_provider.h
@@ -82,6 +82,7 @@ extern dt_provimpl_t dt_ip;
 extern dt_provimpl_t dt_lockstat;
 extern dt_provimpl_t dt_proc;
 extern dt_provimpl_t dt_profile;
+extern dt_provimpl_t dt_rawfbt;
 extern dt_provimpl_t dt_rawtp;
 extern dt_provimpl_t dt_sched;
 extern dt_provimpl_t dt_sdt;
diff --git a/test/utils/clean_probes.sh b/test/utils/clean_probes.sh
index 8292b309..16052cbc 100755
--- a/test/utils/clean_probes.sh
+++ b/test/utils/clean_probes.sh
@@ -70,7 +70,7 @@ fi
 	     { sub(/:/, "/"); }
 
 	     {
-		 if (/_fbt_/)
+		 if (/_(raw)?fbt_/)
 		     kpv[kpc++] = $0;
 		 else
 		     upv[upc++] = $0;
-- 
2.45.2


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH] rawfbt: new provider
  2024-12-05 19:42 [PATCH] rawfbt: new provider Kris Van Hees
@ 2024-12-05 23:23 ` Eugene Loh
  2024-12-06 10:35 ` Alan Maguire
  1 sibling, 0 replies; 7+ messages in thread
From: Eugene Loh @ 2024-12-05 23:23 UTC (permalink / raw)
  To: Kris Van Hees, dtrace, dtrace-devel

There are a lot of comments below, but presumably they will be easy to 
adapt.  So:
Reviewed-by: Eugene Loh <eugene.loh@oracle.com>

Anyhow...

Add comment in the commit message that this is the old kprobes provider?

Should there be a test?

On 12/5/24 14:42, Kris Van Hees wrote:
> This provider provides access to all kprobe-based probes that are
> available on the system.  This includes any compiler-generated
> optimized variants of functions, named <func>.<suffix>.
>
> Signed-off-by: Kris Van Hees <kris.van.hees@oracle.com>
>
> diff --git a/libdtrace/dt_prov_rawfbt.c b/libdtrace/dt_prov_rawfbt.c
> new file mode 100644
> index 00000000..edfd36b4
> --- /dev/null
> +++ b/libdtrace/dt_prov_rawfbt.c
> @@ -0,0 +1,330 @@
> +/*
> + * Oracle Linux DTrace.
> + * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
> + * Licensed under the Universal Permissive License v 1.0 as shown at
> + * http://oss.oracle.com/licenses/upl.
> + *
> + * The Raw Function Boundary Tracing provider for DTrace.
> + *
> + * The kernel provides kprobes to trace specific symbols.  They are listed in
> + * the TRACEFS/available_filter_functions file.  Kprobes may be associated with
> + * a symbol in the core kernel or with a symbol in a specific kernel module.
> + * Whereas the fbt provider supports tracing regular symbols only, the rawfbt
> + * provider also provides access to synthetic symbols, i.e. symbols created by
> + * compiler optimizations.
> + *
> + * Mapping from event name to DTrace probe name:
> + *
> + *	<name>					rawfbt:vmlinux:<name>:entry
> + *						rawfbt:vmlinux:<name>:return
> + *   or
> + *	<name> [<modname>]			rawfbt:<modname>:<name>:entry
> + *						rawfbt:<modname>:<name>:return
> + */
> +#include <assert.h>
> +#include <errno.h>
> +#include <fcntl.h>
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <string.h>
> +#include <unistd.h>
> +#include <linux/bpf.h>
> +#include <linux/btf.h>
> +#include <sys/stat.h>
> +#include <sys/types.h>
> +
> +#include <bpf_asm.h>
> +
> +#include "dt_btf.h"
> +#include "dt_dctx.h"
> +#include "dt_cg.h"
> +#include "dt_module.h"
> +#include "dt_provider_tp.h"
> +#include "dt_probe.h"
> +#include "dt_pt_regs.h"
> +
> +static const char		prvname[] = "rawfbt";
> +static const char		modname[] = "vmlinux";
> +
> +#define KPROBE_EVENTS		TRACEFS "kprobe_events"
> +#define PROBE_LIST		TRACEFS "available_filter_functions"
> +
> +#define FBT_GROUP_FMT		GROUP_FMT "_%s"
> +#define FBT_GROUP_DATA		GROUP_DATA, prp->desc->prb
> +
> +static const dtrace_pattr_t	pattr = {
> +{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
> +{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
> +{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
> +{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
> +{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
> +};
> +
> +/*
> + * Scan the PROBE_LIST file and add entry and return probes for every function
> + * that is listed.
> + */
> +static int populate(dtrace_hdl_t *dtp)
> +{
> +	dt_provider_t		*prv;
> +	FILE			*f;
> +	char			*buf = NULL;
> +	size_t			len  = 0;

Extra space before the =.

Incidentally, nice catch:  n was being used for two distinct purposes:  
counting (for which it was uninitialized) and buflen. But then shouldn't 
this problem be corrected in the file where it originally appeared...  
not to mention the other files with this same problem?  That is the 
pre-existing problem exists in:
     dt_prov_fbt.c
     dt_prov_rawtp.c
     dt_prov_sdt.c
     dt_prov_syscall.c

> +	size_t			n = 0;
> +	dtrace_syminfo_t	sip;
> +	dtrace_probedesc_t	pd;
> +
> +	prv = dt_provider_create(dtp, prvname, &dt_rawfbt, &pattr, NULL);
> +	if (prv == NULL)
> +		return -1;			/* errno already set */
> +
> +	f = fopen(PROBE_LIST, "r");
> +	if (f == NULL)
> +		return 0;
> +
> +	while (getline(&buf, &len, f) >= 0) {
> +		char		*p, *q;

Okay, but q could also be declared in the innermost clause in which it 
is used.

> +		const char	*mod = modname;
> +		dt_probe_t	*prp;
> +
> +		/*
> +		 * Here buf is either "funcname\n" or "funcname [modname]\n".
> +		 * The last line may not have a linefeed.
> +		 */
> +		p = strchr(buf, '\n');
> +		if (p) {
> +			*p = '\0';
> +			if (p > buf && *(--p) == ']')
> +				*p = '\0';
> +		}
> +
> +		/*
> +		 * Now buf is either "funcname" or "funcname [modname".  If
> +		 * there is no module name provided, we will use the default.
> +		 */
> +		p = strchr(buf, ' ');
> +		if (p) {
> +			*p++ = '\0';
> +			if (*p == '[')
> +				p++;
> +		}
> +
> +#define strstarts(var, x) (strncmp(var, x, strlen (x)) == 0)
> +		/* Weed out __ftrace_invalid_address___* entries. */
> +		if (strstarts(buf, "__ftrace_invalid_address__") ||
> +		    strstarts(buf, "__probestub_") ||
> +		    strstarts(buf, "__traceiter_"))
> +			continue;
> +#undef strstarts
> +
> +		/*
> +		 * If we did not see a module name, perform a symbol lookup to
> +		 * try to determine the module name.
> +		 */
> +		if (!p) {
> +			/*
> +			 * For synthetic symbol names (those containing '.'),
> +			 * we need to use the base name (before the '.') for
> +			 * module name lookup, because the synthetic forms are
> +			 * not recorded in kallsyms information.
> +			 *
> +			 * We replace the first '.' with a 0 to terminate the
> +			 * string, and after the lookup, we put it back.
> +			 */
> +			q = strchr(buf, '.');
> +			if (q != NULL)
> +				*q = '\0';
> +
> +			if (dtrace_lookup_by_name(dtp, DTRACE_OBJ_KMODS, buf,
> +						  NULL, &sip) == 0)
> +				mod = sip.object;
> +
> +			if (q != NULL)
> +				*q = '.';
> +		} else
> +			mod = p;
> +
> +		/*
> +		 * Due to the lack of module names in
> +		 * TRACEFS/available_filter_functions, there are some duplicate
> +		 * function names.  The kernel does not let us trace functions
> +		 * that have duplicates, so we need to remove the existing one.
> +		 */

This represents a change in behavior from fbt kprobe.  We used not to 
create the new one, but here we remove the old one.  How about a comment 
on why the change.

> +		pd.id = DTRACE_IDNONE;
> +		pd.prv = prvname;
> +		pd.mod = mod;
> +		pd.fun = buf;
> +		pd.prb = "entry";
> +		prp = dt_probe_lookup(dtp, &pd);
> +		if (prp != NULL) {
> +			dt_probe_destroy(prp);
> +			continue;
> +		}
> +
> +		if (dt_tp_probe_insert(dtp, prv, prvname, mod, buf, "entry"))
> +			n++;
> +		if (dt_tp_probe_insert(dtp, prv, prvname, mod, buf, "return"))
> +			n++;
> +	}
> +
> +	free(buf);
> +	fclose(f);
> +
> +	return n;
> +}
> +
> +/*
> + * Generate a BPF trampoline for a FBT probe.
> + *
> + * The trampoline function is called when a FBT probe triggers, and it must
> + * satisfy the following prototype:
> + *
> + *	int dt_fbt(dt_pt_regs *regs)

Should that be dt_rawfbt?

> + *
> + * The trampoline will populate a dt_dctx_t struct and then call the function
> + * that implements the compiled D clause.  It returns 0 to the caller.
> + */
> +static int trampoline(dt_pcb_t *pcb, uint_t exitlbl)
> +{
> +	dt_cg_tramp_prologue(pcb);
> +
> +	/*
> +	 * After the dt_cg_tramp_prologue() call, we have:
> +	 *				//     (%r7 = dctx->mst)
> +	 *				//     (%r8 = dctx->ctx)
> +	 */
> +	dt_cg_tramp_copy_regs(pcb);
> +	if (strcmp(pcb->pcb_probe->desc->prb, "return") == 0) {
> +		dt_irlist_t	*dlp = &pcb->pcb_ir;
> +
> +		dt_cg_tramp_copy_rval_from_regs(pcb);
> +
> +		/*
> +		 * fbt:::return arg0 should be the function offset for
> +		 * return instruction.  Since we use kretprobes, however,
> +		 * which do not fire until the function has returned to
> +		 * its caller, information about the returning instruction
> +		 * in the callee has been lost.
> +		 *
> +		 * Set arg0=-1 to indicate that we do not know the value.
> +		 */
> +		dt_cg_xsetx(dlp, NULL, DT_LBL_NONE, BPF_REG_0, -1);
> +		emit(dlp,  BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(0), BPF_REG_0));
> +	} else
> +		dt_cg_tramp_copy_args_from_regs(pcb, 1);
> +	dt_cg_tramp_epilogue(pcb);
> +
> +	return 0;
> +}
> +
> +static int attach(dtrace_hdl_t *dtp, const dt_probe_t *prp, int bpf_fd)
> +{
> +	if (!dt_tp_probe_has_info(prp)) {
> +		char	*fn, *prb, *p;
> +		FILE	*f;
> +		size_t	len;
> +		int	fd, rc = -1;
> +
> +		/*
> +		 * The tracepoint event we will be creating needs to have a
> +		 * valid name.  We use a copy of the probe name, with . -> _
> +		 * conversion.
> +		 */
> +		prb = strdup(prp->desc->fun);

strdup()... memory leak?

> +		for (p = prb; *p; p++) {
> +			if (*p == '.')
> +				*p = '_';
> +		}
> +
> +		/*
> +		 * Register the kprobe with the tracing subsystem.  This will
> +		 * create a tracepoint event.
> +		 */
> +		fd = open(KPROBE_EVENTS, O_WRONLY | O_APPEND);
> +		if (fd == -1)
> +			return -ENOENT;
> +
> +		rc = dprintf(fd, "%c:" FBT_GROUP_FMT "/%s %s\n",
> +			     prp->desc->prb[0] == 'e' ? 'p' : 'r',
> +			     FBT_GROUP_DATA, prb, prp->desc->fun);
> +		close(fd);
> +		if (rc == -1)
> +			return -ENOENT;
> +
> +		/* create format file name */
> +		len = snprintf(NULL, 0, "%s" FBT_GROUP_FMT "/%s/format",
> +			       EVENTSFS, FBT_GROUP_DATA, prb) + 1;
> +		fn = dt_alloc(dtp, len);

Use asprintf() instead?

> +		if (fn == NULL)
> +			return -ENOENT;
> +
> +		snprintf(fn, len, "%s" FBT_GROUP_FMT "/%s/format", EVENTSFS,
> +			 FBT_GROUP_DATA, prb);
> +
> +		/* open format file */
> +		f = fopen(fn, "r");
> +		dt_free(dtp, fn);
> +		if (f == NULL)
> +			return -ENOENT;
> +
> +		/* read event id from format file */
> +		rc = dt_tp_probe_info(dtp, f, 0, prp, NULL, NULL);
> +		fclose(f);
> +
> +		if (rc < 0)
> +			return -ENOENT;
> +	}
> +
> +	/* attach BPF program to the probe */
> +	return dt_tp_probe_attach(dtp, prp, bpf_fd);
> +}
> +
> +static int probe_info(dtrace_hdl_t *dtp, const dt_probe_t *prp, int *argcp,
> +		      dt_argdesc_t **argvp)
> +{
> +	*argcp = 0;			/* no arguments by default */
> +	*argvp = NULL;
> +
> +	return 0;
> +}

With d44036087 ("probe: probes in providers without probe_info() have no 
args"), such a probe_info() is no longer necessary.

> +
> +/*
> + * Try to clean up system resources that may have been allocated for this
> + * probe.
> + *
> + * If there is an event FD, we close it.
> + *
> + * We also try to remove any kprobe that may have been created for the probe.

kprobe, yes.  But that fixes the "uprobe" typo that was in 
kprobe_detach().  So, how about fixing that typo in dt_prov_fbt.c in 
this patch while we're at it.

> + * This is harmless for probes that didn't get created.  If the removal fails
> + * for some reason we are out of luck - fortunately it is not harmful to the
> + * system as a whole.
> + */
> +static void detach(dtrace_hdl_t *dtp, const dt_probe_t *prp)
> +{
> +	int	fd;
> +
> +	if (!dt_tp_probe_has_info(prp))
> +		return;
> +
> +	dt_tp_probe_detach(dtp, prp);
> +
> +	fd = open(KPROBE_EVENTS, O_WRONLY | O_APPEND);
> +	if (fd == -1)
> +		return;
> +
> +	dprintf(fd, "-:" FBT_GROUP_FMT "/%s\n", FBT_GROUP_DATA,
> +		prp->desc->fun);
> +	close(fd);
> +}
> +
> +dt_provimpl_t	dt_rawfbt = {
> +	.name		= "rawfbt",

Use prvname instead of "rawfbt"?

> +	.prog_type	= BPF_PROG_TYPE_KPROBE,
> +	.populate	= &populate,
> +	.load_prog	= &dt_bpf_prog_load,
> +	.trampoline	= &trampoline,
> +	.attach		= &attach,
> +	.probe_info	= &probe_info,
> +	.detach		= &detach,
> +	.probe_destroy	= &dt_tp_probe_destroy,
> +};
> diff --git a/libdtrace/dt_provider.c b/libdtrace/dt_provider.c
> index 1e2e844e..0c621197 100644
> --- a/libdtrace/dt_provider.c
> +++ b/libdtrace/dt_provider.c
> @@ -36,6 +36,7 @@ const dt_provimpl_t *dt_providers[] = {
>   	&dt_lockstat,
>   	&dt_proc,
>   	&dt_profile,
> +	&dt_rawfbt,
>   	&dt_rawtp,
>   	&dt_sched,
>   	&dt_sdt,
> diff --git a/libdtrace/dt_provider.h b/libdtrace/dt_provider.h
> index f62137de..59a8d62e 100644
> --- a/libdtrace/dt_provider.h
> +++ b/libdtrace/dt_provider.h
> @@ -82,6 +82,7 @@ extern dt_provimpl_t dt_ip;
>   extern dt_provimpl_t dt_lockstat;
>   extern dt_provimpl_t dt_proc;
>   extern dt_provimpl_t dt_profile;
> +extern dt_provimpl_t dt_rawfbt;
>   extern dt_provimpl_t dt_rawtp;
>   extern dt_provimpl_t dt_sched;
>   extern dt_provimpl_t dt_sdt;
> diff --git a/test/utils/clean_probes.sh b/test/utils/clean_probes.sh
> index 8292b309..16052cbc 100755
> --- a/test/utils/clean_probes.sh
> +++ b/test/utils/clean_probes.sh
> @@ -70,7 +70,7 @@ fi
>   	     { sub(/:/, "/"); }
>   
>   	     {
> -		 if (/_fbt_/)
> +		 if (/_(raw)?fbt_/)
>   		     kpv[kpc++] = $0;
>   		 else
>   		     upv[upc++] = $0;

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] rawfbt: new provider
  2024-12-05 19:42 [PATCH] rawfbt: new provider Kris Van Hees
  2024-12-05 23:23 ` Eugene Loh
@ 2024-12-06 10:35 ` Alan Maguire
  2024-12-06 16:33   ` Kris Van Hees
  1 sibling, 1 reply; 7+ messages in thread
From: Alan Maguire @ 2024-12-06 10:35 UTC (permalink / raw)
  To: Kris Van Hees, dtrace, dtrace-devel

On 05/12/2024 19:42, Kris Van Hees wrote:
> This provider provides access to all kprobe-based probes that are
> available on the system.  This includes any compiler-generated
> optimized variants of functions, named <func>.<suffix>.
> 

This is great! Having a rawfbt allows users to still trace cases
where fprobe can't currently handle.

On the .suffix; I get that this is intended to be a raw provider, but
would it be better for stability to expose these functions as "func"
rather than "func.suffix"? It becomes difficult to write portable
scripts when suffixes are included, because wildcarding

rawfbt::func*:entry

...to catch both suffixed and non-suffixed variants may end up tracing
func, func.suffix but also func2 etc.

> Signed-off-by: Kris Van Hees <kris.van.hees@oracle.com>
> ---
>  libdtrace/Build            |   2 +
>  libdtrace/dt_prov_rawfbt.c | 330 +++++++++++++++++++++++++++++++++++++
>  libdtrace/dt_provider.c    |   1 +
>  libdtrace/dt_provider.h    |   1 +
>  test/utils/clean_probes.sh |   2 +-
>  5 files changed, 335 insertions(+), 1 deletion(-)
>  create mode 100644 libdtrace/dt_prov_rawfbt.c
> 
> diff --git a/libdtrace/Build b/libdtrace/Build
> index 8d398221..72235159 100644
> --- a/libdtrace/Build
> +++ b/libdtrace/Build
> @@ -55,6 +55,7 @@ libdtrace-build_SOURCES = dt_aggregate.c \
>  			  dt_prov_lockstat.c \
>  			  dt_prov_proc.c \
>  			  dt_prov_profile.c \
> +			  dt_prov_rawfbt.c \
>  			  dt_prov_rawtp.c \
>  			  dt_prov_sched.c \
>  			  dt_prov_sdt.c \
> @@ -112,6 +113,7 @@ dt_prov_ip.c_CFLAGS := -Wno-pedantic
>  dt_prov_lockstat.c_CFLAGS := -Wno-pedantic
>  dt_prov_proc.c_CFLAGS := -Wno-pedantic
>  dt_prov_profile.c_CFLAGS := -Wno-pedantic
> +dt_prov_rawfbt.c_CFLAGS := -Wno-pedantic
>  dt_prov_rawtp.c_CFLAGS := -Wno-pedantic
>  dt_prov_sched.c_CFLAGS := -Wno-pedantic
>  dt_prov_sdt.c_CFLAGS := -Wno-pedantic
> diff --git a/libdtrace/dt_prov_rawfbt.c b/libdtrace/dt_prov_rawfbt.c
> new file mode 100644
> index 00000000..edfd36b4
> --- /dev/null
> +++ b/libdtrace/dt_prov_rawfbt.c
> @@ -0,0 +1,330 @@
> +/*
> + * Oracle Linux DTrace.
> + * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
> + * Licensed under the Universal Permissive License v 1.0 as shown at
> + * http://oss.oracle.com/licenses/upl.
> + *
> + * The Raw Function Boundary Tracing provider for DTrace.
> + *
> + * The kernel provides kprobes to trace specific symbols.  They are listed in
> + * the TRACEFS/available_filter_functions file.  Kprobes may be associated with
> + * a symbol in the core kernel or with a symbol in a specific kernel module.
> + * Whereas the fbt provider supports tracing regular symbols only, the rawfbt
> + * provider also provides access to synthetic symbols, i.e. symbols created by
> + * compiler optimizations.
> + *
> + * Mapping from event name to DTrace probe name:
> + *
> + *	<name>					rawfbt:vmlinux:<name>:entry
> + *						rawfbt:vmlinux:<name>:return
> + *   or
> + *	<name> [<modname>]			rawfbt:<modname>:<name>:entry
> + *						rawfbt:<modname>:<name>:return
> + */
> +#include <assert.h>
> +#include <errno.h>
> +#include <fcntl.h>
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <string.h>
> +#include <unistd.h>
> +#include <linux/bpf.h>
> +#include <linux/btf.h>
> +#include <sys/stat.h>
> +#include <sys/types.h>
> +
> +#include <bpf_asm.h>
> +
> +#include "dt_btf.h"
> +#include "dt_dctx.h"
> +#include "dt_cg.h"
> +#include "dt_module.h"
> +#include "dt_provider_tp.h"
> +#include "dt_probe.h"
> +#include "dt_pt_regs.h"
> +
> +static const char		prvname[] = "rawfbt";
> +static const char		modname[] = "vmlinux";
> +
> +#define KPROBE_EVENTS		TRACEFS "kprobe_events"
> +#define PROBE_LIST		TRACEFS "available_filter_functions"
> +
> +#define FBT_GROUP_FMT		GROUP_FMT "_%s"
> +#define FBT_GROUP_DATA		GROUP_DATA, prp->desc->prb
> +
> +static const dtrace_pattr_t	pattr = {
> +{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
> +{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
> +{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
> +{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
> +{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
> +};
> +
> +/*
> + * Scan the PROBE_LIST file and add entry and return probes for every function
> + * that is listed.
> + */
> +static int populate(dtrace_hdl_t *dtp)
> +{
> +	dt_provider_t		*prv;
> +	FILE			*f;
> +	char			*buf = NULL;
> +	size_t			len  = 0;
> +	size_t			n = 0;
> +	dtrace_syminfo_t	sip;
> +	dtrace_probedesc_t	pd;
> +
> +	prv = dt_provider_create(dtp, prvname, &dt_rawfbt, &pattr, NULL);
> +	if (prv == NULL)
> +		return -1;			/* errno already set */
> +
> +	f = fopen(PROBE_LIST, "r");
> +	if (f == NULL)
> +		return 0;
> +
> +	while (getline(&buf, &len, f) >= 0) {
> +		char		*p, *q;
> +		const char	*mod = modname;
> +		dt_probe_t	*prp;
> +
> +		/*
> +		 * Here buf is either "funcname\n" or "funcname [modname]\n".
> +		 * The last line may not have a linefeed.
> +		 */
> +		p = strchr(buf, '\n');
> +		if (p) {
> +			*p = '\0';
> +			if (p > buf && *(--p) == ']')
> +				*p = '\0';
> +		}
> +
> +		/*
> +		 * Now buf is either "funcname" or "funcname [modname".  If
> +		 * there is no module name provided, we will use the default.
> +		 */
> +		p = strchr(buf, ' ');
> +		if (p) {
> +			*p++ = '\0';
> +			if (*p == '[')
> +				p++;
> +		}
> +
> +#define strstarts(var, x) (strncmp(var, x, strlen (x)) == 0)
> +		/* Weed out __ftrace_invalid_address___* entries. */
> +		if (strstarts(buf, "__ftrace_invalid_address__") ||
> +		    strstarts(buf, "__probestub_") ||
> +		    strstarts(buf, "__traceiter_"))
> +			continue;
> +#undef strstarts
> +
> +		/*
> +		 * If we did not see a module name, perform a symbol lookup to
> +		 * try to determine the module name.
> +		 */
> +		if (!p) {
> +			/*
> +			 * For synthetic symbol names (those containing '.'),
> +			 * we need to use the base name (before the '.') for
> +			 * module name lookup, because the synthetic forms are
> +			 * not recorded in kallsyms information.
> +			 *
> +			 * We replace the first '.' with a 0 to terminate the
> +			 * string, and after the lookup, we put it back.
> +			 */
> +			q = strchr(buf, '.');
> +			if (q != NULL)
> +				*q = '\0';
> +
> +			if (dtrace_lookup_by_name(dtp, DTRACE_OBJ_KMODS, buf,
> +						  NULL, &sip) == 0)
> +				mod = sip.object;
> +
> +			if (q != NULL)
> +				*q = '.';
> +		} else
> +			mod = p;
> +
> +		/*
> +		 * Due to the lack of module names in
> +		 * TRACEFS/available_filter_functions, there are some duplicate
> +		 * function names.  The kernel does not let us trace functions
> +		 * that have duplicates, so we need to remove the existing one.
> +		 */
> +		pd.id = DTRACE_IDNONE;
> +		pd.prv = prvname;
> +		pd.mod = mod;
> +		pd.fun = buf;
> +		pd.prb = "entry";
> +		prp = dt_probe_lookup(dtp, &pd);
> +		if (prp != NULL) {
> +			dt_probe_destroy(prp);
> +			continue;
> +		}
> +
> +		if (dt_tp_probe_insert(dtp, prv, prvname, mod, buf, "entry"))
> +			n++;
> +		if (dt_tp_probe_insert(dtp, prv, prvname, mod, buf, "return"))
> +			n++;
> +	}
> +
> +	free(buf);
> +	fclose(f);
> +
> +	return n;
> +}
> +
> +/*
> + * Generate a BPF trampoline for a FBT probe.
> + *
> + * The trampoline function is called when a FBT probe triggers, and it must
> + * satisfy the following prototype:
> + *
> + *	int dt_fbt(dt_pt_regs *regs)
> + *
> + * The trampoline will populate a dt_dctx_t struct and then call the function
> + * that implements the compiled D clause.  It returns 0 to the caller.
> + */
> +static int trampoline(dt_pcb_t *pcb, uint_t exitlbl)
> +{
> +	dt_cg_tramp_prologue(pcb);
> +
> +	/*
> +	 * After the dt_cg_tramp_prologue() call, we have:
> +	 *				//     (%r7 = dctx->mst)
> +	 *				//     (%r8 = dctx->ctx)
> +	 */
> +	dt_cg_tramp_copy_regs(pcb);
> +	if (strcmp(pcb->pcb_probe->desc->prb, "return") == 0) {
> +		dt_irlist_t	*dlp = &pcb->pcb_ir;
> +
> +		dt_cg_tramp_copy_rval_from_regs(pcb);
> +
> +		/*
> +		 * fbt:::return arg0 should be the function offset for
> +		 * return instruction.  Since we use kretprobes, however,
> +		 * which do not fire until the function has returned to
> +		 * its caller, information about the returning instruction
> +		 * in the callee has been lost.
> +		 *
> +		 * Set arg0=-1 to indicate that we do not know the value.
> +		 */
> +		dt_cg_xsetx(dlp, NULL, DT_LBL_NONE, BPF_REG_0, -1);
> +		emit(dlp,  BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(0), BPF_REG_0));
> +	} else
> +		dt_cg_tramp_copy_args_from_regs(pcb, 1);
> +	dt_cg_tramp_epilogue(pcb);
> +
> +	return 0;
> +}
> +
> +static int attach(dtrace_hdl_t *dtp, const dt_probe_t *prp, int bpf_fd)
> +{
> +	if (!dt_tp_probe_has_info(prp)) {
> +		char	*fn, *prb, *p;
> +		FILE	*f;
> +		size_t	len;
> +		int	fd, rc = -1;
> +
> +		/*
> +		 * The tracepoint event we will be creating needs to have a
> +		 * valid name.  We use a copy of the probe name, with . -> _
> +		 * conversion.
> +		 */
> +		prb = strdup(prp->desc->fun);
> +		for (p = prb; *p; p++) {
> +			if (*p == '.')
> +				*p = '_';
> +		}
> +
> +		/*
> +		 * Register the kprobe with the tracing subsystem.  This will
> +		 * create a tracepoint event.
> +		 */
> +		fd = open(KPROBE_EVENTS, O_WRONLY | O_APPEND);
> +		if (fd == -1)
> +			return -ENOENT;
> +
> +		rc = dprintf(fd, "%c:" FBT_GROUP_FMT "/%s %s\n",
> +			     prp->desc->prb[0] == 'e' ? 'p' : 'r',
> +			     FBT_GROUP_DATA, prb, prp->desc->fun);
> +		close(fd);
> +		if (rc == -1)
> +			return -ENOENT;
> +
> +		/* create format file name */
> +		len = snprintf(NULL, 0, "%s" FBT_GROUP_FMT "/%s/format",
> +			       EVENTSFS, FBT_GROUP_DATA, prb) + 1;
> +		fn = dt_alloc(dtp, len);
> +		if (fn == NULL)
> +			return -ENOENT;
> +
> +		snprintf(fn, len, "%s" FBT_GROUP_FMT "/%s/format", EVENTSFS,
> +			 FBT_GROUP_DATA, prb);
> +
> +		/* open format file */
> +		f = fopen(fn, "r");
> +		dt_free(dtp, fn);
> +		if (f == NULL)
> +			return -ENOENT;
> +
> +		/* read event id from format file */
> +		rc = dt_tp_probe_info(dtp, f, 0, prp, NULL, NULL);
> +		fclose(f);
> +
> +		if (rc < 0)
> +			return -ENOENT;
> +	}
> +
> +	/* attach BPF program to the probe */
> +	return dt_tp_probe_attach(dtp, prp, bpf_fd);
> +}
> +
> +static int probe_info(dtrace_hdl_t *dtp, const dt_probe_t *prp, int *argcp,
> +		      dt_argdesc_t **argvp)
> +{
> +	*argcp = 0;			/* no arguments by default */
> +	*argvp = NULL;
> +
> +	return 0;
> +}
> +
> +/*
> + * Try to clean up system resources that may have been allocated for this
> + * probe.
> + *
> + * If there is an event FD, we close it.
> + *
> + * We also try to remove any kprobe that may have been created for the probe.
> + * This is harmless for probes that didn't get created.  If the removal fails
> + * for some reason we are out of luck - fortunately it is not harmful to the
> + * system as a whole.
> + */
> +static void detach(dtrace_hdl_t *dtp, const dt_probe_t *prp)
> +{
> +	int	fd;
> +
> +	if (!dt_tp_probe_has_info(prp))
> +		return;
> +
> +	dt_tp_probe_detach(dtp, prp);
> +
> +	fd = open(KPROBE_EVENTS, O_WRONLY | O_APPEND);
> +	if (fd == -1)
> +		return;
> +
> +	dprintf(fd, "-:" FBT_GROUP_FMT "/%s\n", FBT_GROUP_DATA,
> +		prp->desc->fun);
> +	close(fd);
> +}
> +
> +dt_provimpl_t	dt_rawfbt = {
> +	.name		= "rawfbt",
> +	.prog_type	= BPF_PROG_TYPE_KPROBE,
> +	.populate	= &populate,
> +	.load_prog	= &dt_bpf_prog_load,
> +	.trampoline	= &trampoline,
> +	.attach		= &attach,
> +	.probe_info	= &probe_info,
> +	.detach		= &detach,
> +	.probe_destroy	= &dt_tp_probe_destroy,
> +};
> diff --git a/libdtrace/dt_provider.c b/libdtrace/dt_provider.c
> index 1e2e844e..0c621197 100644
> --- a/libdtrace/dt_provider.c
> +++ b/libdtrace/dt_provider.c
> @@ -36,6 +36,7 @@ const dt_provimpl_t *dt_providers[] = {
>  	&dt_lockstat,
>  	&dt_proc,
>  	&dt_profile,
> +	&dt_rawfbt,
>  	&dt_rawtp,
>  	&dt_sched,
>  	&dt_sdt,
> diff --git a/libdtrace/dt_provider.h b/libdtrace/dt_provider.h
> index f62137de..59a8d62e 100644
> --- a/libdtrace/dt_provider.h
> +++ b/libdtrace/dt_provider.h
> @@ -82,6 +82,7 @@ extern dt_provimpl_t dt_ip;
>  extern dt_provimpl_t dt_lockstat;
>  extern dt_provimpl_t dt_proc;
>  extern dt_provimpl_t dt_profile;
> +extern dt_provimpl_t dt_rawfbt;
>  extern dt_provimpl_t dt_rawtp;
>  extern dt_provimpl_t dt_sched;
>  extern dt_provimpl_t dt_sdt;
> diff --git a/test/utils/clean_probes.sh b/test/utils/clean_probes.sh
> index 8292b309..16052cbc 100755
> --- a/test/utils/clean_probes.sh
> +++ b/test/utils/clean_probes.sh
> @@ -70,7 +70,7 @@ fi
>  	     { sub(/:/, "/"); }
>  
>  	     {
> -		 if (/_fbt_/)
> +		 if (/_(raw)?fbt_/)
>  		     kpv[kpc++] = $0;
>  		 else
>  		     upv[upc++] = $0;


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] rawfbt: new provider
  2024-12-06 10:35 ` Alan Maguire
@ 2024-12-06 16:33   ` Kris Van Hees
  2024-12-09 10:37     ` Alan Maguire
  0 siblings, 1 reply; 7+ messages in thread
From: Kris Van Hees @ 2024-12-06 16:33 UTC (permalink / raw)
  To: Alan Maguire; +Cc: Kris Van Hees, dtrace, dtrace-devel

On Fri, Dec 06, 2024 at 10:35:50AM +0000, Alan Maguire wrote:
> On 05/12/2024 19:42, Kris Van Hees wrote:
> > This provider provides access to all kprobe-based probes that are
> > available on the system.  This includes any compiler-generated
> > optimized variants of functions, named <func>.<suffix>.
> > 
> 
> This is great! Having a rawfbt allows users to still trace cases
> where fprobe can't currently handle.
> 
> On the .suffix; I get that this is intended to be a raw provider, but
> would it be better for stability to expose these functions as "func"
> rather than "func.suffix"? It becomes difficult to write portable
> scripts when suffixes are included, because wildcarding
> 
> rawfbt::func*:entry
> 
> ...to catch both suffixed and non-suffixed variants may end up tracing
> func, func.suffix but also func2 etc.

The trouble is that exposing <func>.<suffix> as <func> will mean that you
automatically end up trying to probe *any* <func>.<suffix> whenever you
place a probe on <func> using rawfbt, which is not necessarily what you may
want.  And there would not be any way to avoid that.

But by not collapsing them, you have the ability to catch them all anyway,
if you want, by specifying <func>, <func>.* instead of <func>.

> > Signed-off-by: Kris Van Hees <kris.van.hees@oracle.com>
> > ---
> >  libdtrace/Build            |   2 +
> >  libdtrace/dt_prov_rawfbt.c | 330 +++++++++++++++++++++++++++++++++++++
> >  libdtrace/dt_provider.c    |   1 +
> >  libdtrace/dt_provider.h    |   1 +
> >  test/utils/clean_probes.sh |   2 +-
> >  5 files changed, 335 insertions(+), 1 deletion(-)
> >  create mode 100644 libdtrace/dt_prov_rawfbt.c
> > 
> > diff --git a/libdtrace/Build b/libdtrace/Build
> > index 8d398221..72235159 100644
> > --- a/libdtrace/Build
> > +++ b/libdtrace/Build
> > @@ -55,6 +55,7 @@ libdtrace-build_SOURCES = dt_aggregate.c \
> >  			  dt_prov_lockstat.c \
> >  			  dt_prov_proc.c \
> >  			  dt_prov_profile.c \
> > +			  dt_prov_rawfbt.c \
> >  			  dt_prov_rawtp.c \
> >  			  dt_prov_sched.c \
> >  			  dt_prov_sdt.c \
> > @@ -112,6 +113,7 @@ dt_prov_ip.c_CFLAGS := -Wno-pedantic
> >  dt_prov_lockstat.c_CFLAGS := -Wno-pedantic
> >  dt_prov_proc.c_CFLAGS := -Wno-pedantic
> >  dt_prov_profile.c_CFLAGS := -Wno-pedantic
> > +dt_prov_rawfbt.c_CFLAGS := -Wno-pedantic
> >  dt_prov_rawtp.c_CFLAGS := -Wno-pedantic
> >  dt_prov_sched.c_CFLAGS := -Wno-pedantic
> >  dt_prov_sdt.c_CFLAGS := -Wno-pedantic
> > diff --git a/libdtrace/dt_prov_rawfbt.c b/libdtrace/dt_prov_rawfbt.c
> > new file mode 100644
> > index 00000000..edfd36b4
> > --- /dev/null
> > +++ b/libdtrace/dt_prov_rawfbt.c
> > @@ -0,0 +1,330 @@
> > +/*
> > + * Oracle Linux DTrace.
> > + * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
> > + * Licensed under the Universal Permissive License v 1.0 as shown at
> > + * http://oss.oracle.com/licenses/upl.
> > + *
> > + * The Raw Function Boundary Tracing provider for DTrace.
> > + *
> > + * The kernel provides kprobes to trace specific symbols.  They are listed in
> > + * the TRACEFS/available_filter_functions file.  Kprobes may be associated with
> > + * a symbol in the core kernel or with a symbol in a specific kernel module.
> > + * Whereas the fbt provider supports tracing regular symbols only, the rawfbt
> > + * provider also provides access to synthetic symbols, i.e. symbols created by
> > + * compiler optimizations.
> > + *
> > + * Mapping from event name to DTrace probe name:
> > + *
> > + *	<name>					rawfbt:vmlinux:<name>:entry
> > + *						rawfbt:vmlinux:<name>:return
> > + *   or
> > + *	<name> [<modname>]			rawfbt:<modname>:<name>:entry
> > + *						rawfbt:<modname>:<name>:return
> > + */
> > +#include <assert.h>
> > +#include <errno.h>
> > +#include <fcntl.h>
> > +#include <stdio.h>
> > +#include <stdlib.h>
> > +#include <string.h>
> > +#include <unistd.h>
> > +#include <linux/bpf.h>
> > +#include <linux/btf.h>
> > +#include <sys/stat.h>
> > +#include <sys/types.h>
> > +
> > +#include <bpf_asm.h>
> > +
> > +#include "dt_btf.h"
> > +#include "dt_dctx.h"
> > +#include "dt_cg.h"
> > +#include "dt_module.h"
> > +#include "dt_provider_tp.h"
> > +#include "dt_probe.h"
> > +#include "dt_pt_regs.h"
> > +
> > +static const char		prvname[] = "rawfbt";
> > +static const char		modname[] = "vmlinux";
> > +
> > +#define KPROBE_EVENTS		TRACEFS "kprobe_events"
> > +#define PROBE_LIST		TRACEFS "available_filter_functions"
> > +
> > +#define FBT_GROUP_FMT		GROUP_FMT "_%s"
> > +#define FBT_GROUP_DATA		GROUP_DATA, prp->desc->prb
> > +
> > +static const dtrace_pattr_t	pattr = {
> > +{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
> > +{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
> > +{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
> > +{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
> > +{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
> > +};
> > +
> > +/*
> > + * Scan the PROBE_LIST file and add entry and return probes for every function
> > + * that is listed.
> > + */
> > +static int populate(dtrace_hdl_t *dtp)
> > +{
> > +	dt_provider_t		*prv;
> > +	FILE			*f;
> > +	char			*buf = NULL;
> > +	size_t			len  = 0;
> > +	size_t			n = 0;
> > +	dtrace_syminfo_t	sip;
> > +	dtrace_probedesc_t	pd;
> > +
> > +	prv = dt_provider_create(dtp, prvname, &dt_rawfbt, &pattr, NULL);
> > +	if (prv == NULL)
> > +		return -1;			/* errno already set */
> > +
> > +	f = fopen(PROBE_LIST, "r");
> > +	if (f == NULL)
> > +		return 0;
> > +
> > +	while (getline(&buf, &len, f) >= 0) {
> > +		char		*p, *q;
> > +		const char	*mod = modname;
> > +		dt_probe_t	*prp;
> > +
> > +		/*
> > +		 * Here buf is either "funcname\n" or "funcname [modname]\n".
> > +		 * The last line may not have a linefeed.
> > +		 */
> > +		p = strchr(buf, '\n');
> > +		if (p) {
> > +			*p = '\0';
> > +			if (p > buf && *(--p) == ']')
> > +				*p = '\0';
> > +		}
> > +
> > +		/*
> > +		 * Now buf is either "funcname" or "funcname [modname".  If
> > +		 * there is no module name provided, we will use the default.
> > +		 */
> > +		p = strchr(buf, ' ');
> > +		if (p) {
> > +			*p++ = '\0';
> > +			if (*p == '[')
> > +				p++;
> > +		}
> > +
> > +#define strstarts(var, x) (strncmp(var, x, strlen (x)) == 0)
> > +		/* Weed out __ftrace_invalid_address___* entries. */
> > +		if (strstarts(buf, "__ftrace_invalid_address__") ||
> > +		    strstarts(buf, "__probestub_") ||
> > +		    strstarts(buf, "__traceiter_"))
> > +			continue;
> > +#undef strstarts
> > +
> > +		/*
> > +		 * If we did not see a module name, perform a symbol lookup to
> > +		 * try to determine the module name.
> > +		 */
> > +		if (!p) {
> > +			/*
> > +			 * For synthetic symbol names (those containing '.'),
> > +			 * we need to use the base name (before the '.') for
> > +			 * module name lookup, because the synthetic forms are
> > +			 * not recorded in kallsyms information.
> > +			 *
> > +			 * We replace the first '.' with a 0 to terminate the
> > +			 * string, and after the lookup, we put it back.
> > +			 */
> > +			q = strchr(buf, '.');
> > +			if (q != NULL)
> > +				*q = '\0';
> > +
> > +			if (dtrace_lookup_by_name(dtp, DTRACE_OBJ_KMODS, buf,
> > +						  NULL, &sip) == 0)
> > +				mod = sip.object;
> > +
> > +			if (q != NULL)
> > +				*q = '.';
> > +		} else
> > +			mod = p;
> > +
> > +		/*
> > +		 * Due to the lack of module names in
> > +		 * TRACEFS/available_filter_functions, there are some duplicate
> > +		 * function names.  The kernel does not let us trace functions
> > +		 * that have duplicates, so we need to remove the existing one.
> > +		 */
> > +		pd.id = DTRACE_IDNONE;
> > +		pd.prv = prvname;
> > +		pd.mod = mod;
> > +		pd.fun = buf;
> > +		pd.prb = "entry";
> > +		prp = dt_probe_lookup(dtp, &pd);
> > +		if (prp != NULL) {
> > +			dt_probe_destroy(prp);
> > +			continue;
> > +		}
> > +
> > +		if (dt_tp_probe_insert(dtp, prv, prvname, mod, buf, "entry"))
> > +			n++;
> > +		if (dt_tp_probe_insert(dtp, prv, prvname, mod, buf, "return"))
> > +			n++;
> > +	}
> > +
> > +	free(buf);
> > +	fclose(f);
> > +
> > +	return n;
> > +}
> > +
> > +/*
> > + * Generate a BPF trampoline for a FBT probe.
> > + *
> > + * The trampoline function is called when a FBT probe triggers, and it must
> > + * satisfy the following prototype:
> > + *
> > + *	int dt_fbt(dt_pt_regs *regs)
> > + *
> > + * The trampoline will populate a dt_dctx_t struct and then call the function
> > + * that implements the compiled D clause.  It returns 0 to the caller.
> > + */
> > +static int trampoline(dt_pcb_t *pcb, uint_t exitlbl)
> > +{
> > +	dt_cg_tramp_prologue(pcb);
> > +
> > +	/*
> > +	 * After the dt_cg_tramp_prologue() call, we have:
> > +	 *				//     (%r7 = dctx->mst)
> > +	 *				//     (%r8 = dctx->ctx)
> > +	 */
> > +	dt_cg_tramp_copy_regs(pcb);
> > +	if (strcmp(pcb->pcb_probe->desc->prb, "return") == 0) {
> > +		dt_irlist_t	*dlp = &pcb->pcb_ir;
> > +
> > +		dt_cg_tramp_copy_rval_from_regs(pcb);
> > +
> > +		/*
> > +		 * fbt:::return arg0 should be the function offset for
> > +		 * return instruction.  Since we use kretprobes, however,
> > +		 * which do not fire until the function has returned to
> > +		 * its caller, information about the returning instruction
> > +		 * in the callee has been lost.
> > +		 *
> > +		 * Set arg0=-1 to indicate that we do not know the value.
> > +		 */
> > +		dt_cg_xsetx(dlp, NULL, DT_LBL_NONE, BPF_REG_0, -1);
> > +		emit(dlp,  BPF_STORE(BPF_DW, BPF_REG_7, DMST_ARG(0), BPF_REG_0));
> > +	} else
> > +		dt_cg_tramp_copy_args_from_regs(pcb, 1);
> > +	dt_cg_tramp_epilogue(pcb);
> > +
> > +	return 0;
> > +}
> > +
> > +static int attach(dtrace_hdl_t *dtp, const dt_probe_t *prp, int bpf_fd)
> > +{
> > +	if (!dt_tp_probe_has_info(prp)) {
> > +		char	*fn, *prb, *p;
> > +		FILE	*f;
> > +		size_t	len;
> > +		int	fd, rc = -1;
> > +
> > +		/*
> > +		 * The tracepoint event we will be creating needs to have a
> > +		 * valid name.  We use a copy of the probe name, with . -> _
> > +		 * conversion.
> > +		 */
> > +		prb = strdup(prp->desc->fun);
> > +		for (p = prb; *p; p++) {
> > +			if (*p == '.')
> > +				*p = '_';
> > +		}
> > +
> > +		/*
> > +		 * Register the kprobe with the tracing subsystem.  This will
> > +		 * create a tracepoint event.
> > +		 */
> > +		fd = open(KPROBE_EVENTS, O_WRONLY | O_APPEND);
> > +		if (fd == -1)
> > +			return -ENOENT;
> > +
> > +		rc = dprintf(fd, "%c:" FBT_GROUP_FMT "/%s %s\n",
> > +			     prp->desc->prb[0] == 'e' ? 'p' : 'r',
> > +			     FBT_GROUP_DATA, prb, prp->desc->fun);
> > +		close(fd);
> > +		if (rc == -1)
> > +			return -ENOENT;
> > +
> > +		/* create format file name */
> > +		len = snprintf(NULL, 0, "%s" FBT_GROUP_FMT "/%s/format",
> > +			       EVENTSFS, FBT_GROUP_DATA, prb) + 1;
> > +		fn = dt_alloc(dtp, len);
> > +		if (fn == NULL)
> > +			return -ENOENT;
> > +
> > +		snprintf(fn, len, "%s" FBT_GROUP_FMT "/%s/format", EVENTSFS,
> > +			 FBT_GROUP_DATA, prb);
> > +
> > +		/* open format file */
> > +		f = fopen(fn, "r");
> > +		dt_free(dtp, fn);
> > +		if (f == NULL)
> > +			return -ENOENT;
> > +
> > +		/* read event id from format file */
> > +		rc = dt_tp_probe_info(dtp, f, 0, prp, NULL, NULL);
> > +		fclose(f);
> > +
> > +		if (rc < 0)
> > +			return -ENOENT;
> > +	}
> > +
> > +	/* attach BPF program to the probe */
> > +	return dt_tp_probe_attach(dtp, prp, bpf_fd);
> > +}
> > +
> > +static int probe_info(dtrace_hdl_t *dtp, const dt_probe_t *prp, int *argcp,
> > +		      dt_argdesc_t **argvp)
> > +{
> > +	*argcp = 0;			/* no arguments by default */
> > +	*argvp = NULL;
> > +
> > +	return 0;
> > +}
> > +
> > +/*
> > + * Try to clean up system resources that may have been allocated for this
> > + * probe.
> > + *
> > + * If there is an event FD, we close it.
> > + *
> > + * We also try to remove any kprobe that may have been created for the probe.
> > + * This is harmless for probes that didn't get created.  If the removal fails
> > + * for some reason we are out of luck - fortunately it is not harmful to the
> > + * system as a whole.
> > + */
> > +static void detach(dtrace_hdl_t *dtp, const dt_probe_t *prp)
> > +{
> > +	int	fd;
> > +
> > +	if (!dt_tp_probe_has_info(prp))
> > +		return;
> > +
> > +	dt_tp_probe_detach(dtp, prp);
> > +
> > +	fd = open(KPROBE_EVENTS, O_WRONLY | O_APPEND);
> > +	if (fd == -1)
> > +		return;
> > +
> > +	dprintf(fd, "-:" FBT_GROUP_FMT "/%s\n", FBT_GROUP_DATA,
> > +		prp->desc->fun);
> > +	close(fd);
> > +}
> > +
> > +dt_provimpl_t	dt_rawfbt = {
> > +	.name		= "rawfbt",
> > +	.prog_type	= BPF_PROG_TYPE_KPROBE,
> > +	.populate	= &populate,
> > +	.load_prog	= &dt_bpf_prog_load,
> > +	.trampoline	= &trampoline,
> > +	.attach		= &attach,
> > +	.probe_info	= &probe_info,
> > +	.detach		= &detach,
> > +	.probe_destroy	= &dt_tp_probe_destroy,
> > +};
> > diff --git a/libdtrace/dt_provider.c b/libdtrace/dt_provider.c
> > index 1e2e844e..0c621197 100644
> > --- a/libdtrace/dt_provider.c
> > +++ b/libdtrace/dt_provider.c
> > @@ -36,6 +36,7 @@ const dt_provimpl_t *dt_providers[] = {
> >  	&dt_lockstat,
> >  	&dt_proc,
> >  	&dt_profile,
> > +	&dt_rawfbt,
> >  	&dt_rawtp,
> >  	&dt_sched,
> >  	&dt_sdt,
> > diff --git a/libdtrace/dt_provider.h b/libdtrace/dt_provider.h
> > index f62137de..59a8d62e 100644
> > --- a/libdtrace/dt_provider.h
> > +++ b/libdtrace/dt_provider.h
> > @@ -82,6 +82,7 @@ extern dt_provimpl_t dt_ip;
> >  extern dt_provimpl_t dt_lockstat;
> >  extern dt_provimpl_t dt_proc;
> >  extern dt_provimpl_t dt_profile;
> > +extern dt_provimpl_t dt_rawfbt;
> >  extern dt_provimpl_t dt_rawtp;
> >  extern dt_provimpl_t dt_sched;
> >  extern dt_provimpl_t dt_sdt;
> > diff --git a/test/utils/clean_probes.sh b/test/utils/clean_probes.sh
> > index 8292b309..16052cbc 100755
> > --- a/test/utils/clean_probes.sh
> > +++ b/test/utils/clean_probes.sh
> > @@ -70,7 +70,7 @@ fi
> >  	     { sub(/:/, "/"); }
> >  
> >  	     {
> > -		 if (/_fbt_/)
> > +		 if (/_(raw)?fbt_/)
> >  		     kpv[kpc++] = $0;
> >  		 else
> >  		     upv[upc++] = $0;

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] rawfbt: new provider
  2024-12-06 16:33   ` Kris Van Hees
@ 2024-12-09 10:37     ` Alan Maguire
  2024-12-09 16:29       ` Nick Alcock
  0 siblings, 1 reply; 7+ messages in thread
From: Alan Maguire @ 2024-12-09 10:37 UTC (permalink / raw)
  To: Kris Van Hees; +Cc: dtrace, dtrace-devel

On 06/12/2024 16:33, Kris Van Hees wrote:
> On Fri, Dec 06, 2024 at 10:35:50AM +0000, Alan Maguire wrote:
>> On 05/12/2024 19:42, Kris Van Hees wrote:
>>> This provider provides access to all kprobe-based probes that are
>>> available on the system.  This includes any compiler-generated
>>> optimized variants of functions, named <func>.<suffix>.
>>>
>>
>> This is great! Having a rawfbt allows users to still trace cases
>> where fprobe can't currently handle.
>>
>> On the .suffix; I get that this is intended to be a raw provider, but
>> would it be better for stability to expose these functions as "func"
>> rather than "func.suffix"? It becomes difficult to write portable
>> scripts when suffixes are included, because wildcarding
>>
>> rawfbt::func*:entry
>>
>> ...to catch both suffixed and non-suffixed variants may end up tracing
>> func, func.suffix but also func2 etc.
> 
> The trouble is that exposing <func>.<suffix> as <func> will mean that you
> automatically end up trying to probe *any* <func>.<suffix> whenever you
> place a probe on <func> using rawfbt, which is not necessarily what you may
> want.  And there would not be any way to avoid that.
> 
> But by not collapsing them, you have the ability to catch them all anyway,
> if you want, by specifying <func>, <func>.* instead of <func>.
>

I tried that with the rawfbt code but got:

# dtrace -n 'rawfbt::xfs_cleanup_inode.*:entry {}'
dtrace: description 'rawfbt::xfs_cleanup_inode.*:entry ' matched 1 probe
^C

This matches xfs_cleanup_inode.isra.0, looks good so far. However:

# dtrace -n
'rawfbt::xfs_cleanup_inode:entry,rawfbt::xfs_cleanup_inode.*:entry {}'
dtrace: invalid probe specifier
rawfbt::xfs_cfs_cleanup_inode:entry,rawfbt::xfs_cleanup_inode.*:entry
{}: probe description rawfbt::xfs_cfs_cleanup_inode:entry does not match
any probes

So the absence of the non-suffixed function (which we included for
portability) triggers the failure. So looks like it will be necessary to
add -Z:

# /sbin/dtrace -Zn
'rawfbt::xfs_cfs_cleanup_inode:entry,rawfbt::xfs_cleanup_inode.*:entry {}'
dtrace: description
'rawfbt::xfs_cfs_cleanup_inode:entry,rawfbt::xfs_cleanup_inode.*:entry '
matched 1 probe

So I guess as long as we document this clearly for folks wanting to to
use rawfbt to write portable scripts, it should be okay.

Alan

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] rawfbt: new provider
  2024-12-09 10:37     ` Alan Maguire
@ 2024-12-09 16:29       ` Nick Alcock
  2024-12-09 16:57         ` Kris Van Hees
  0 siblings, 1 reply; 7+ messages in thread
From: Nick Alcock @ 2024-12-09 16:29 UTC (permalink / raw)
  To: Alan Maguire; +Cc: Kris Van Hees, dtrace, dtrace-devel

On 9 Dec 2024, Alan Maguire stated:

> On 06/12/2024 16:33, Kris Van Hees wrote:
>> On Fri, Dec 06, 2024 at 10:35:50AM +0000, Alan Maguire wrote:
>>> On 05/12/2024 19:42, Kris Van Hees wrote:
>>>> This provider provides access to all kprobe-based probes that are
>>>> available on the system.  This includes any compiler-generated
>>>> optimized variants of functions, named <func>.<suffix>.
>>>>
>>>
>>> This is great! Having a rawfbt allows users to still trace cases
>>> where fprobe can't currently handle.
>>>
>>> On the .suffix; I get that this is intended to be a raw provider, but
>>> would it be better for stability to expose these functions as "func"
>>> rather than "func.suffix"? It becomes difficult to write portable
>>> scripts when suffixes are included, because wildcarding
>>>
>>> rawfbt::func*:entry
>>>
>>> ...to catch both suffixed and non-suffixed variants may end up tracing
>>> func, func.suffix but also func2 etc.
>> 
>> The trouble is that exposing <func>.<suffix> as <func> will mean that you
>> automatically end up trying to probe *any* <func>.<suffix> whenever you
>> place a probe on <func> using rawfbt, which is not necessarily what you may
>> want.  And there would not be any way to avoid that.
>> 
>> But by not collapsing them, you have the ability to catch them all anyway,
>> if you want, by specifying <func>, <func>.* instead of <func>.
>
> I tried that with the rawfbt code but got:
>
> # dtrace -n 'rawfbt::xfs_cleanup_inode.*:entry {}'
> dtrace: description 'rawfbt::xfs_cleanup_inode.*:entry ' matched 1 probe
> ^C
>
> This matches xfs_cleanup_inode.isra.0, looks good so far. However:
>
> # dtrace -n
> 'rawfbt::xfs_cleanup_inode:entry,rawfbt::xfs_cleanup_inode.*:entry {}'
> dtrace: invalid probe specifier
> rawfbt::xfs_cfs_cleanup_inode:entry,rawfbt::xfs_cleanup_inode.*:entry
> {}: probe description rawfbt::xfs_cfs_cleanup_inode:entry does not match
> any probes
>
> So the absence of the non-suffixed function (which we included for
> portability) triggers the failure. So looks like it will be necessary to
> add -Z:
>
> # /sbin/dtrace -Zn
> 'rawfbt::xfs_cfs_cleanup_inode:entry,rawfbt::xfs_cleanup_inode.*:entry {}'
> dtrace: description
> 'rawfbt::xfs_cfs_cleanup_inode:entry,rawfbt::xfs_cleanup_inode.*:entry '
> matched 1 probe
>
> So I guess as long as we document this clearly for folks wanting to to
> use rawfbt to write portable scripts, it should be okay.

Agreed. Documented behaviour, trivial workaround, maybe can be improved
later -- maybe we shouldn't get a 'no probes' error unless *every probe*
in a probe list is unmatched? There are good reasons for both... maybe
we should add "foo & bar" and "foo | bar" and parens for precedence in
later releases, but this is huge overdesign :P

-- 
NULL && (void)

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] rawfbt: new provider
  2024-12-09 16:29       ` Nick Alcock
@ 2024-12-09 16:57         ` Kris Van Hees
  0 siblings, 0 replies; 7+ messages in thread
From: Kris Van Hees @ 2024-12-09 16:57 UTC (permalink / raw)
  To: Nick Alcock; +Cc: Alan Maguire, Kris Van Hees, dtrace, dtrace-devel

On Mon, Dec 09, 2024 at 04:29:19PM +0000, Nick Alcock wrote:
> On 9 Dec 2024, Alan Maguire stated:
> 
> > On 06/12/2024 16:33, Kris Van Hees wrote:
> >> On Fri, Dec 06, 2024 at 10:35:50AM +0000, Alan Maguire wrote:
> >>> On 05/12/2024 19:42, Kris Van Hees wrote:
> >>>> This provider provides access to all kprobe-based probes that are
> >>>> available on the system.  This includes any compiler-generated
> >>>> optimized variants of functions, named <func>.<suffix>.
> >>>>
> >>>
> >>> This is great! Having a rawfbt allows users to still trace cases
> >>> where fprobe can't currently handle.
> >>>
> >>> On the .suffix; I get that this is intended to be a raw provider, but
> >>> would it be better for stability to expose these functions as "func"
> >>> rather than "func.suffix"? It becomes difficult to write portable
> >>> scripts when suffixes are included, because wildcarding
> >>>
> >>> rawfbt::func*:entry
> >>>
> >>> ...to catch both suffixed and non-suffixed variants may end up tracing
> >>> func, func.suffix but also func2 etc.
> >> 
> >> The trouble is that exposing <func>.<suffix> as <func> will mean that you
> >> automatically end up trying to probe *any* <func>.<suffix> whenever you
> >> place a probe on <func> using rawfbt, which is not necessarily what you may
> >> want.  And there would not be any way to avoid that.
> >> 
> >> But by not collapsing them, you have the ability to catch them all anyway,
> >> if you want, by specifying <func>, <func>.* instead of <func>.
> >
> > I tried that with the rawfbt code but got:
> >
> > # dtrace -n 'rawfbt::xfs_cleanup_inode.*:entry {}'
> > dtrace: description 'rawfbt::xfs_cleanup_inode.*:entry ' matched 1 probe
> > ^C
> >
> > This matches xfs_cleanup_inode.isra.0, looks good so far. However:
> >
> > # dtrace -n
> > 'rawfbt::xfs_cleanup_inode:entry,rawfbt::xfs_cleanup_inode.*:entry {}'
> > dtrace: invalid probe specifier
> > rawfbt::xfs_cfs_cleanup_inode:entry,rawfbt::xfs_cleanup_inode.*:entry
> > {}: probe description rawfbt::xfs_cfs_cleanup_inode:entry does not match
> > any probes
> >
> > So the absence of the non-suffixed function (which we included for
> > portability) triggers the failure. So looks like it will be necessary to
> > add -Z:
> >
> > # /sbin/dtrace -Zn
> > 'rawfbt::xfs_cfs_cleanup_inode:entry,rawfbt::xfs_cleanup_inode.*:entry {}'
> > dtrace: description
> > 'rawfbt::xfs_cfs_cleanup_inode:entry,rawfbt::xfs_cleanup_inode.*:entry '
> > matched 1 probe
> >
> > So I guess as long as we document this clearly for folks wanting to to
> > use rawfbt to write portable scripts, it should be okay.
> 
> Agreed. Documented behaviour, trivial workaround, maybe can be improved
> later -- maybe we shouldn't get a 'no probes' error unless *every probe*
> in a probe list is unmatched? There are good reasons for both... maybe
> we should add "foo & bar" and "foo | bar" and parens for precedence in
> later releases, but this is huge overdesign :P

This is indeed proper behaviour.  If the probe does not exist, the tracing
should not start and an error should be reported.

Since these probes are made available by the kernel based on the kernel build
(i.e. the functions that it determines can be probed using kprobe), they are
not at all a stable interface and therefore portability of scripts is not at
all supported.  Often that can be resolved with using -Z indeed (or the
equivalent pragma that can be included in the script).

I could *perhaps* see some value in someday being able to specify that certain
probes are required and others are optional (in a script), possibly using a
more refined pragma that assigns such status to particular probe specifications
but that is certainly only a vague notion of potential future feature :)

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2024-12-09 16:57 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2024-12-05 19:42 [PATCH] rawfbt: new provider Kris Van Hees
2024-12-05 23:23 ` Eugene Loh
2024-12-06 10:35 ` Alan Maguire
2024-12-06 16:33   ` Kris Van Hees
2024-12-09 10:37     ` Alan Maguire
2024-12-09 16:29       ` Nick Alcock
2024-12-09 16:57         ` Kris Van Hees

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox