From: Dave Marchevsky <davemarchevsky@fb.com>
To: <bpf@vger.kernel.org>
Cc: Alexei Starovoitov <ast@kernel.org>,
Andrii Nakryiko <andrii@kernel.org>,
Daniel Borkmann <daniel@iogearbox.net>,
Rik van Riel <riel@surriel.com>,
Ilya Leoshkevich <iii@linux.ibm.com>, Yonghong Song <yhs@fb.com>,
<kernel-team@fb.com>, Dave Marchevsky <davemarchevsky@fb.com>
Subject: [RFC PATCH bpf-next 2/5] bpf: add get_reg_val helper
Date: Thu, 12 May 2022 00:43:18 -0700 [thread overview]
Message-ID: <20220512074321.2090073-3-davemarchevsky@fb.com> (raw)
In-Reply-To: <20220512074321.2090073-1-davemarchevsky@fb.com>
Add a helper which reads the value of specified register into memory.
Currently, bpf programs only have access to general-purpose registers
via struct pt_regs. Other registers, like SSE regs %xmm0-15, are
inaccessible, which makes some tracing usecases impossible. For example,
User Statically-Defined Tracing (USDT) probes may use SSE registers to
pass their arguments on x86. While this patch adds support for %xmm0-15
only, the helper is meant to be generic enough to support fetching any
reg.
A useful "value of register" definition for bpf programs is "value of
register before control transfer to kernel". pt_regs gives us this
currently, so it's the default behavior of the new helper. Fetching the
actual _current_ reg value is possible, though, by passing
BPF_GETREG_F_CURRENT flag as part of input.
For SSE regs we try to avoid digging around in task's fpu state by first
reading _current_ value, then checking to see if the state of cpu's
floating point regs matches task's view of them. If so, we can just
return _current_ value.
Further usecases which are straightforward to support, but
unimplemented:
* using the helper to fetch general-purpose register value.
currently-unused pt_regs parameter exists for this reason.
* fetching rdtsc (w/ BPF_GETREG_F_CURRENT)
* other architectures. s390 specifically might benefit from similar
fpu reg fetching as USDT library was recently updated to support that
architecture.
Signed-off-by: Dave Marchevsky <davemarchevsky@fb.com>
---
include/uapi/linux/bpf.h | 40 +++++++++
kernel/trace/bpf_trace.c | 148 +++++++++++++++++++++++++++++++++
kernel/trace/bpf_trace.h | 1 +
tools/include/uapi/linux/bpf.h | 40 +++++++++
4 files changed, 229 insertions(+)
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 444fe6f1cf35..3ef8f683ed9e 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -5154,6 +5154,18 @@ union bpf_attr {
* if not NULL, is a reference which must be released using its
* corresponding release function, or moved into a BPF map before
* program exit.
+ *
+ * long bpf_get_reg_val(void *dst, u32 size, u64 getreg_spec, struct pt_regs *regs, struct task_struct *tsk)
+ * Description
+ * Store the value of a SSE register specified by *getreg_spec*
+ * into memory region of size *size* specified by *dst*. *getreg_spec*
+ * is a combination of BPF_GETREG enum AND BPF_GETREG_F flag e.g.
+ * (BPF_GETREG_X86_XMM0 << 32) | BPF_GETREG_F_CURRENT.*
+ * Return
+ * 0 on success
+ * **-ENOENT** if the system architecture does not have requested reg
+ * **-EINVAL** if *getreg_spec* is invalid
+ * **-EINVAL** if *size* != bytes necessary to store requested reg val
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -5351,6 +5363,7 @@ union bpf_attr {
FN(skb_set_tstamp), \
FN(ima_file_hash), \
FN(kptr_xchg), \
+ FN(get_reg_val), \
/* */
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
@@ -6318,6 +6331,33 @@ struct bpf_perf_event_value {
__u64 running;
};
+/* bpf_get_reg_val register enum */
+enum {
+ BPF_GETREG_X86_XMM0 = 0,
+ BPF_GETREG_X86_XMM1,
+ BPF_GETREG_X86_XMM2,
+ BPF_GETREG_X86_XMM3,
+ BPF_GETREG_X86_XMM4,
+ BPF_GETREG_X86_XMM5,
+ BPF_GETREG_X86_XMM6,
+ BPF_GETREG_X86_XMM7,
+ BPF_GETREG_X86_XMM8,
+ BPF_GETREG_X86_XMM9,
+ BPF_GETREG_X86_XMM10,
+ BPF_GETREG_X86_XMM11,
+ BPF_GETREG_X86_XMM12,
+ BPF_GETREG_X86_XMM13,
+ BPF_GETREG_X86_XMM14,
+ BPF_GETREG_X86_XMM15,
+ __MAX_BPF_GETREG,
+};
+
+/* bpf_get_reg_val flags */
+enum {
+ BPF_GETREG_F_NONE = 0,
+ BPF_GETREG_F_CURRENT = (1U << 0),
+};
+
enum {
BPF_DEVCG_ACC_MKNOD = (1ULL << 0),
BPF_DEVCG_ACC_READ = (1ULL << 1),
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index f15b826f9899..0de7d6b3af5b 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -28,6 +28,10 @@
#include <asm/tlb.h>
+#ifdef CONFIG_X86
+#include <asm/fpu/context.h>
+#endif
+
#include "trace_probe.h"
#include "trace.h"
@@ -1166,6 +1170,148 @@ static const struct bpf_func_proto bpf_get_func_arg_cnt_proto = {
.arg1_type = ARG_PTR_TO_CTX,
};
+#define XMM_REG_SZ 16
+
+#define __xmm_space_off(regno) \
+ case BPF_GETREG_X86_XMM ## regno: \
+ xmm_space_off = regno * 16; \
+ break;
+
+static long getreg_read_xmm_fxsave(u32 reg, struct task_struct *tsk,
+ void *data)
+{
+ struct fxregs_state *fxsave;
+ u32 xmm_space_off;
+
+ switch (reg) {
+ __xmm_space_off(0);
+ __xmm_space_off(1);
+ __xmm_space_off(2);
+ __xmm_space_off(3);
+ __xmm_space_off(4);
+ __xmm_space_off(5);
+ __xmm_space_off(6);
+ __xmm_space_off(7);
+#ifdef CONFIG_X86_64
+ __xmm_space_off(8);
+ __xmm_space_off(9);
+ __xmm_space_off(10);
+ __xmm_space_off(11);
+ __xmm_space_off(12);
+ __xmm_space_off(13);
+ __xmm_space_off(14);
+ __xmm_space_off(15);
+#endif
+ default:
+ return -EINVAL;
+ }
+
+ fxsave = &tsk->thread.fpu.fpstate->regs.fxsave;
+ memcpy(data, (void *)&fxsave->xmm_space + xmm_space_off, XMM_REG_SZ);
+ return 0;
+}
+
+#undef __xmm_space_off
+
+static bool getreg_is_xmm(u32 reg)
+{
+ return reg >= BPF_GETREG_X86_XMM0 && reg <= BPF_GETREG_X86_XMM15;
+}
+
+#define __bpf_sse_read(regno) \
+ case BPF_GETREG_X86_XMM ## regno: \
+ asm("movdqa %%xmm" #regno ", %0" : "=m"(*(char *)data)); \
+ break;
+
+static long bpf_read_sse_reg(u32 reg, u32 flags, struct task_struct *tsk,
+ void *data)
+{
+#ifdef CONFIG_X86
+ unsigned long irq_flags;
+ long err;
+
+ switch (reg) {
+ __bpf_sse_read(0);
+ __bpf_sse_read(1);
+ __bpf_sse_read(2);
+ __bpf_sse_read(3);
+ __bpf_sse_read(4);
+ __bpf_sse_read(5);
+ __bpf_sse_read(6);
+ __bpf_sse_read(7);
+#ifdef CONFIG_X86_64
+ __bpf_sse_read(8);
+ __bpf_sse_read(9);
+ __bpf_sse_read(10);
+ __bpf_sse_read(11);
+ __bpf_sse_read(12);
+ __bpf_sse_read(13);
+ __bpf_sse_read(14);
+ __bpf_sse_read(15);
+#endif /* CONFIG_X86_64 */
+ default:
+ return -EINVAL;
+ }
+
+ if (flags & BPF_GETREG_F_CURRENT)
+ return 0;
+
+ if (!fpregs_state_valid(&tsk->thread.fpu, smp_processor_id())) {
+ local_irq_save(irq_flags);
+ err = getreg_read_xmm_fxsave(reg, tsk, data);
+ local_irq_restore(irq_flags);
+ return err;
+ }
+
+ return 0;
+#else
+ return -ENOENT;
+#endif /* CONFIG_X86 */
+}
+
+#undef __bpf_sse_read
+
+BPF_CALL_5(get_reg_val, void *, dst, u32, size,
+ u64, getreg_spec, struct pt_regs *, regs,
+ struct task_struct *, tsk)
+{
+ u32 reg, flags;
+
+ reg = (u32)(getreg_spec >> 32);
+ flags = (u32)getreg_spec;
+ if (reg >= __MAX_BPF_GETREG)
+ return -EINVAL;
+
+ if (getreg_is_xmm(reg)) {
+#ifndef CONFIG_X86
+ return -ENOENT;
+#else
+ if (size != XMM_REG_SZ)
+ return -EINVAL;
+
+ return bpf_read_sse_reg(reg, flags, tsk, dst);
+ }
+
+ return -EINVAL;
+#endif
+}
+
+BTF_ID_LIST(bpf_get_reg_val_ids)
+BTF_ID(struct, pt_regs)
+
+static const struct bpf_func_proto bpf_get_reg_val_proto = {
+ .func = get_reg_val,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_UNINIT_MEM,
+ .arg2_type = ARG_CONST_SIZE,
+ .arg3_type = ARG_ANYTHING,
+ .arg4_type = ARG_PTR_TO_BTF_ID_OR_NULL,
+ .arg4_btf_id = &bpf_get_reg_val_ids[0],
+ .arg5_type = ARG_PTR_TO_BTF_ID_OR_NULL,
+ .arg5_btf_id = &btf_tracing_ids[BTF_TRACING_TYPE_TASK],
+};
+
static const struct bpf_func_proto *
bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
@@ -1287,6 +1433,8 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_find_vma_proto;
case BPF_FUNC_trace_vprintk:
return bpf_get_trace_vprintk_proto();
+ case BPF_FUNC_get_reg_val:
+ return &bpf_get_reg_val_proto;
default:
return bpf_base_func_proto(func_id);
}
diff --git a/kernel/trace/bpf_trace.h b/kernel/trace/bpf_trace.h
index 9acbc11ac7bb..b4b55706c2dd 100644
--- a/kernel/trace/bpf_trace.h
+++ b/kernel/trace/bpf_trace.h
@@ -29,6 +29,7 @@ TRACE_EVENT(bpf_trace_printk,
#undef TRACE_INCLUDE_PATH
#define TRACE_INCLUDE_PATH .
+#undef TRACE_INCLUDE_FILE
#define TRACE_INCLUDE_FILE bpf_trace
#include <trace/define_trace.h>
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 444fe6f1cf35..3ef8f683ed9e 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -5154,6 +5154,18 @@ union bpf_attr {
* if not NULL, is a reference which must be released using its
* corresponding release function, or moved into a BPF map before
* program exit.
+ *
+ * long bpf_get_reg_val(void *dst, u32 size, u64 getreg_spec, struct pt_regs *regs, struct task_struct *tsk)
+ * Description
+ * Store the value of a SSE register specified by *getreg_spec*
+ * into memory region of size *size* specified by *dst*. *getreg_spec*
+ * is a combination of BPF_GETREG enum AND BPF_GETREG_F flag e.g.
+ * (BPF_GETREG_X86_XMM0 << 32) | BPF_GETREG_F_CURRENT.*
+ * Return
+ * 0 on success
+ * **-ENOENT** if the system architecture does not have requested reg
+ * **-EINVAL** if *getreg_spec* is invalid
+ * **-EINVAL** if *size* != bytes necessary to store requested reg val
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -5351,6 +5363,7 @@ union bpf_attr {
FN(skb_set_tstamp), \
FN(ima_file_hash), \
FN(kptr_xchg), \
+ FN(get_reg_val), \
/* */
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
@@ -6318,6 +6331,33 @@ struct bpf_perf_event_value {
__u64 running;
};
+/* bpf_get_reg_val register enum */
+enum {
+ BPF_GETREG_X86_XMM0 = 0,
+ BPF_GETREG_X86_XMM1,
+ BPF_GETREG_X86_XMM2,
+ BPF_GETREG_X86_XMM3,
+ BPF_GETREG_X86_XMM4,
+ BPF_GETREG_X86_XMM5,
+ BPF_GETREG_X86_XMM6,
+ BPF_GETREG_X86_XMM7,
+ BPF_GETREG_X86_XMM8,
+ BPF_GETREG_X86_XMM9,
+ BPF_GETREG_X86_XMM10,
+ BPF_GETREG_X86_XMM11,
+ BPF_GETREG_X86_XMM12,
+ BPF_GETREG_X86_XMM13,
+ BPF_GETREG_X86_XMM14,
+ BPF_GETREG_X86_XMM15,
+ __MAX_BPF_GETREG,
+};
+
+/* bpf_get_reg_val flags */
+enum {
+ BPF_GETREG_F_NONE = 0,
+ BPF_GETREG_F_CURRENT = (1U << 0),
+};
+
enum {
BPF_DEVCG_ACC_MKNOD = (1ULL << 0),
BPF_DEVCG_ACC_READ = (1ULL << 1),
--
2.30.2
next prev parent reply other threads:[~2022-05-12 7:43 UTC|newest]
Thread overview: 20+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-05-12 7:43 [RFC PATCH bpf-next 0/5] bpf: add get_reg_val helper Dave Marchevsky
2022-05-12 7:43 ` [RFC PATCH bpf-next 1/5] x86/fpu: Move context.h to include/asm Dave Marchevsky
2022-05-12 13:56 ` David Vernet
2022-05-14 0:44 ` Alexei Starovoitov
2022-05-12 7:43 ` Dave Marchevsky [this message]
2022-05-12 15:29 ` [RFC PATCH bpf-next 2/5] bpf: add get_reg_val helper David Vernet
2022-05-18 8:07 ` Dave Marchevsky
2022-05-14 0:41 ` Alexei Starovoitov
2022-05-18 7:35 ` Dave Marchevsky
2022-05-12 7:43 ` [RFC PATCH bpf-next 3/5] libbpf: usdt lib wiring of xmm reads Dave Marchevsky
2022-05-14 0:43 ` Alexei Starovoitov
2022-05-16 23:26 ` Andrii Nakryiko
2022-05-18 8:20 ` Dave Marchevsky
2022-05-12 7:43 ` [RFC PATCH bpf-next 4/5] selftests/bpf: Add test for USDT parse of xmm reg Dave Marchevsky
2022-05-16 23:31 ` Andrii Nakryiko
2022-05-17 1:17 ` Alexei Starovoitov
2022-05-18 23:56 ` Andrii Nakryiko
2022-05-12 7:43 ` [RFC PATCH bpf-next 5/5] selftests/bpf: get_reg_val test exercising fxsave fetch Dave Marchevsky
2022-05-12 17:47 ` Dave Marchevsky
2022-05-16 23:28 ` Andrii Nakryiko
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20220512074321.2090073-3-davemarchevsky@fb.com \
--to=davemarchevsky@fb.com \
--cc=andrii@kernel.org \
--cc=ast@kernel.org \
--cc=bpf@vger.kernel.org \
--cc=daniel@iogearbox.net \
--cc=iii@linux.ibm.com \
--cc=kernel-team@fb.com \
--cc=riel@surriel.com \
--cc=yhs@fb.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox