* [PATCH 6.1 1/3] bpf: Add struct for bin_args arg in bpf_bprintf_prepare
2024-02-17 12:13 [PATCH 5.15,6.1] Fixup preempt imbalance with bpf_trace_printk Thadeu Lima de Souza Cascardo
@ 2024-02-17 12:13 ` Thadeu Lima de Souza Cascardo
2024-02-17 12:13 ` [PATCH 6.1 2/3] bpf: Do cleanup in bpf_bprintf_cleanup only when needed Thadeu Lima de Souza Cascardo
` (6 subsequent siblings)
7 siblings, 0 replies; 9+ messages in thread
From: Thadeu Lima de Souza Cascardo @ 2024-02-17 12:13 UTC (permalink / raw)
To: stable; +Cc: cascardo, jolsa, daniel, yhs
From: Jiri Olsa <jolsa@kernel.org>
commit 78aa1cc9404399a15d2a1205329c6a06236f5378 upstream.
Adding struct bpf_bprintf_data to hold bin_args argument for
bpf_bprintf_prepare function.
We will add another return argument to bpf_bprintf_prepare and
pass the struct to bpf_bprintf_cleanup for proper cleanup in
following changes.
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20221215214430.1336195-2-jolsa@kernel.org
Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@igalia.com>
---
include/linux/bpf.h | 7 ++++++-
kernel/bpf/helpers.c | 24 +++++++++++++-----------
kernel/bpf/verifier.c | 3 ++-
kernel/trace/bpf_trace.c | 34 ++++++++++++++++++++--------------
4 files changed, 41 insertions(+), 27 deletions(-)
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index c04a61ffac8a..0e4fb16cd5c1 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -2740,8 +2740,13 @@ bool btf_id_set_contains(const struct btf_id_set *set, u32 id);
#define MAX_BPRINTF_VARARGS 12
+struct bpf_bprintf_data {
+ u32 *bin_args;
+ bool get_bin_args;
+};
+
int bpf_bprintf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args,
- u32 **bin_buf, u32 num_args);
+ u32 num_args, struct bpf_bprintf_data *data);
void bpf_bprintf_cleanup(void);
/* the implementation of the opaque uapi struct bpf_dynptr */
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 34135fbd6097..42141d59e9c6 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -795,16 +795,16 @@ void bpf_bprintf_cleanup(void)
* Returns a negative value if fmt is an invalid format string or 0 otherwise.
*
* This can be used in two ways:
- * - Format string verification only: when bin_args is NULL
+ * - Format string verification only: when data->get_bin_args is false
* - Arguments preparation: in addition to the above verification, it writes in
- * bin_args a binary representation of arguments usable by bstr_printf where
- * pointers from BPF have been sanitized.
+ * data->bin_args a binary representation of arguments usable by bstr_printf
+ * where pointers from BPF have been sanitized.
*
* In argument preparation mode, if 0 is returned, safe temporary buffers are
* allocated and bpf_bprintf_cleanup should be called to free them after use.
*/
int bpf_bprintf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args,
- u32 **bin_args, u32 num_args)
+ u32 num_args, struct bpf_bprintf_data *data)
{
char *unsafe_ptr = NULL, *tmp_buf = NULL, *tmp_buf_end, *fmt_end;
size_t sizeof_cur_arg, sizeof_cur_ip;
@@ -817,12 +817,12 @@ int bpf_bprintf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args,
return -EINVAL;
fmt_size = fmt_end - fmt;
- if (bin_args) {
+ if (data->get_bin_args) {
if (num_args && try_get_fmt_tmp_buf(&tmp_buf))
return -EBUSY;
tmp_buf_end = tmp_buf + MAX_BPRINTF_BUF_LEN;
- *bin_args = (u32 *)tmp_buf;
+ data->bin_args = (u32 *)tmp_buf;
}
for (i = 0; i < fmt_size; i++) {
@@ -1023,24 +1023,26 @@ int bpf_bprintf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args,
}
BPF_CALL_5(bpf_snprintf, char *, str, u32, str_size, char *, fmt,
- const void *, data, u32, data_len)
+ const void *, args, u32, data_len)
{
+ struct bpf_bprintf_data data = {
+ .get_bin_args = true,
+ };
int err, num_args;
- u32 *bin_args;
if (data_len % 8 || data_len > MAX_BPRINTF_VARARGS * 8 ||
- (data_len && !data))
+ (data_len && !args))
return -EINVAL;
num_args = data_len / 8;
/* ARG_PTR_TO_CONST_STR guarantees that fmt is zero-terminated so we
* can safely give an unbounded size.
*/
- err = bpf_bprintf_prepare(fmt, UINT_MAX, data, &bin_args, num_args);
+ err = bpf_bprintf_prepare(fmt, UINT_MAX, args, num_args, &data);
if (err < 0)
return err;
- err = bstr_printf(str, str_size, fmt, bin_args);
+ err = bstr_printf(str, str_size, fmt, data.bin_args);
bpf_bprintf_cleanup();
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 23b6d57b5eef..1a29ac4db6ea 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -7448,6 +7448,7 @@ static int check_bpf_snprintf_call(struct bpf_verifier_env *env,
struct bpf_reg_state *fmt_reg = ®s[BPF_REG_3];
struct bpf_reg_state *data_len_reg = ®s[BPF_REG_5];
struct bpf_map *fmt_map = fmt_reg->map_ptr;
+ struct bpf_bprintf_data data = {};
int err, fmt_map_off, num_args;
u64 fmt_addr;
char *fmt;
@@ -7472,7 +7473,7 @@ static int check_bpf_snprintf_call(struct bpf_verifier_env *env,
/* We are also guaranteed that fmt+fmt_map_off is NULL terminated, we
* can focus on validating the format specifiers.
*/
- err = bpf_bprintf_prepare(fmt, UINT_MAX, NULL, NULL, num_args);
+ err = bpf_bprintf_prepare(fmt, UINT_MAX, NULL, num_args, &data);
if (err < 0)
verbose(env, "Invalid format string\n");
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index f4a494a457c5..bbf44095999f 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -377,18 +377,20 @@ BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1,
u64, arg2, u64, arg3)
{
u64 args[MAX_TRACE_PRINTK_VARARGS] = { arg1, arg2, arg3 };
- u32 *bin_args;
+ struct bpf_bprintf_data data = {
+ .get_bin_args = true,
+ };
static char buf[BPF_TRACE_PRINTK_SIZE];
unsigned long flags;
int ret;
- ret = bpf_bprintf_prepare(fmt, fmt_size, args, &bin_args,
- MAX_TRACE_PRINTK_VARARGS);
+ ret = bpf_bprintf_prepare(fmt, fmt_size, args,
+ MAX_TRACE_PRINTK_VARARGS, &data);
if (ret < 0)
return ret;
raw_spin_lock_irqsave(&trace_printk_lock, flags);
- ret = bstr_printf(buf, sizeof(buf), fmt, bin_args);
+ ret = bstr_printf(buf, sizeof(buf), fmt, data.bin_args);
trace_bpf_trace_printk(buf);
raw_spin_unlock_irqrestore(&trace_printk_lock, flags);
@@ -426,25 +428,27 @@ const struct bpf_func_proto *bpf_get_trace_printk_proto(void)
return &bpf_trace_printk_proto;
}
-BPF_CALL_4(bpf_trace_vprintk, char *, fmt, u32, fmt_size, const void *, data,
+BPF_CALL_4(bpf_trace_vprintk, char *, fmt, u32, fmt_size, const void *, args,
u32, data_len)
{
+ struct bpf_bprintf_data data = {
+ .get_bin_args = true,
+ };
static char buf[BPF_TRACE_PRINTK_SIZE];
unsigned long flags;
int ret, num_args;
- u32 *bin_args;
if (data_len & 7 || data_len > MAX_BPRINTF_VARARGS * 8 ||
- (data_len && !data))
+ (data_len && !args))
return -EINVAL;
num_args = data_len / 8;
- ret = bpf_bprintf_prepare(fmt, fmt_size, data, &bin_args, num_args);
+ ret = bpf_bprintf_prepare(fmt, fmt_size, args, num_args, &data);
if (ret < 0)
return ret;
raw_spin_lock_irqsave(&trace_printk_lock, flags);
- ret = bstr_printf(buf, sizeof(buf), fmt, bin_args);
+ ret = bstr_printf(buf, sizeof(buf), fmt, data.bin_args);
trace_bpf_trace_printk(buf);
raw_spin_unlock_irqrestore(&trace_printk_lock, flags);
@@ -471,21 +475,23 @@ const struct bpf_func_proto *bpf_get_trace_vprintk_proto(void)
}
BPF_CALL_5(bpf_seq_printf, struct seq_file *, m, char *, fmt, u32, fmt_size,
- const void *, data, u32, data_len)
+ const void *, args, u32, data_len)
{
+ struct bpf_bprintf_data data = {
+ .get_bin_args = true,
+ };
int err, num_args;
- u32 *bin_args;
if (data_len & 7 || data_len > MAX_BPRINTF_VARARGS * 8 ||
- (data_len && !data))
+ (data_len && !args))
return -EINVAL;
num_args = data_len / 8;
- err = bpf_bprintf_prepare(fmt, fmt_size, data, &bin_args, num_args);
+ err = bpf_bprintf_prepare(fmt, fmt_size, args, num_args, &data);
if (err < 0)
return err;
- seq_bprintf(m, fmt, bin_args);
+ seq_bprintf(m, fmt, data.bin_args);
bpf_bprintf_cleanup();
--
2.34.1
^ permalink raw reply related [flat|nested] 9+ messages in thread* [PATCH 6.1 2/3] bpf: Do cleanup in bpf_bprintf_cleanup only when needed
2024-02-17 12:13 [PATCH 5.15,6.1] Fixup preempt imbalance with bpf_trace_printk Thadeu Lima de Souza Cascardo
2024-02-17 12:13 ` [PATCH 6.1 1/3] bpf: Add struct for bin_args arg in bpf_bprintf_prepare Thadeu Lima de Souza Cascardo
@ 2024-02-17 12:13 ` Thadeu Lima de Souza Cascardo
2024-02-17 12:13 ` [PATCH 6.1 3/3] bpf: Remove trace_printk_lock Thadeu Lima de Souza Cascardo
` (5 subsequent siblings)
7 siblings, 0 replies; 9+ messages in thread
From: Thadeu Lima de Souza Cascardo @ 2024-02-17 12:13 UTC (permalink / raw)
To: stable; +Cc: cascardo, jolsa, daniel, yhs
From: Jiri Olsa <jolsa@kernel.org>
commit f19a4050455aad847fb93f18dc1fe502eb60f989 upstream.
Currently we always cleanup/decrement bpf_bprintf_nest_level variable
in bpf_bprintf_cleanup if it's > 0.
There's possible scenario where this could cause a problem, when
bpf_bprintf_prepare does not get bin_args buffer (because num_args is 0)
and following bpf_bprintf_cleanup call decrements bpf_bprintf_nest_level
variable, like:
in task context:
bpf_bprintf_prepare(num_args != 0) increments 'bpf_bprintf_nest_level = 1'
-> first irq :
bpf_bprintf_prepare(num_args == 0)
bpf_bprintf_cleanup decrements 'bpf_bprintf_nest_level = 0'
-> second irq:
bpf_bprintf_prepare(num_args != 0) bpf_bprintf_nest_level = 1
gets same buffer as task context above
Adding check to bpf_bprintf_cleanup and doing the real cleanup only if we
got bin_args data in the first place.
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20221215214430.1336195-3-jolsa@kernel.org
Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@igalia.com>
---
include/linux/bpf.h | 2 +-
kernel/bpf/helpers.c | 16 +++++++++-------
kernel/trace/bpf_trace.c | 6 +++---
3 files changed, 13 insertions(+), 11 deletions(-)
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 0e4fb16cd5c1..bc4e6969899a 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -2747,7 +2747,7 @@ struct bpf_bprintf_data {
int bpf_bprintf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args,
u32 num_args, struct bpf_bprintf_data *data);
-void bpf_bprintf_cleanup(void);
+void bpf_bprintf_cleanup(struct bpf_bprintf_data *data);
/* the implementation of the opaque uapi struct bpf_dynptr */
struct bpf_dynptr_kern {
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 42141d59e9c6..64f41f58b37b 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -781,12 +781,14 @@ static int try_get_fmt_tmp_buf(char **tmp_buf)
return 0;
}
-void bpf_bprintf_cleanup(void)
+void bpf_bprintf_cleanup(struct bpf_bprintf_data *data)
{
- if (this_cpu_read(bpf_bprintf_nest_level)) {
- this_cpu_dec(bpf_bprintf_nest_level);
- preempt_enable();
- }
+ if (!data->bin_args)
+ return;
+ if (WARN_ON_ONCE(this_cpu_read(bpf_bprintf_nest_level) == 0))
+ return;
+ this_cpu_dec(bpf_bprintf_nest_level);
+ preempt_enable();
}
/*
@@ -1018,7 +1020,7 @@ int bpf_bprintf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args,
err = 0;
out:
if (err)
- bpf_bprintf_cleanup();
+ bpf_bprintf_cleanup(data);
return err;
}
@@ -1044,7 +1046,7 @@ BPF_CALL_5(bpf_snprintf, char *, str, u32, str_size, char *, fmt,
err = bstr_printf(str, str_size, fmt, data.bin_args);
- bpf_bprintf_cleanup();
+ bpf_bprintf_cleanup(&data);
return err + 1;
}
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index bbf44095999f..263a54d0d9ee 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -395,7 +395,7 @@ BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1,
trace_bpf_trace_printk(buf);
raw_spin_unlock_irqrestore(&trace_printk_lock, flags);
- bpf_bprintf_cleanup();
+ bpf_bprintf_cleanup(&data);
return ret;
}
@@ -453,7 +453,7 @@ BPF_CALL_4(bpf_trace_vprintk, char *, fmt, u32, fmt_size, const void *, args,
trace_bpf_trace_printk(buf);
raw_spin_unlock_irqrestore(&trace_printk_lock, flags);
- bpf_bprintf_cleanup();
+ bpf_bprintf_cleanup(&data);
return ret;
}
@@ -493,7 +493,7 @@ BPF_CALL_5(bpf_seq_printf, struct seq_file *, m, char *, fmt, u32, fmt_size,
seq_bprintf(m, fmt, data.bin_args);
- bpf_bprintf_cleanup();
+ bpf_bprintf_cleanup(&data);
return seq_has_overflowed(m) ? -EOVERFLOW : 0;
}
--
2.34.1
^ permalink raw reply related [flat|nested] 9+ messages in thread* [PATCH 6.1 3/3] bpf: Remove trace_printk_lock
2024-02-17 12:13 [PATCH 5.15,6.1] Fixup preempt imbalance with bpf_trace_printk Thadeu Lima de Souza Cascardo
2024-02-17 12:13 ` [PATCH 6.1 1/3] bpf: Add struct for bin_args arg in bpf_bprintf_prepare Thadeu Lima de Souza Cascardo
2024-02-17 12:13 ` [PATCH 6.1 2/3] bpf: Do cleanup in bpf_bprintf_cleanup only when needed Thadeu Lima de Souza Cascardo
@ 2024-02-17 12:13 ` Thadeu Lima de Souza Cascardo
2024-02-17 12:13 ` [PATCH 5.15 1/4] bpf: Merge printk and seq_printf VARARG max macros Thadeu Lima de Souza Cascardo
` (4 subsequent siblings)
7 siblings, 0 replies; 9+ messages in thread
From: Thadeu Lima de Souza Cascardo @ 2024-02-17 12:13 UTC (permalink / raw)
To: stable; +Cc: cascardo, jolsa, daniel, yhs
From: Jiri Olsa <jolsa@kernel.org>
commit e2bb9e01d589f7fa82573aedd2765ff9b277816a upstream.
Both bpf_trace_printk and bpf_trace_vprintk helpers use static buffer guarded
with trace_printk_lock spin lock.
The spin lock contention causes issues with bpf programs attached to
contention_begin tracepoint [1][2].
Andrii suggested we could get rid of the contention by using trylock, but we
could actually get rid of the spinlock completely by using percpu buffers the
same way as for bin_args in bpf_bprintf_prepare function.
Adding new return 'buf' argument to struct bpf_bprintf_data and making
bpf_bprintf_prepare to return also the buffer for printk helpers.
[1] https://lore.kernel.org/bpf/CACkBjsakT_yWxnSWr4r-0TpPvbKm9-OBmVUhJb7hV3hY8fdCkw@mail.gmail.com/
[2] https://lore.kernel.org/bpf/CACkBjsaCsTovQHFfkqJKto6S4Z8d02ud1D7MPESrHa1cVNNTrw@mail.gmail.com/
Reported-by: Hao Sun <sunhao.th@gmail.com>
Suggested-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20221215214430.1336195-4-jolsa@kernel.org
Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@igalia.com>
---
include/linux/bpf.h | 3 +++
kernel/bpf/helpers.c | 31 +++++++++++++++++++------------
kernel/trace/bpf_trace.c | 20 ++++++--------------
3 files changed, 28 insertions(+), 26 deletions(-)
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index bc4e6969899a..1ca1902af23e 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -2739,10 +2739,13 @@ struct btf_id_set;
bool btf_id_set_contains(const struct btf_id_set *set, u32 id);
#define MAX_BPRINTF_VARARGS 12
+#define MAX_BPRINTF_BUF 1024
struct bpf_bprintf_data {
u32 *bin_args;
+ char *buf;
bool get_bin_args;
+ bool get_buf;
};
int bpf_bprintf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args,
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 64f41f58b37b..6a61a98d602c 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -753,19 +753,20 @@ static int bpf_trace_copy_string(char *buf, void *unsafe_ptr, char fmt_ptype,
/* Per-cpu temp buffers used by printf-like helpers to store the bprintf binary
* arguments representation.
*/
-#define MAX_BPRINTF_BUF_LEN 512
+#define MAX_BPRINTF_BIN_ARGS 512
/* Support executing three nested bprintf helper calls on a given CPU */
#define MAX_BPRINTF_NEST_LEVEL 3
struct bpf_bprintf_buffers {
- char tmp_bufs[MAX_BPRINTF_NEST_LEVEL][MAX_BPRINTF_BUF_LEN];
+ char bin_args[MAX_BPRINTF_BIN_ARGS];
+ char buf[MAX_BPRINTF_BUF];
};
-static DEFINE_PER_CPU(struct bpf_bprintf_buffers, bpf_bprintf_bufs);
+
+static DEFINE_PER_CPU(struct bpf_bprintf_buffers[MAX_BPRINTF_NEST_LEVEL], bpf_bprintf_bufs);
static DEFINE_PER_CPU(int, bpf_bprintf_nest_level);
-static int try_get_fmt_tmp_buf(char **tmp_buf)
+static int try_get_buffers(struct bpf_bprintf_buffers **bufs)
{
- struct bpf_bprintf_buffers *bufs;
int nest_level;
preempt_disable();
@@ -775,15 +776,14 @@ static int try_get_fmt_tmp_buf(char **tmp_buf)
preempt_enable();
return -EBUSY;
}
- bufs = this_cpu_ptr(&bpf_bprintf_bufs);
- *tmp_buf = bufs->tmp_bufs[nest_level - 1];
+ *bufs = this_cpu_ptr(&bpf_bprintf_bufs[nest_level - 1]);
return 0;
}
void bpf_bprintf_cleanup(struct bpf_bprintf_data *data)
{
- if (!data->bin_args)
+ if (!data->bin_args && !data->buf)
return;
if (WARN_ON_ONCE(this_cpu_read(bpf_bprintf_nest_level) == 0))
return;
@@ -808,7 +808,9 @@ void bpf_bprintf_cleanup(struct bpf_bprintf_data *data)
int bpf_bprintf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args,
u32 num_args, struct bpf_bprintf_data *data)
{
+ bool get_buffers = (data->get_bin_args && num_args) || data->get_buf;
char *unsafe_ptr = NULL, *tmp_buf = NULL, *tmp_buf_end, *fmt_end;
+ struct bpf_bprintf_buffers *buffers = NULL;
size_t sizeof_cur_arg, sizeof_cur_ip;
int err, i, num_spec = 0;
u64 cur_arg;
@@ -819,14 +821,19 @@ int bpf_bprintf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args,
return -EINVAL;
fmt_size = fmt_end - fmt;
- if (data->get_bin_args) {
- if (num_args && try_get_fmt_tmp_buf(&tmp_buf))
- return -EBUSY;
+ if (get_buffers && try_get_buffers(&buffers))
+ return -EBUSY;
- tmp_buf_end = tmp_buf + MAX_BPRINTF_BUF_LEN;
+ if (data->get_bin_args) {
+ if (num_args)
+ tmp_buf = buffers->bin_args;
+ tmp_buf_end = tmp_buf + MAX_BPRINTF_BIN_ARGS;
data->bin_args = (u32 *)tmp_buf;
}
+ if (data->get_buf)
+ data->buf = buffers->buf;
+
for (i = 0; i < fmt_size; i++) {
if ((!isprint(fmt[i]) && !isspace(fmt[i])) || !isascii(fmt[i])) {
err = -EINVAL;
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 263a54d0d9ee..3fdde232eaa9 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -368,8 +368,6 @@ static const struct bpf_func_proto *bpf_get_probe_write_proto(void)
return &bpf_probe_write_user_proto;
}
-static DEFINE_RAW_SPINLOCK(trace_printk_lock);
-
#define MAX_TRACE_PRINTK_VARARGS 3
#define BPF_TRACE_PRINTK_SIZE 1024
@@ -379,9 +377,8 @@ BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1,
u64 args[MAX_TRACE_PRINTK_VARARGS] = { arg1, arg2, arg3 };
struct bpf_bprintf_data data = {
.get_bin_args = true,
+ .get_buf = true,
};
- static char buf[BPF_TRACE_PRINTK_SIZE];
- unsigned long flags;
int ret;
ret = bpf_bprintf_prepare(fmt, fmt_size, args,
@@ -389,11 +386,9 @@ BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1,
if (ret < 0)
return ret;
- raw_spin_lock_irqsave(&trace_printk_lock, flags);
- ret = bstr_printf(buf, sizeof(buf), fmt, data.bin_args);
+ ret = bstr_printf(data.buf, MAX_BPRINTF_BUF, fmt, data.bin_args);
- trace_bpf_trace_printk(buf);
- raw_spin_unlock_irqrestore(&trace_printk_lock, flags);
+ trace_bpf_trace_printk(data.buf);
bpf_bprintf_cleanup(&data);
@@ -433,9 +428,8 @@ BPF_CALL_4(bpf_trace_vprintk, char *, fmt, u32, fmt_size, const void *, args,
{
struct bpf_bprintf_data data = {
.get_bin_args = true,
+ .get_buf = true,
};
- static char buf[BPF_TRACE_PRINTK_SIZE];
- unsigned long flags;
int ret, num_args;
if (data_len & 7 || data_len > MAX_BPRINTF_VARARGS * 8 ||
@@ -447,11 +441,9 @@ BPF_CALL_4(bpf_trace_vprintk, char *, fmt, u32, fmt_size, const void *, args,
if (ret < 0)
return ret;
- raw_spin_lock_irqsave(&trace_printk_lock, flags);
- ret = bstr_printf(buf, sizeof(buf), fmt, data.bin_args);
+ ret = bstr_printf(data.buf, MAX_BPRINTF_BUF, fmt, data.bin_args);
- trace_bpf_trace_printk(buf);
- raw_spin_unlock_irqrestore(&trace_printk_lock, flags);
+ trace_bpf_trace_printk(data.buf);
bpf_bprintf_cleanup(&data);
--
2.34.1
^ permalink raw reply related [flat|nested] 9+ messages in thread* [PATCH 5.15 1/4] bpf: Merge printk and seq_printf VARARG max macros
2024-02-17 12:13 [PATCH 5.15,6.1] Fixup preempt imbalance with bpf_trace_printk Thadeu Lima de Souza Cascardo
` (2 preceding siblings ...)
2024-02-17 12:13 ` [PATCH 6.1 3/3] bpf: Remove trace_printk_lock Thadeu Lima de Souza Cascardo
@ 2024-02-17 12:13 ` Thadeu Lima de Souza Cascardo
2024-02-17 12:13 ` [PATCH 5.15 2/4] bpf: Add struct for bin_args arg in bpf_bprintf_prepare Thadeu Lima de Souza Cascardo
` (3 subsequent siblings)
7 siblings, 0 replies; 9+ messages in thread
From: Thadeu Lima de Souza Cascardo @ 2024-02-17 12:13 UTC (permalink / raw)
To: stable; +Cc: cascardo, jolsa, daniel, yhs
From: Dave Marchevsky <davemarchevsky@fb.com>
commit 335ff4990cf3bfa42d8846f9b3d8c09456f51801 upstream.
MAX_SNPRINTF_VARARGS and MAX_SEQ_PRINTF_VARARGS are used by bpf helpers
bpf_snprintf and bpf_seq_printf to limit their varargs. Both call into
bpf_bprintf_prepare for print formatting logic and have convenience
macros in libbpf (BPF_SNPRINTF, BPF_SEQ_PRINTF) which use the same
helper macros to convert varargs to a byte array.
Changing shared functionality to support more varargs for either bpf
helper would affect the other as well, so let's combine the _VARARGS
macros to make this more obvious.
Signed-off-by: Dave Marchevsky <davemarchevsky@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210917182911.2426606-2-davemarchevsky@fb.com
Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@igalia.com>
---
include/linux/bpf.h | 2 ++
kernel/bpf/helpers.c | 4 +---
kernel/trace/bpf_trace.c | 4 +---
3 files changed, 4 insertions(+), 6 deletions(-)
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 00c615fc8ec3..175d623a16a1 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -2286,6 +2286,8 @@ void bpf_arch_poke_desc_update(struct bpf_jit_poke_descriptor *poke,
struct btf_id_set;
bool btf_id_set_contains(const struct btf_id_set *set, u32 id);
+#define MAX_BPRINTF_VARARGS 12
+
int bpf_bprintf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args,
u32 **bin_buf, u32 num_args);
void bpf_bprintf_cleanup(void);
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 11e406ad16ae..4eb3b929504d 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -979,15 +979,13 @@ int bpf_bprintf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args,
return err;
}
-#define MAX_SNPRINTF_VARARGS 12
-
BPF_CALL_5(bpf_snprintf, char *, str, u32, str_size, char *, fmt,
const void *, data, u32, data_len)
{
int err, num_args;
u32 *bin_args;
- if (data_len % 8 || data_len > MAX_SNPRINTF_VARARGS * 8 ||
+ if (data_len % 8 || data_len > MAX_BPRINTF_VARARGS * 8 ||
(data_len && !data))
return -EINVAL;
num_args = data_len / 8;
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 85a36b19c2b8..34455856c035 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -414,15 +414,13 @@ const struct bpf_func_proto *bpf_get_trace_printk_proto(void)
return &bpf_trace_printk_proto;
}
-#define MAX_SEQ_PRINTF_VARARGS 12
-
BPF_CALL_5(bpf_seq_printf, struct seq_file *, m, char *, fmt, u32, fmt_size,
const void *, data, u32, data_len)
{
int err, num_args;
u32 *bin_args;
- if (data_len & 7 || data_len > MAX_SEQ_PRINTF_VARARGS * 8 ||
+ if (data_len & 7 || data_len > MAX_BPRINTF_VARARGS * 8 ||
(data_len && !data))
return -EINVAL;
num_args = data_len / 8;
--
2.34.1
^ permalink raw reply related [flat|nested] 9+ messages in thread* [PATCH 5.15 2/4] bpf: Add struct for bin_args arg in bpf_bprintf_prepare
2024-02-17 12:13 [PATCH 5.15,6.1] Fixup preempt imbalance with bpf_trace_printk Thadeu Lima de Souza Cascardo
` (3 preceding siblings ...)
2024-02-17 12:13 ` [PATCH 5.15 1/4] bpf: Merge printk and seq_printf VARARG max macros Thadeu Lima de Souza Cascardo
@ 2024-02-17 12:13 ` Thadeu Lima de Souza Cascardo
2024-02-17 12:13 ` [PATCH 5.15 3/4] bpf: Do cleanup in bpf_bprintf_cleanup only when needed Thadeu Lima de Souza Cascardo
` (2 subsequent siblings)
7 siblings, 0 replies; 9+ messages in thread
From: Thadeu Lima de Souza Cascardo @ 2024-02-17 12:13 UTC (permalink / raw)
To: stable; +Cc: cascardo, jolsa, daniel, yhs
From: Jiri Olsa <jolsa@kernel.org>
commit 78aa1cc9404399a15d2a1205329c6a06236f5378 upstream.
Adding struct bpf_bprintf_data to hold bin_args argument for
bpf_bprintf_prepare function.
We will add another return argument to bpf_bprintf_prepare and
pass the struct to bpf_bprintf_cleanup for proper cleanup in
following changes.
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20221215214430.1336195-2-jolsa@kernel.org
[cascardo: there is no bpf_trace_vprintk in 5.15]
Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@igalia.com>
---
include/linux/bpf.h | 7 ++++++-
kernel/bpf/helpers.c | 24 +++++++++++++-----------
kernel/bpf/verifier.c | 3 ++-
kernel/trace/bpf_trace.c | 22 +++++++++++++---------
4 files changed, 34 insertions(+), 22 deletions(-)
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 175d623a16a1..c738e98b426b 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -2288,8 +2288,13 @@ bool btf_id_set_contains(const struct btf_id_set *set, u32 id);
#define MAX_BPRINTF_VARARGS 12
+struct bpf_bprintf_data {
+ u32 *bin_args;
+ bool get_bin_args;
+};
+
int bpf_bprintf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args,
- u32 **bin_buf, u32 num_args);
+ u32 num_args, struct bpf_bprintf_data *data);
void bpf_bprintf_cleanup(void);
#endif /* _LINUX_BPF_H */
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 4eb3b929504d..20796e387e2c 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -752,16 +752,16 @@ void bpf_bprintf_cleanup(void)
* Returns a negative value if fmt is an invalid format string or 0 otherwise.
*
* This can be used in two ways:
- * - Format string verification only: when bin_args is NULL
+ * - Format string verification only: when data->get_bin_args is false
* - Arguments preparation: in addition to the above verification, it writes in
- * bin_args a binary representation of arguments usable by bstr_printf where
- * pointers from BPF have been sanitized.
+ * data->bin_args a binary representation of arguments usable by bstr_printf
+ * where pointers from BPF have been sanitized.
*
* In argument preparation mode, if 0 is returned, safe temporary buffers are
* allocated and bpf_bprintf_cleanup should be called to free them after use.
*/
int bpf_bprintf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args,
- u32 **bin_args, u32 num_args)
+ u32 num_args, struct bpf_bprintf_data *data)
{
char *unsafe_ptr = NULL, *tmp_buf = NULL, *tmp_buf_end, *fmt_end;
size_t sizeof_cur_arg, sizeof_cur_ip;
@@ -774,12 +774,12 @@ int bpf_bprintf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args,
return -EINVAL;
fmt_size = fmt_end - fmt;
- if (bin_args) {
+ if (data->get_bin_args) {
if (num_args && try_get_fmt_tmp_buf(&tmp_buf))
return -EBUSY;
tmp_buf_end = tmp_buf + MAX_BPRINTF_BUF_LEN;
- *bin_args = (u32 *)tmp_buf;
+ data->bin_args = (u32 *)tmp_buf;
}
for (i = 0; i < fmt_size; i++) {
@@ -980,24 +980,26 @@ int bpf_bprintf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args,
}
BPF_CALL_5(bpf_snprintf, char *, str, u32, str_size, char *, fmt,
- const void *, data, u32, data_len)
+ const void *, args, u32, data_len)
{
+ struct bpf_bprintf_data data = {
+ .get_bin_args = true,
+ };
int err, num_args;
- u32 *bin_args;
if (data_len % 8 || data_len > MAX_BPRINTF_VARARGS * 8 ||
- (data_len && !data))
+ (data_len && !args))
return -EINVAL;
num_args = data_len / 8;
/* ARG_PTR_TO_CONST_STR guarantees that fmt is zero-terminated so we
* can safely give an unbounded size.
*/
- err = bpf_bprintf_prepare(fmt, UINT_MAX, data, &bin_args, num_args);
+ err = bpf_bprintf_prepare(fmt, UINT_MAX, args, num_args, &data);
if (err < 0)
return err;
- err = bstr_printf(str, str_size, fmt, bin_args);
+ err = bstr_printf(str, str_size, fmt, data.bin_args);
bpf_bprintf_cleanup();
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 88a468cc0510..f099c5481b66 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -6407,6 +6407,7 @@ static int check_bpf_snprintf_call(struct bpf_verifier_env *env,
struct bpf_reg_state *fmt_reg = ®s[BPF_REG_3];
struct bpf_reg_state *data_len_reg = ®s[BPF_REG_5];
struct bpf_map *fmt_map = fmt_reg->map_ptr;
+ struct bpf_bprintf_data data = {};
int err, fmt_map_off, num_args;
u64 fmt_addr;
char *fmt;
@@ -6431,7 +6432,7 @@ static int check_bpf_snprintf_call(struct bpf_verifier_env *env,
/* We are also guaranteed that fmt+fmt_map_off is NULL terminated, we
* can focus on validating the format specifiers.
*/
- err = bpf_bprintf_prepare(fmt, UINT_MAX, NULL, NULL, num_args);
+ err = bpf_bprintf_prepare(fmt, UINT_MAX, NULL, num_args, &data);
if (err < 0)
verbose(env, "Invalid format string\n");
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 34455856c035..33da2ff59b96 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -369,18 +369,20 @@ BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1,
u64, arg2, u64, arg3)
{
u64 args[MAX_TRACE_PRINTK_VARARGS] = { arg1, arg2, arg3 };
- u32 *bin_args;
+ struct bpf_bprintf_data data = {
+ .get_bin_args = true,
+ };
static char buf[BPF_TRACE_PRINTK_SIZE];
unsigned long flags;
int ret;
- ret = bpf_bprintf_prepare(fmt, fmt_size, args, &bin_args,
- MAX_TRACE_PRINTK_VARARGS);
+ ret = bpf_bprintf_prepare(fmt, fmt_size, args,
+ MAX_TRACE_PRINTK_VARARGS, &data);
if (ret < 0)
return ret;
raw_spin_lock_irqsave(&trace_printk_lock, flags);
- ret = bstr_printf(buf, sizeof(buf), fmt, bin_args);
+ ret = bstr_printf(buf, sizeof(buf), fmt, data.bin_args);
trace_bpf_trace_printk(buf);
raw_spin_unlock_irqrestore(&trace_printk_lock, flags);
@@ -415,21 +417,23 @@ const struct bpf_func_proto *bpf_get_trace_printk_proto(void)
}
BPF_CALL_5(bpf_seq_printf, struct seq_file *, m, char *, fmt, u32, fmt_size,
- const void *, data, u32, data_len)
+ const void *, args, u32, data_len)
{
+ struct bpf_bprintf_data data = {
+ .get_bin_args = true,
+ };
int err, num_args;
- u32 *bin_args;
if (data_len & 7 || data_len > MAX_BPRINTF_VARARGS * 8 ||
- (data_len && !data))
+ (data_len && !args))
return -EINVAL;
num_args = data_len / 8;
- err = bpf_bprintf_prepare(fmt, fmt_size, data, &bin_args, num_args);
+ err = bpf_bprintf_prepare(fmt, fmt_size, args, num_args, &data);
if (err < 0)
return err;
- seq_bprintf(m, fmt, bin_args);
+ seq_bprintf(m, fmt, data.bin_args);
bpf_bprintf_cleanup();
--
2.34.1
^ permalink raw reply related [flat|nested] 9+ messages in thread* [PATCH 5.15 3/4] bpf: Do cleanup in bpf_bprintf_cleanup only when needed
2024-02-17 12:13 [PATCH 5.15,6.1] Fixup preempt imbalance with bpf_trace_printk Thadeu Lima de Souza Cascardo
` (4 preceding siblings ...)
2024-02-17 12:13 ` [PATCH 5.15 2/4] bpf: Add struct for bin_args arg in bpf_bprintf_prepare Thadeu Lima de Souza Cascardo
@ 2024-02-17 12:13 ` Thadeu Lima de Souza Cascardo
2024-02-17 12:13 ` [PATCH 5.15 4/4] bpf: Remove trace_printk_lock Thadeu Lima de Souza Cascardo
2024-02-23 15:43 ` [PATCH 5.15,6.1] Fixup preempt imbalance with bpf_trace_printk Greg KH
7 siblings, 0 replies; 9+ messages in thread
From: Thadeu Lima de Souza Cascardo @ 2024-02-17 12:13 UTC (permalink / raw)
To: stable; +Cc: cascardo, jolsa, daniel, yhs
From: Jiri Olsa <jolsa@kernel.org>
commit f19a4050455aad847fb93f18dc1fe502eb60f989 upstream.
Currently we always cleanup/decrement bpf_bprintf_nest_level variable
in bpf_bprintf_cleanup if it's > 0.
There's possible scenario where this could cause a problem, when
bpf_bprintf_prepare does not get bin_args buffer (because num_args is 0)
and following bpf_bprintf_cleanup call decrements bpf_bprintf_nest_level
variable, like:
in task context:
bpf_bprintf_prepare(num_args != 0) increments 'bpf_bprintf_nest_level = 1'
-> first irq :
bpf_bprintf_prepare(num_args == 0)
bpf_bprintf_cleanup decrements 'bpf_bprintf_nest_level = 0'
-> second irq:
bpf_bprintf_prepare(num_args != 0) bpf_bprintf_nest_level = 1
gets same buffer as task context above
Adding check to bpf_bprintf_cleanup and doing the real cleanup only if we
got bin_args data in the first place.
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20221215214430.1336195-3-jolsa@kernel.org
[cascardo: there is no bpf_trace_vprintk in 5.15]
Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@igalia.com>
---
include/linux/bpf.h | 2 +-
kernel/bpf/helpers.c | 16 +++++++++-------
kernel/trace/bpf_trace.c | 4 ++--
3 files changed, 12 insertions(+), 10 deletions(-)
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index c738e98b426b..d18f717b6e7e 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -2295,6 +2295,6 @@ struct bpf_bprintf_data {
int bpf_bprintf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args,
u32 num_args, struct bpf_bprintf_data *data);
-void bpf_bprintf_cleanup(void);
+void bpf_bprintf_cleanup(struct bpf_bprintf_data *data);
#endif /* _LINUX_BPF_H */
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 20796e387e2c..cfa23c7112ed 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -738,12 +738,14 @@ static int try_get_fmt_tmp_buf(char **tmp_buf)
return 0;
}
-void bpf_bprintf_cleanup(void)
+void bpf_bprintf_cleanup(struct bpf_bprintf_data *data)
{
- if (this_cpu_read(bpf_bprintf_nest_level)) {
- this_cpu_dec(bpf_bprintf_nest_level);
- preempt_enable();
- }
+ if (!data->bin_args)
+ return;
+ if (WARN_ON_ONCE(this_cpu_read(bpf_bprintf_nest_level) == 0))
+ return;
+ this_cpu_dec(bpf_bprintf_nest_level);
+ preempt_enable();
}
/*
@@ -975,7 +977,7 @@ int bpf_bprintf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args,
err = 0;
out:
if (err)
- bpf_bprintf_cleanup();
+ bpf_bprintf_cleanup(data);
return err;
}
@@ -1001,7 +1003,7 @@ BPF_CALL_5(bpf_snprintf, char *, str, u32, str_size, char *, fmt,
err = bstr_printf(str, str_size, fmt, data.bin_args);
- bpf_bprintf_cleanup();
+ bpf_bprintf_cleanup(&data);
return err + 1;
}
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 33da2ff59b96..2bc85a87f020 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -387,7 +387,7 @@ BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1,
trace_bpf_trace_printk(buf);
raw_spin_unlock_irqrestore(&trace_printk_lock, flags);
- bpf_bprintf_cleanup();
+ bpf_bprintf_cleanup(&data);
return ret;
}
@@ -435,7 +435,7 @@ BPF_CALL_5(bpf_seq_printf, struct seq_file *, m, char *, fmt, u32, fmt_size,
seq_bprintf(m, fmt, data.bin_args);
- bpf_bprintf_cleanup();
+ bpf_bprintf_cleanup(&data);
return seq_has_overflowed(m) ? -EOVERFLOW : 0;
}
--
2.34.1
^ permalink raw reply related [flat|nested] 9+ messages in thread* [PATCH 5.15 4/4] bpf: Remove trace_printk_lock
2024-02-17 12:13 [PATCH 5.15,6.1] Fixup preempt imbalance with bpf_trace_printk Thadeu Lima de Souza Cascardo
` (5 preceding siblings ...)
2024-02-17 12:13 ` [PATCH 5.15 3/4] bpf: Do cleanup in bpf_bprintf_cleanup only when needed Thadeu Lima de Souza Cascardo
@ 2024-02-17 12:13 ` Thadeu Lima de Souza Cascardo
2024-02-23 15:43 ` [PATCH 5.15,6.1] Fixup preempt imbalance with bpf_trace_printk Greg KH
7 siblings, 0 replies; 9+ messages in thread
From: Thadeu Lima de Souza Cascardo @ 2024-02-17 12:13 UTC (permalink / raw)
To: stable; +Cc: cascardo, jolsa, daniel, yhs
From: Jiri Olsa <jolsa@kernel.org>
commit e2bb9e01d589f7fa82573aedd2765ff9b277816a upstream.
Both bpf_trace_printk and bpf_trace_vprintk helpers use static buffer guarded
with trace_printk_lock spin lock.
The spin lock contention causes issues with bpf programs attached to
contention_begin tracepoint [1][2].
Andrii suggested we could get rid of the contention by using trylock, but we
could actually get rid of the spinlock completely by using percpu buffers the
same way as for bin_args in bpf_bprintf_prepare function.
Adding new return 'buf' argument to struct bpf_bprintf_data and making
bpf_bprintf_prepare to return also the buffer for printk helpers.
[1] https://lore.kernel.org/bpf/CACkBjsakT_yWxnSWr4r-0TpPvbKm9-OBmVUhJb7hV3hY8fdCkw@mail.gmail.com/
[2] https://lore.kernel.org/bpf/CACkBjsaCsTovQHFfkqJKto6S4Z8d02ud1D7MPESrHa1cVNNTrw@mail.gmail.com/
Reported-by: Hao Sun <sunhao.th@gmail.com>
Suggested-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20221215214430.1336195-4-jolsa@kernel.org
[cascardo: there is no bpf_trace_vprintk in 5.15]
Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@igalia.com>
---
include/linux/bpf.h | 3 +++
kernel/bpf/helpers.c | 31 +++++++++++++++++++------------
kernel/trace/bpf_trace.c | 11 +++--------
3 files changed, 25 insertions(+), 20 deletions(-)
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index d18f717b6e7e..ef8fc639a575 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -2287,10 +2287,13 @@ struct btf_id_set;
bool btf_id_set_contains(const struct btf_id_set *set, u32 id);
#define MAX_BPRINTF_VARARGS 12
+#define MAX_BPRINTF_BUF 1024
struct bpf_bprintf_data {
u32 *bin_args;
+ char *buf;
bool get_bin_args;
+ bool get_buf;
};
int bpf_bprintf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args,
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index cfa23c7112ed..c8827d1ff3c5 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -710,19 +710,20 @@ static int bpf_trace_copy_string(char *buf, void *unsafe_ptr, char fmt_ptype,
/* Per-cpu temp buffers used by printf-like helpers to store the bprintf binary
* arguments representation.
*/
-#define MAX_BPRINTF_BUF_LEN 512
+#define MAX_BPRINTF_BIN_ARGS 512
/* Support executing three nested bprintf helper calls on a given CPU */
#define MAX_BPRINTF_NEST_LEVEL 3
struct bpf_bprintf_buffers {
- char tmp_bufs[MAX_BPRINTF_NEST_LEVEL][MAX_BPRINTF_BUF_LEN];
+ char bin_args[MAX_BPRINTF_BIN_ARGS];
+ char buf[MAX_BPRINTF_BUF];
};
-static DEFINE_PER_CPU(struct bpf_bprintf_buffers, bpf_bprintf_bufs);
+
+static DEFINE_PER_CPU(struct bpf_bprintf_buffers[MAX_BPRINTF_NEST_LEVEL], bpf_bprintf_bufs);
static DEFINE_PER_CPU(int, bpf_bprintf_nest_level);
-static int try_get_fmt_tmp_buf(char **tmp_buf)
+static int try_get_buffers(struct bpf_bprintf_buffers **bufs)
{
- struct bpf_bprintf_buffers *bufs;
int nest_level;
preempt_disable();
@@ -732,15 +733,14 @@ static int try_get_fmt_tmp_buf(char **tmp_buf)
preempt_enable();
return -EBUSY;
}
- bufs = this_cpu_ptr(&bpf_bprintf_bufs);
- *tmp_buf = bufs->tmp_bufs[nest_level - 1];
+ *bufs = this_cpu_ptr(&bpf_bprintf_bufs[nest_level - 1]);
return 0;
}
void bpf_bprintf_cleanup(struct bpf_bprintf_data *data)
{
- if (!data->bin_args)
+ if (!data->bin_args && !data->buf)
return;
if (WARN_ON_ONCE(this_cpu_read(bpf_bprintf_nest_level) == 0))
return;
@@ -765,7 +765,9 @@ void bpf_bprintf_cleanup(struct bpf_bprintf_data *data)
int bpf_bprintf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args,
u32 num_args, struct bpf_bprintf_data *data)
{
+ bool get_buffers = (data->get_bin_args && num_args) || data->get_buf;
char *unsafe_ptr = NULL, *tmp_buf = NULL, *tmp_buf_end, *fmt_end;
+ struct bpf_bprintf_buffers *buffers = NULL;
size_t sizeof_cur_arg, sizeof_cur_ip;
int err, i, num_spec = 0;
u64 cur_arg;
@@ -776,14 +778,19 @@ int bpf_bprintf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args,
return -EINVAL;
fmt_size = fmt_end - fmt;
- if (data->get_bin_args) {
- if (num_args && try_get_fmt_tmp_buf(&tmp_buf))
- return -EBUSY;
+ if (get_buffers && try_get_buffers(&buffers))
+ return -EBUSY;
- tmp_buf_end = tmp_buf + MAX_BPRINTF_BUF_LEN;
+ if (data->get_bin_args) {
+ if (num_args)
+ tmp_buf = buffers->bin_args;
+ tmp_buf_end = tmp_buf + MAX_BPRINTF_BIN_ARGS;
data->bin_args = (u32 *)tmp_buf;
}
+ if (data->get_buf)
+ data->buf = buffers->buf;
+
for (i = 0; i < fmt_size; i++) {
if ((!isprint(fmt[i]) && !isspace(fmt[i])) || !isascii(fmt[i])) {
err = -EINVAL;
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 2bc85a87f020..a1dc0ff1962e 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -360,8 +360,6 @@ static const struct bpf_func_proto *bpf_get_probe_write_proto(void)
return &bpf_probe_write_user_proto;
}
-static DEFINE_RAW_SPINLOCK(trace_printk_lock);
-
#define MAX_TRACE_PRINTK_VARARGS 3
#define BPF_TRACE_PRINTK_SIZE 1024
@@ -371,9 +369,8 @@ BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1,
u64 args[MAX_TRACE_PRINTK_VARARGS] = { arg1, arg2, arg3 };
struct bpf_bprintf_data data = {
.get_bin_args = true,
+ .get_buf = true,
};
- static char buf[BPF_TRACE_PRINTK_SIZE];
- unsigned long flags;
int ret;
ret = bpf_bprintf_prepare(fmt, fmt_size, args,
@@ -381,11 +378,9 @@ BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1,
if (ret < 0)
return ret;
- raw_spin_lock_irqsave(&trace_printk_lock, flags);
- ret = bstr_printf(buf, sizeof(buf), fmt, data.bin_args);
+ ret = bstr_printf(data.buf, MAX_BPRINTF_BUF, fmt, data.bin_args);
- trace_bpf_trace_printk(buf);
- raw_spin_unlock_irqrestore(&trace_printk_lock, flags);
+ trace_bpf_trace_printk(data.buf);
bpf_bprintf_cleanup(&data);
--
2.34.1
^ permalink raw reply related [flat|nested] 9+ messages in thread* Re: [PATCH 5.15,6.1] Fixup preempt imbalance with bpf_trace_printk
2024-02-17 12:13 [PATCH 5.15,6.1] Fixup preempt imbalance with bpf_trace_printk Thadeu Lima de Souza Cascardo
` (6 preceding siblings ...)
2024-02-17 12:13 ` [PATCH 5.15 4/4] bpf: Remove trace_printk_lock Thadeu Lima de Souza Cascardo
@ 2024-02-23 15:43 ` Greg KH
7 siblings, 0 replies; 9+ messages in thread
From: Greg KH @ 2024-02-23 15:43 UTC (permalink / raw)
To: Thadeu Lima de Souza Cascardo; +Cc: stable, jolsa, daniel, yhs
On Sat, Feb 17, 2024 at 09:13:14AM -0300, Thadeu Lima de Souza Cascardo wrote:
> When bpf_trace_printk is called without any args in a second depth level,
> it will enable preemption without disabling it.
>
> These patch series fix this for 5.15 and 6.1. The fix was introduced in
> 6.3, so later kernels already have it. And 5.10 and earlier did not have
> the code that disabled preemption, so they are fine in that regard.
>
> This was tested by attaching a bpf program doing a non-0 arguments
> trace_printk at sys_enter and a 0 arguments snprintf at local_timer_entry.
>
> Dave Marchevsky (1):
> bpf: Merge printk and seq_printf VARARG max macros
>
> Jiri Olsa (3):
> bpf: Add struct for bin_args arg in bpf_bprintf_prepare
> bpf: Do cleanup in bpf_bprintf_cleanup only when needed
> bpf: Remove trace_printk_lock
>
> include/linux/bpf.h | 14 ++++++--
> kernel/bpf/helpers.c | 71 ++++++++++++++++++++++------------------
> kernel/bpf/verifier.c | 3 +-
> kernel/trace/bpf_trace.c | 39 ++++++++++------------
> 4 files changed, 72 insertions(+), 55 deletions(-)
>
> --
> 2.34.1
>
>
All now queued up, thanks!
greg k-h
^ permalink raw reply [flat|nested] 9+ messages in thread