* [PATCHv2 bpf-next] bpf: Remove trace_printk_lock
@ 2022-12-14 10:04 Jiri Olsa
2022-12-14 16:55 ` Yonghong Song
` (2 more replies)
0 siblings, 3 replies; 5+ messages in thread
From: Jiri Olsa @ 2022-12-14 10:04 UTC (permalink / raw)
To: Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko
Cc: Hao Sun, bpf, Martin KaFai Lau, Song Liu, Yonghong Song,
John Fastabend, KP Singh, Stanislav Fomichev, Hao Luo
Both the bpf_trace_printk and bpf_trace_vprintk helpers use a static buffer
guarded by the trace_printk_lock spin lock.
The spin lock contention causes issues with bpf programs attached to the
contention_begin tracepoint [1] [2].
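As an illustration only (not part of this patch; the program layout and section
name are assumptions), the problematic pattern is a BPF program attached to the
contention_begin tracepoint that itself calls the bpf_trace_printk helper, so
the helper's own lock contention can re-trigger the very tracepoint the program
is attached to:

  #include "vmlinux.h"
  #include <bpf/bpf_helpers.h>
  #include <bpf/bpf_tracing.h>

  /* bpf_printk() expands to a bpf_trace_printk() call */
  SEC("tp_btf/contention_begin")
  int BPF_PROG(on_contention_begin, void *lock, unsigned int flags)
  {
          bpf_printk("contention on lock %p flags %u", lock, flags);
          return 0;
  }

  char LICENSE[] SEC("license") = "GPL";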
Andrii suggested we could get rid of the contention by using trylock, but
we can actually get rid of the spinlock completely by using per-cpu buffers,
the same way bpf_bprintf_prepare does for bin_args.
Add 4 per-cpu buffers (1k each), which should be enough for all possible
nesting contexts (normal, softirq, irq, nmi) plus a possible (yet unlikely)
probe within the printk helpers themselves.
In the very unlikely case that we run out of nesting levels, the printk is
omitted.
[1] https://lore.kernel.org/bpf/CACkBjsakT_yWxnSWr4r-0TpPvbKm9-OBmVUhJb7hV3hY8fdCkw@mail.gmail.com/
[2] https://lore.kernel.org/bpf/CACkBjsaCsTovQHFfkqJKto6S4Z8d02ud1D7MPESrHa1cVNNTrw@mail.gmail.com/
Reported-by: Hao Sun <sunhao.th@gmail.com>
Suggested-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
---
v2 changes:
- changed subject [Yonghong]
- added WARN_ON_ONCE to get_printk_buf [Song]
kernel/trace/bpf_trace.c | 61 +++++++++++++++++++++++++++++++---------
1 file changed, 47 insertions(+), 14 deletions(-)
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 3bbd3f0c810c..a992b5a47fd6 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -369,33 +369,62 @@ static const struct bpf_func_proto *bpf_get_probe_write_proto(void)
return &bpf_probe_write_user_proto;
}
-static DEFINE_RAW_SPINLOCK(trace_printk_lock);
-
#define MAX_TRACE_PRINTK_VARARGS 3
#define BPF_TRACE_PRINTK_SIZE 1024
+#define BPF_TRACE_PRINTK_LEVELS 4
+
+struct trace_printk_buf {
+ char data[BPF_TRACE_PRINTK_LEVELS][BPF_TRACE_PRINTK_SIZE];
+ int level;
+};
+static DEFINE_PER_CPU(struct trace_printk_buf, printk_buf);
+
+static void put_printk_buf(struct trace_printk_buf __percpu *buf)
+{
+ if (WARN_ON_ONCE(this_cpu_read(buf->level) == 0))
+ return;
+ this_cpu_dec(buf->level);
+ preempt_enable();
+}
+
+static bool get_printk_buf(struct trace_printk_buf __percpu *buf, char **data)
+{
+ int level;
+
+ preempt_disable();
+ level = this_cpu_inc_return(buf->level);
+ if (WARN_ON_ONCE(level > BPF_TRACE_PRINTK_LEVELS)) {
+ put_printk_buf(buf);
+ return false;
+ }
+ *data = (char *) this_cpu_ptr(&buf->data[level - 1]);
+ return true;
+}
BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1,
u64, arg2, u64, arg3)
{
u64 args[MAX_TRACE_PRINTK_VARARGS] = { arg1, arg2, arg3 };
u32 *bin_args;
- static char buf[BPF_TRACE_PRINTK_SIZE];
- unsigned long flags;
+ char *buf;
int ret;
+ if (!get_printk_buf(&printk_buf, &buf))
+ return -EBUSY;
+
ret = bpf_bprintf_prepare(fmt, fmt_size, args, &bin_args,
MAX_TRACE_PRINTK_VARARGS);
if (ret < 0)
- return ret;
+ goto out;
- raw_spin_lock_irqsave(&trace_printk_lock, flags);
- ret = bstr_printf(buf, sizeof(buf), fmt, bin_args);
+ ret = bstr_printf(buf, BPF_TRACE_PRINTK_SIZE, fmt, bin_args);
trace_bpf_trace_printk(buf);
- raw_spin_unlock_irqrestore(&trace_printk_lock, flags);
bpf_bprintf_cleanup();
+out:
+ put_printk_buf(&printk_buf);
return ret;
}
@@ -427,31 +456,35 @@ const struct bpf_func_proto *bpf_get_trace_printk_proto(void)
return &bpf_trace_printk_proto;
}
+static DEFINE_PER_CPU(struct trace_printk_buf, vprintk_buf);
+
BPF_CALL_4(bpf_trace_vprintk, char *, fmt, u32, fmt_size, const void *, data,
u32, data_len)
{
- static char buf[BPF_TRACE_PRINTK_SIZE];
- unsigned long flags;
int ret, num_args;
u32 *bin_args;
+ char *buf;
if (data_len & 7 || data_len > MAX_BPRINTF_VARARGS * 8 ||
(data_len && !data))
return -EINVAL;
num_args = data_len / 8;
+ if (!get_printk_buf(&vprintk_buf, &buf))
+ return -EBUSY;
+
ret = bpf_bprintf_prepare(fmt, fmt_size, data, &bin_args, num_args);
if (ret < 0)
- return ret;
+ goto out;
- raw_spin_lock_irqsave(&trace_printk_lock, flags);
- ret = bstr_printf(buf, sizeof(buf), fmt, bin_args);
+ ret = bstr_printf(buf, BPF_TRACE_PRINTK_SIZE, fmt, bin_args);
trace_bpf_trace_printk(buf);
- raw_spin_unlock_irqrestore(&trace_printk_lock, flags);
bpf_bprintf_cleanup();
+out:
+ put_printk_buf(&vprintk_buf);
return ret;
}
--
2.38.1
* Re: [PATCHv2 bpf-next] bpf: Remove trace_printk_lock
2022-12-14 10:04 [PATCHv2 bpf-next] bpf: Remove trace_printk_lock Jiri Olsa
@ 2022-12-14 16:55 ` Yonghong Song
2022-12-14 17:51 ` Song Liu
2022-12-14 19:48 ` Andrii Nakryiko
2 siblings, 0 replies; 5+ messages in thread
From: Yonghong Song @ 2022-12-14 16:55 UTC (permalink / raw)
To: Jiri Olsa, Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko
Cc: Hao Sun, bpf, Martin KaFai Lau, Song Liu, Yonghong Song,
John Fastabend, KP Singh, Stanislav Fomichev, Hao Luo
On 12/14/22 2:04 AM, Jiri Olsa wrote:
> Both bpf_trace_printk and bpf_trace_vprintk helpers use static buffer
> guarded with trace_printk_lock spin lock.
>
> The spin lock contention causes issues with bpf programs attached to
> contention_begin tracepoint [1] [2].
>
> Andrii suggested we could get rid of the contention by using trylock,
> but we could actually get rid of the spinlock completely by using
> percpu buffers the same way as for bin_args in bpf_bprintf_prepare
> function.
>
> Adding 4 per cpu buffers (1k each) which should be enough for all
> possible nesting contexts (normal, softirq, irq, nmi) or possible
> (yet unlikely) probe within the printk helpers.
>
> In very unlikely case we'd run out of the nesting levels the printk
> will be omitted.
>
> [1] https://lore.kernel.org/bpf/CACkBjsakT_yWxnSWr4r-0TpPvbKm9-OBmVUhJb7hV3hY8fdCkw@mail.gmail.com/
> [2] https://lore.kernel.org/bpf/CACkBjsaCsTovQHFfkqJKto6S4Z8d02ud1D7MPESrHa1cVNNTrw@mail.gmail.com/
>
> Reported-by: Hao Sun <sunhao.th@gmail.com>
> Suggested-by: Andrii Nakryiko <andrii@kernel.org>
> Signed-off-by: Jiri Olsa <jolsa@kernel.org>
> ---
> v2 changes:
> - changed subject [Yonghong]
> - added WARN_ON_ONCE to get_printk_buf [Song]
>
> kernel/trace/bpf_trace.c | 61 +++++++++++++++++++++++++++++++---------
> 1 file changed, 47 insertions(+), 14 deletions(-)
Ack with a nit below.
Acked-by: Yonghong Song <yhs@fb.com>
>
> diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
> index 3bbd3f0c810c..a992b5a47fd6 100644
> --- a/kernel/trace/bpf_trace.c
> +++ b/kernel/trace/bpf_trace.c
> @@ -369,33 +369,62 @@ static const struct bpf_func_proto *bpf_get_probe_write_proto(void)
> return &bpf_probe_write_user_proto;
> }
>
> -static DEFINE_RAW_SPINLOCK(trace_printk_lock);
> -
> #define MAX_TRACE_PRINTK_VARARGS 3
> #define BPF_TRACE_PRINTK_SIZE 1024
> +#define BPF_TRACE_PRINTK_LEVELS 4
> +
> +struct trace_printk_buf {
> + char data[BPF_TRACE_PRINTK_LEVELS][BPF_TRACE_PRINTK_SIZE];
> + int level;
> +};
> +static DEFINE_PER_CPU(struct trace_printk_buf, printk_buf);
> +
> +static void put_printk_buf(struct trace_printk_buf __percpu *buf)
> +{
> + if (WARN_ON_ONCE(this_cpu_read(buf->level) == 0))
> + return;
The above WARN_ON_ONCE is not needed, as the condition never happens with the
current implementation. There are a few other similar cases in bpf_trace.c and
none of them has a WARN_ON_ONCE.
> + this_cpu_dec(buf->level);
> + preempt_enable();
> +}
> +
> +static bool get_printk_buf(struct trace_printk_buf __percpu *buf, char **data)
> +{
> + int level;
> +
> + preempt_disable();
> + level = this_cpu_inc_return(buf->level);
> + if (WARN_ON_ONCE(level > BPF_TRACE_PRINTK_LEVELS)) {
> + put_printk_buf(buf);
> + return false;
> + }
> + *data = (char *) this_cpu_ptr(&buf->data[level - 1]);
> + return true;
> +}
>
[...]
* Re: [PATCHv2 bpf-next] bpf: Remove trace_printk_lock
2022-12-14 10:04 [PATCHv2 bpf-next] bpf: Remove trace_printk_lock Jiri Olsa
2022-12-14 16:55 ` Yonghong Song
@ 2022-12-14 17:51 ` Song Liu
2022-12-14 19:48 ` Andrii Nakryiko
2 siblings, 0 replies; 5+ messages in thread
From: Song Liu @ 2022-12-14 17:51 UTC (permalink / raw)
To: Jiri Olsa
Cc: Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko, Hao Sun,
bpf, Martin KaFai Lau, Song Liu, Yonghong Song, John Fastabend,
KP Singh, Stanislav Fomichev, Hao Luo
On Wed, Dec 14, 2022 at 2:04 AM Jiri Olsa <jolsa@kernel.org> wrote:
>
> Both bpf_trace_printk and bpf_trace_vprintk helpers use static buffer
> guarded with trace_printk_lock spin lock.
>
> The spin lock contention causes issues with bpf programs attached to
> contention_begin tracepoint [1] [2].
>
> Andrii suggested we could get rid of the contention by using trylock,
> but we could actually get rid of the spinlock completely by using
> percpu buffers the same way as for bin_args in bpf_bprintf_prepare
> function.
>
> Adding 4 per cpu buffers (1k each) which should be enough for all
> possible nesting contexts (normal, softirq, irq, nmi) or possible
> (yet unlikely) probe within the printk helpers.
>
> In very unlikely case we'd run out of the nesting levels the printk
> will be omitted.
>
> [1] https://lore.kernel.org/bpf/CACkBjsakT_yWxnSWr4r-0TpPvbKm9-OBmVUhJb7hV3hY8fdCkw@mail.gmail.com/
> [2] https://lore.kernel.org/bpf/CACkBjsaCsTovQHFfkqJKto6S4Z8d02ud1D7MPESrHa1cVNNTrw@mail.gmail.com/
>
> Reported-by: Hao Sun <sunhao.th@gmail.com>
> Suggested-by: Andrii Nakryiko <andrii@kernel.org>
> Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Acked-by: Song Liu <song@kernel.org>
* Re: [PATCHv2 bpf-next] bpf: Remove trace_printk_lock
2022-12-14 10:04 [PATCHv2 bpf-next] bpf: Remove trace_printk_lock Jiri Olsa
2022-12-14 16:55 ` Yonghong Song
2022-12-14 17:51 ` Song Liu
@ 2022-12-14 19:48 ` Andrii Nakryiko
2022-12-14 23:48 ` Jiri Olsa
2 siblings, 1 reply; 5+ messages in thread
From: Andrii Nakryiko @ 2022-12-14 19:48 UTC (permalink / raw)
To: Jiri Olsa
Cc: Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko, Hao Sun,
bpf, Martin KaFai Lau, Song Liu, Yonghong Song, John Fastabend,
KP Singh, Stanislav Fomichev, Hao Luo
On Wed, Dec 14, 2022 at 2:04 AM Jiri Olsa <jolsa@kernel.org> wrote:
>
> Both bpf_trace_printk and bpf_trace_vprintk helpers use static buffer
> guarded with trace_printk_lock spin lock.
>
> The spin lock contention causes issues with bpf programs attached to
> contention_begin tracepoint [1] [2].
>
> Andrii suggested we could get rid of the contention by using trylock,
> but we could actually get rid of the spinlock completely by using
> percpu buffers the same way as for bin_args in bpf_bprintf_prepare
> function.
>
> Adding 4 per cpu buffers (1k each) which should be enough for all
> possible nesting contexts (normal, softirq, irq, nmi) or possible
> (yet unlikely) probe within the printk helpers.
>
> In very unlikely case we'd run out of the nesting levels the printk
> will be omitted.
>
> [1] https://lore.kernel.org/bpf/CACkBjsakT_yWxnSWr4r-0TpPvbKm9-OBmVUhJb7hV3hY8fdCkw@mail.gmail.com/
> [2] https://lore.kernel.org/bpf/CACkBjsaCsTovQHFfkqJKto6S4Z8d02ud1D7MPESrHa1cVNNTrw@mail.gmail.com/
>
> Reported-by: Hao Sun <sunhao.th@gmail.com>
> Suggested-by: Andrii Nakryiko <andrii@kernel.org>
> Signed-off-by: Jiri Olsa <jolsa@kernel.org>
> ---
> v2 changes:
> - changed subject [Yonghong]
> - added WARN_ON_ONCE to get_printk_buf [Song]
>
> kernel/trace/bpf_trace.c | 61 +++++++++++++++++++++++++++++++---------
> 1 file changed, 47 insertions(+), 14 deletions(-)
>
> diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
> index 3bbd3f0c810c..a992b5a47fd6 100644
> --- a/kernel/trace/bpf_trace.c
> +++ b/kernel/trace/bpf_trace.c
> @@ -369,33 +369,62 @@ static const struct bpf_func_proto *bpf_get_probe_write_proto(void)
> return &bpf_probe_write_user_proto;
> }
>
> -static DEFINE_RAW_SPINLOCK(trace_printk_lock);
> -
> #define MAX_TRACE_PRINTK_VARARGS 3
> #define BPF_TRACE_PRINTK_SIZE 1024
> +#define BPF_TRACE_PRINTK_LEVELS 4
> +
> +struct trace_printk_buf {
> + char data[BPF_TRACE_PRINTK_LEVELS][BPF_TRACE_PRINTK_SIZE];
> + int level;
> +};
> +static DEFINE_PER_CPU(struct trace_printk_buf, printk_buf);
> +
> +static void put_printk_buf(struct trace_printk_buf __percpu *buf)
> +{
> + if (WARN_ON_ONCE(this_cpu_read(buf->level) == 0))
> + return;
> + this_cpu_dec(buf->level);
> + preempt_enable();
> +}
> +
> +static bool get_printk_buf(struct trace_printk_buf __percpu *buf, char **data)
> +{
> + int level;
> +
> + preempt_disable();
> + level = this_cpu_inc_return(buf->level);
> + if (WARN_ON_ONCE(level > BPF_TRACE_PRINTK_LEVELS)) {
> + put_printk_buf(buf);
> + return false;
> + }
> + *data = (char *) this_cpu_ptr(&buf->data[level - 1]);
> + return true;
> +}
>
> BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1,
> u64, arg2, u64, arg3)
> {
> u64 args[MAX_TRACE_PRINTK_VARARGS] = { arg1, arg2, arg3 };
> u32 *bin_args;
> - static char buf[BPF_TRACE_PRINTK_SIZE];
> - unsigned long flags;
> + char *buf;
> int ret;
>
> + if (!get_printk_buf(&printk_buf, &buf))
> + return -EBUSY;
> +
> ret = bpf_bprintf_prepare(fmt, fmt_size, args, &bin_args,
> MAX_TRACE_PRINTK_VARARGS);
> if (ret < 0)
> - return ret;
> + goto out;
>
> - raw_spin_lock_irqsave(&trace_printk_lock, flags);
> - ret = bstr_printf(buf, sizeof(buf), fmt, bin_args);
> + ret = bstr_printf(buf, BPF_TRACE_PRINTK_SIZE, fmt, bin_args);
>
> trace_bpf_trace_printk(buf);
> - raw_spin_unlock_irqrestore(&trace_printk_lock, flags);
>
> bpf_bprintf_cleanup();
>
> +out:
> + put_printk_buf(&printk_buf);
> return ret;
> }
>
> @@ -427,31 +456,35 @@ const struct bpf_func_proto *bpf_get_trace_printk_proto(void)
> return &bpf_trace_printk_proto;
> }
>
> +static DEFINE_PER_CPU(struct trace_printk_buf, vprintk_buf);
> +
> BPF_CALL_4(bpf_trace_vprintk, char *, fmt, u32, fmt_size, const void *, data,
> u32, data_len)
> {
> - static char buf[BPF_TRACE_PRINTK_SIZE];
> - unsigned long flags;
> int ret, num_args;
> u32 *bin_args;
> + char *buf;
>
> if (data_len & 7 || data_len > MAX_BPRINTF_VARARGS * 8 ||
> (data_len && !data))
> return -EINVAL;
> num_args = data_len / 8;
>
> + if (!get_printk_buf(&vprintk_buf, &buf))
> + return -EBUSY;
> +
> ret = bpf_bprintf_prepare(fmt, fmt_size, data, &bin_args, num_args);
> if (ret < 0)
> - return ret;
> + goto out;
>
> - raw_spin_lock_irqsave(&trace_printk_lock, flags);
> - ret = bstr_printf(buf, sizeof(buf), fmt, bin_args);
> + ret = bstr_printf(buf, BPF_TRACE_PRINTK_SIZE, fmt, bin_args);
>
> trace_bpf_trace_printk(buf);
> - raw_spin_unlock_irqrestore(&trace_printk_lock, flags);
>
> bpf_bprintf_cleanup();
I checked what this is doing, and it seems like we have a very similar
thing there already, with preempt_disable(), 3-level buffers, etc.
Would it make sense to roll this new change into bpf_bprintf_prepare()
and make it return the buffer for bpf_trace_printk(), even if it is
not used for bpf_snprintf()?
>
> +out:
> + put_printk_buf(&vprintk_buf);
> return ret;
> }
>
> --
> 2.38.1
>
* Re: [PATCHv2 bpf-next] bpf: Remove trace_printk_lock
2022-12-14 19:48 ` Andrii Nakryiko
@ 2022-12-14 23:48 ` Jiri Olsa
0 siblings, 0 replies; 5+ messages in thread
From: Jiri Olsa @ 2022-12-14 23:48 UTC (permalink / raw)
To: Andrii Nakryiko
Cc: Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko, Hao Sun,
bpf, Martin KaFai Lau, Song Liu, Yonghong Song, John Fastabend,
KP Singh, Stanislav Fomichev, Hao Luo
On Wed, Dec 14, 2022 at 11:48:06AM -0800, Andrii Nakryiko wrote:
SNIP
> >
> > - raw_spin_lock_irqsave(&trace_printk_lock, flags);
> > - ret = bstr_printf(buf, sizeof(buf), fmt, bin_args);
> > + ret = bstr_printf(buf, BPF_TRACE_PRINTK_SIZE, fmt, bin_args);
> >
> > trace_bpf_trace_printk(buf);
> > - raw_spin_unlock_irqrestore(&trace_printk_lock, flags);
> >
> > bpf_bprintf_cleanup();
>
> I checked what this is doing. And it seems like we have a very similar
> thing there already, with preempt_disable(), 3-level buffers, etc.
> Would it make sense to roll this new change into bpf_bprintf_prepare()
> and make it return the buffer for bpf_trace_printk(), even if it is
> not used for bpf_snprintf() ?
I thought adding another arg to bpf_bprintf_prepare() would be too much,
but it actually does not look that bad.
I'll do some more testing and send another version.
jirka
---
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 3de24cfb7a3d..da5733f15994 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -2795,9 +2795,10 @@ struct btf_id_set;
bool btf_id_set_contains(const struct btf_id_set *set, u32 id);
#define MAX_BPRINTF_VARARGS 12
+#define MAX_PRINTF_BUF 1024
int bpf_bprintf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args,
- u32 **bin_buf, u32 num_args);
+ u32 **bin_buf, char **buf, u32 num_args);
void bpf_bprintf_cleanup(void);
/* the implementation of the opaque uapi struct bpf_dynptr */
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index af30c6cbd65d..e2c1e573401b 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -756,19 +756,20 @@ static int bpf_trace_copy_string(char *buf, void *unsafe_ptr, char fmt_ptype,
/* Per-cpu temp buffers used by printf-like helpers to store the bprintf binary
* arguments representation.
*/
-#define MAX_BPRINTF_BUF_LEN 512
+#define MAX_PRINTF_BIN_ARGS 512
/* Support executing three nested bprintf helper calls on a given CPU */
#define MAX_BPRINTF_NEST_LEVEL 3
struct bpf_bprintf_buffers {
- char tmp_bufs[MAX_BPRINTF_NEST_LEVEL][MAX_BPRINTF_BUF_LEN];
+ char bin_args[MAX_PRINTF_BIN_ARGS];
+ char buf[MAX_PRINTF_BUF];
};
-static DEFINE_PER_CPU(struct bpf_bprintf_buffers, bpf_bprintf_bufs);
+
+static DEFINE_PER_CPU(struct bpf_bprintf_buffers[MAX_BPRINTF_NEST_LEVEL], bpf_bprintf_bufs);
static DEFINE_PER_CPU(int, bpf_bprintf_nest_level);
-static int try_get_fmt_tmp_buf(char **tmp_buf)
+static int try_get_buffers(struct bpf_bprintf_buffers **bufs)
{
- struct bpf_bprintf_buffers *bufs;
int nest_level;
preempt_disable();
@@ -778,9 +779,7 @@ static int try_get_fmt_tmp_buf(char **tmp_buf)
preempt_enable();
return -EBUSY;
}
- bufs = this_cpu_ptr(&bpf_bprintf_bufs);
- *tmp_buf = bufs->tmp_bufs[nest_level - 1];
-
+ *bufs = this_cpu_ptr(&bpf_bprintf_bufs[nest_level]);
return 0;
}
@@ -807,27 +806,33 @@ void bpf_bprintf_cleanup(void)
* allocated and bpf_bprintf_cleanup should be called to free them after use.
*/
int bpf_bprintf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args,
- u32 **bin_args, u32 num_args)
+ u32 **bin_args, char **buf, u32 num_args)
{
char *unsafe_ptr = NULL, *tmp_buf = NULL, *tmp_buf_end, *fmt_end;
+ struct bpf_bprintf_buffers *buffers = NULL;
size_t sizeof_cur_arg, sizeof_cur_ip;
int err, i, num_spec = 0;
u64 cur_arg;
char fmt_ptype, cur_ip[16], ip_spec[] = "%pXX";
+ bool get_buffers = bin_args || buf;
fmt_end = strnchr(fmt, fmt_size, 0);
if (!fmt_end)
return -EINVAL;
fmt_size = fmt_end - fmt;
- if (bin_args) {
- if (num_args && try_get_fmt_tmp_buf(&tmp_buf))
- return -EBUSY;
+ if (get_buffers && try_get_buffers(&buffers))
+ return -EBUSY;
- tmp_buf_end = tmp_buf + MAX_BPRINTF_BUF_LEN;
+ if (bin_args) {
+ tmp_buf = buffers->bin_args;
+ tmp_buf_end = tmp_buf + MAX_PRINTF_BIN_ARGS;
*bin_args = (u32 *)tmp_buf;
}
+ if (buf)
+ *buf = buffers->buf;
+
for (i = 0; i < fmt_size; i++) {
if ((!isprint(fmt[i]) && !isspace(fmt[i])) || !isascii(fmt[i])) {
err = -EINVAL;
@@ -1020,7 +1025,7 @@ int bpf_bprintf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args,
err = 0;
out:
- if (err)
+ if (err && get_buffers)
bpf_bprintf_cleanup();
return err;
}
@@ -1039,7 +1044,7 @@ BPF_CALL_5(bpf_snprintf, char *, str, u32, str_size, char *, fmt,
/* ARG_PTR_TO_CONST_STR guarantees that fmt is zero-terminated so we
* can safely give an unbounded size.
*/
- err = bpf_bprintf_prepare(fmt, UINT_MAX, data, &bin_args, num_args);
+ err = bpf_bprintf_prepare(fmt, UINT_MAX, data, &bin_args, NULL, num_args);
if (err < 0)
return err;
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index a5255a0dcbb6..798f65c532b1 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -7636,7 +7636,7 @@ static int check_bpf_snprintf_call(struct bpf_verifier_env *env,
/* We are also guaranteed that fmt+fmt_map_off is NULL terminated, we
* can focus on validating the format specifiers.
*/
- err = bpf_bprintf_prepare(fmt, UINT_MAX, NULL, NULL, num_args);
+ err = bpf_bprintf_prepare(fmt, UINT_MAX, NULL, NULL, NULL, num_args);
if (err < 0)
verbose(env, "Invalid format string\n");
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 3bbd3f0c810c..7a6a07b2180e 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -369,8 +369,6 @@ static const struct bpf_func_proto *bpf_get_probe_write_proto(void)
return &bpf_probe_write_user_proto;
}
-static DEFINE_RAW_SPINLOCK(trace_printk_lock);
-
#define MAX_TRACE_PRINTK_VARARGS 3
#define BPF_TRACE_PRINTK_SIZE 1024
@@ -379,20 +377,17 @@ BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1,
{
u64 args[MAX_TRACE_PRINTK_VARARGS] = { arg1, arg2, arg3 };
u32 *bin_args;
- static char buf[BPF_TRACE_PRINTK_SIZE];
- unsigned long flags;
+ char *buf;
int ret;
- ret = bpf_bprintf_prepare(fmt, fmt_size, args, &bin_args,
+ ret = bpf_bprintf_prepare(fmt, fmt_size, args, &bin_args, &buf,
MAX_TRACE_PRINTK_VARARGS);
if (ret < 0)
return ret;
- raw_spin_lock_irqsave(&trace_printk_lock, flags);
- ret = bstr_printf(buf, sizeof(buf), fmt, bin_args);
+ ret = bstr_printf(buf, MAX_PRINTF_BUF, fmt, bin_args);
trace_bpf_trace_printk(buf);
- raw_spin_unlock_irqrestore(&trace_printk_lock, flags);
bpf_bprintf_cleanup();
@@ -430,25 +425,22 @@ const struct bpf_func_proto *bpf_get_trace_printk_proto(void)
BPF_CALL_4(bpf_trace_vprintk, char *, fmt, u32, fmt_size, const void *, data,
u32, data_len)
{
- static char buf[BPF_TRACE_PRINTK_SIZE];
- unsigned long flags;
int ret, num_args;
u32 *bin_args;
+ char *buf;
if (data_len & 7 || data_len > MAX_BPRINTF_VARARGS * 8 ||
(data_len && !data))
return -EINVAL;
num_args = data_len / 8;
- ret = bpf_bprintf_prepare(fmt, fmt_size, data, &bin_args, num_args);
+ ret = bpf_bprintf_prepare(fmt, fmt_size, data, &bin_args, &buf, num_args);
if (ret < 0)
return ret;
- raw_spin_lock_irqsave(&trace_printk_lock, flags);
- ret = bstr_printf(buf, sizeof(buf), fmt, bin_args);
+ ret = bstr_printf(buf, MAX_PRINTF_BUF, fmt, bin_args);
trace_bpf_trace_printk(buf);
- raw_spin_unlock_irqrestore(&trace_printk_lock, flags);
bpf_bprintf_cleanup();
@@ -482,7 +474,7 @@ BPF_CALL_5(bpf_seq_printf, struct seq_file *, m, char *, fmt, u32, fmt_size,
return -EINVAL;
num_args = data_len / 8;
- err = bpf_bprintf_prepare(fmt, fmt_size, data, &bin_args, num_args);
+ err = bpf_bprintf_prepare(fmt, fmt_size, data, &bin_args, NULL, num_args);
if (err < 0)
return err;