Linux userland API discussions
 help / color / mirror / Atom feed
* [PATCH bpf-next v5 1/9] bpf: Extend BPF syscall with common attributes support
From: Leon Hwang @ 2026-01-12 14:56 UTC (permalink / raw)
  To: bpf
  Cc: Alexei Starovoitov, Daniel Borkmann, John Fastabend,
	Andrii Nakryiko, Martin KaFai Lau, Eduard Zingerman, Song Liu,
	Yonghong Song, KP Singh, Stanislav Fomichev, Hao Luo, Jiri Olsa,
	Shuah Khan, Christian Brauner, Seth Forshee, Yuichiro Tsuji,
	Andrey Albershteyn, Leon Hwang, Willem de Bruijn, Jason Xing,
	Tao Chen, Mykyta Yatsenko, Kumar Kartikeya Dwivedi,
	Anton Protopopov, Amery Hung, Rong Tao, linux-kernel, linux-api,
	linux-kselftest, kernel-patches-bot
In-Reply-To: <20260112145616.44195-1-leon.hwang@linux.dev>

Extend the BPF syscall to support a set of common attributes shared
across all BPF commands:

1. 'log_buf': User-provided buffer for storing logs.
2. 'log_size': Size of the log buffer.
3. 'log_level': Log verbosity level.
4. 'log_true_size': The size of the log reported by the kernel.

These common attributes are passed as the 4th argument to the BPF
syscall, with the 5th argument specifying the size of this structure.

To indicate the use of these common attributes from userspace, a new flag
'BPF_COMMON_ATTRS' ('1 << 16') is introduced. This flag is OR-ed into the
'cmd' field of the syscall.

When 'cmd & BPF_COMMON_ATTRS' is set, the kernel will copy the common
attributes from userspace into kernel space for use.

Signed-off-by: Leon Hwang <leon.hwang@linux.dev>
---
 include/linux/syscalls.h       |  3 ++-
 include/uapi/linux/bpf.h       |  8 ++++++++
 kernel/bpf/syscall.c           | 25 +++++++++++++++++++++----
 tools/include/uapi/linux/bpf.h |  8 ++++++++
 4 files changed, 39 insertions(+), 5 deletions(-)

diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index cf84d98964b2..729659202d77 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -937,7 +937,8 @@ asmlinkage long sys_seccomp(unsigned int op, unsigned int flags,
 asmlinkage long sys_getrandom(char __user *buf, size_t count,
 			      unsigned int flags);
 asmlinkage long sys_memfd_create(const char __user *uname_ptr, unsigned int flags);
-asmlinkage long sys_bpf(int cmd, union bpf_attr __user *attr, unsigned int size);
+asmlinkage long sys_bpf(int cmd, union bpf_attr __user *attr, unsigned int size,
+			struct bpf_common_attr __user *attr_common, unsigned int size_common);
 asmlinkage long sys_execveat(int dfd, const char __user *filename,
 			const char __user *const __user *argv,
 			const char __user *const __user *envp, int flags);
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 2a2ade4be60f..2f83eca0a357 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -986,6 +986,7 @@ enum bpf_cmd {
 	BPF_PROG_STREAM_READ_BY_FD,
 	BPF_PROG_ASSOC_STRUCT_OPS,
 	__MAX_BPF_CMD,
+	BPF_COMMON_ATTRS = 1 << 16, /* Indicate carrying bpf_common_attr. */
 };
 
 enum bpf_map_type {
@@ -1491,6 +1492,13 @@ struct bpf_stack_build_id {
 	};
 };
 
+struct bpf_common_attr {
+	__u64 log_buf;
+	__u32 log_size;
+	__u32 log_level;
+	__u32 log_true_size;
+};
+
 #define BPF_OBJ_NAME_LEN 16U
 
 enum {
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index ecc0929ce462..af703f7ea58e 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -6163,8 +6163,10 @@ static int prog_assoc_struct_ops(union bpf_attr *attr)
 	return ret;
 }
 
-static int __sys_bpf(enum bpf_cmd cmd, bpfptr_t uattr, unsigned int size)
+static int __sys_bpf(enum bpf_cmd cmd, bpfptr_t uattr, unsigned int size,
+		     bpfptr_t uattr_common, unsigned int size_common)
 {
+	struct bpf_common_attr common_attr;
 	union bpf_attr attr;
 	int err;
 
@@ -6178,6 +6180,20 @@ static int __sys_bpf(enum bpf_cmd cmd, bpfptr_t uattr, unsigned int size)
 	if (copy_from_bpfptr(&attr, uattr, size) != 0)
 		return -EFAULT;
 
+	memset(&common_attr, 0, sizeof(common_attr));
+	if (cmd & BPF_COMMON_ATTRS) {
+		err = bpf_check_uarg_tail_zero(uattr_common, sizeof(common_attr), size_common);
+		if (err)
+			return err;
+
+		cmd &= ~BPF_COMMON_ATTRS;
+		size_common = min_t(u32, size_common, sizeof(common_attr));
+		if (copy_from_bpfptr(&common_attr, uattr_common, size_common) != 0)
+			return -EFAULT;
+	} else {
+		size_common = 0;
+	}
+
 	err = security_bpf(cmd, &attr, size, uattr.is_kernel);
 	if (err < 0)
 		return err;
@@ -6313,9 +6329,10 @@ static int __sys_bpf(enum bpf_cmd cmd, bpfptr_t uattr, unsigned int size)
 	return err;
 }
 
-SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
+SYSCALL_DEFINE5(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size,
+		struct bpf_common_attr __user *, uattr_common, unsigned int, size_common)
 {
-	return __sys_bpf(cmd, USER_BPFPTR(uattr), size);
+	return __sys_bpf(cmd, USER_BPFPTR(uattr), size, USER_BPFPTR(uattr_common), size_common);
 }
 
 static bool syscall_prog_is_valid_access(int off, int size,
@@ -6346,7 +6363,7 @@ BPF_CALL_3(bpf_sys_bpf, int, cmd, union bpf_attr *, attr, u32, attr_size)
 	default:
 		return -EINVAL;
 	}
-	return __sys_bpf(cmd, KERNEL_BPFPTR(attr), attr_size);
+	return __sys_bpf(cmd, KERNEL_BPFPTR(attr), attr_size, KERNEL_BPFPTR(NULL), 0);
 }
 
 
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index b816bc53d2e1..2b05c689d51a 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -986,6 +986,7 @@ enum bpf_cmd {
 	BPF_PROG_STREAM_READ_BY_FD,
 	BPF_PROG_ASSOC_STRUCT_OPS,
 	__MAX_BPF_CMD,
+	BPF_COMMON_ATTRS = 1 << 16, /* Indicate carrying bpf_common_attr. */
 };
 
 enum bpf_map_type {
@@ -1491,6 +1492,13 @@ struct bpf_stack_build_id {
 	};
 };
 
+struct bpf_common_attr {
+	__u64 log_buf;
+	__u32 log_size;
+	__u32 log_level;
+	__u32 log_true_size;
+};
+
 #define BPF_OBJ_NAME_LEN 16U
 
 enum {
-- 
2.52.0


^ permalink raw reply related

* [PATCH bpf-next v5 0/9] bpf: Extend BPF syscall with common attributes support
From: Leon Hwang @ 2026-01-12 14:56 UTC (permalink / raw)
  To: bpf
  Cc: Alexei Starovoitov, Daniel Borkmann, John Fastabend,
	Andrii Nakryiko, Martin KaFai Lau, Eduard Zingerman, Song Liu,
	Yonghong Song, KP Singh, Stanislav Fomichev, Hao Luo, Jiri Olsa,
	Shuah Khan, Christian Brauner, Seth Forshee, Yuichiro Tsuji,
	Andrey Albershteyn, Leon Hwang, Willem de Bruijn, Jason Xing,
	Tao Chen, Mykyta Yatsenko, Kumar Kartikeya Dwivedi,
	Anton Protopopov, Amery Hung, Rong Tao, linux-kernel, linux-api,
	linux-kselftest, kernel-patches-bot

This patch series builds upon the discussion in
"[PATCH bpf-next v4 0/4] bpf: Improve error reporting for freplace attachment failure" [1].

This patch series introduces support for *common attributes* in the BPF
syscall, providing a unified mechanism for passing shared metadata across
all BPF commands.

The initial set of common attributes includes:

1. 'log_buf': User-provided buffer for storing log output.
2. 'log_size': Size of the provided log buffer.
3. 'log_level': Verbosity level for logging.
4. 'log_true_size': The size of the log reported by the kernel.

With this extension, the BPF syscall will be able to return meaningful
error messages (e.g., failures of creating map), improving debuggability
and user experience.

Links:
[1] https://lore.kernel.org/bpf/20250224153352.64689-1-leon.hwang@linux.dev/

Changes:
v4 -> v5:
* Rework reporting 'log_true_size' for prog_load, btf_load, and map_create
  (per Alexei).

RFC v3 -> v4:
* Drop RFC.
* Address comments from Andrii:
  * Add parentheses in 'sys_bpf_ext()'.
  * Avoid creating new fd in 'probe_sys_bpf_ext()'.
  * Add a new struct to wrap log fields in libbpf.
* Address comments from Alexei:
  * Do not skip writing to user space when log_true_size is zero.
  * Do not use 'bool' arguments.
  * Drop the added WARN_ON_ONCE()s.

RFC v2 -> RFC v3:
* Rename probe_sys_bpf_extended to probe_sys_bpf_ext.
* Refactor reporting 'log_true_size' for prog_load.
* Refactor reporting 'btf_log_true_size' for btf_load.
* Add warnings for internal bugs in map_create.
* Check log_true_size in test cases.
* Address comment from Alexei:
  * Change kvzalloc/kvfree to kzalloc/kfree.
* Address comments from Andrii:
  * Move BPF_COMMON_ATTRS to 'enum bpf_cmd' alongside brief comment.
  * Add bpf_check_uarg_tail_zero() for extra checks.
  * Rename sys_bpf_extended to sys_bpf_ext.
  * Rename sys_bpf_fd_extended to sys_bpf_ext_fd.
  * Probe the new feature using NULL and -EFAULT.
  * Move probe_sys_bpf_ext to libbpf_internal.h and drop LIBBPF_API.
  * Return -EUSERS when log attrs conflict between bpf_attr and
    bpf_common_attr.
  * Avoid touching bpf_vlog_init().
  * Update the reason messages in map_create.
  * Finalize the log using __cleanup().
  * Report log size to users.
  * Change type of log_buf from '__u64' to 'const char *' and cast type
    using ptr_to_u64() in bpf_map_create().
  * Do not return -EOPNOTSUPP when kernel doesn't support this feature
    in bpf_map_create().
  * Add log_level support for map creation for consistency.
* Address comment from Eduard:
  * Use common_attrs->log_level instead of BPF_LOG_FIXED.

RFC v1 -> RFC v2:
* Fix build error reported by test bot.
* Address comments from Alexei:
  * Drop new uapi for freplace.
  * Add common attributes support for prog_load and btf_load.
  * Add common attributes support for map_create.

Leon Hwang (9):
  bpf: Extend BPF syscall with common attributes support
  libbpf: Add support for extended bpf syscall
  bpf: Refactor reporting log_true_size for prog_load
  bpf: Add syscall common attributes support for prog_load
  bpf: Refactor reporting btf_log_true_size for btf_load
  bpf: Add syscall common attributes support for btf_load
  bpf: Add syscall common attributes support for map_create
  libbpf: Add common attr support for map_create
  selftests/bpf: Add tests to verify map create failure log

 include/linux/bpf.h                           |  19 +-
 include/linux/bpf_verifier.h                  |  17 ++
 include/linux/btf.h                           |   3 +-
 include/linux/syscalls.h                      |   3 +-
 include/uapi/linux/bpf.h                      |   8 +
 kernel/bpf/btf.c                              |  32 +---
 kernel/bpf/log.c                              | 103 +++++++++++
 kernel/bpf/syscall.c                          | 122 ++++++++++---
 kernel/bpf/verifier.c                         |  19 +-
 tools/include/uapi/linux/bpf.h                |   8 +
 tools/lib/bpf/bpf.c                           |  49 ++++-
 tools/lib/bpf/bpf.h                           |  17 +-
 tools/lib/bpf/features.c                      |   8 +
 tools/lib/bpf/libbpf_internal.h               |   3 +
 .../selftests/bpf/prog_tests/map_init.c       | 168 ++++++++++++++++++
 15 files changed, 518 insertions(+), 61 deletions(-)

--
2.52.0

^ permalink raw reply

* Re: [PATCH net-next] net: uapi: Provide an UAPI definition of 'struct sockaddr'
From: Florian Weimer @ 2026-01-12 13:37 UTC (permalink / raw)
  To: Thomas Weißschuh
  Cc: Arnd Bergmann, Jakub Kicinski, Eric Dumazet, Kuniyuki Iwashima,
	Paolo Abeni, Willem de Bruijn, Netdev, linux-kernel, linux-api
In-Reply-To: <20260112143158-efc74534-0283-4db1-812f-402794eb8844@linutronix.de>

* Thomas Weißschuh:

> On Mon, Jan 12, 2026 at 02:25:25PM +0100, Florian Weimer wrote:
>> * Thomas Weißschuh:
>> 
>> >> If you call the data member sa_data just like glibc, it will only fail
>> >> in C++, not C.  GCC considers the two definitions sufficiently
>> >> equivalent (even though glibc adds a may_alias attribute to meet POSIX
>> >> requirements), and duplicate definitions are permitted in C.
>> >
>> > clang is not so lenient and will error out.
>> 
>> It seems it accepts it if you switch to C23 mode.
>
> The currently supported baseline for UAPI headers is C90.
> We can't really force userspace to switch here.

Including libc and UAPI headers at the same time is still officially
unsupported, right?

We don't test for it, so lots of combinations do not work.

Thanks,
Florian


^ permalink raw reply

* Re: [PATCH net-next] net: uapi: Provide an UAPI definition of 'struct sockaddr'
From: Thomas Weißschuh @ 2026-01-12 13:33 UTC (permalink / raw)
  To: Florian Weimer
  Cc: Arnd Bergmann, Jakub Kicinski, Eric Dumazet, Kuniyuki Iwashima,
	Paolo Abeni, Willem de Bruijn, Netdev, linux-kernel, linux-api
In-Reply-To: <lhutswrj73u.fsf@oldenburg.str.redhat.com>

On Mon, Jan 12, 2026 at 02:25:25PM +0100, Florian Weimer wrote:
> * Thomas Weißschuh:
> 
> >> If you call the data member sa_data just like glibc, it will only fail
> >> in C++, not C.  GCC considers the two definitions sufficiently
> >> equivalent (even though glibc adds a may_alias attribute to meet POSIX
> >> requirements), and duplicate definitions are permitted in C.
> >
> > clang is not so lenient and will error out.
> 
> It seems it accepts it if you switch to C23 mode.

The currently supported baseline for UAPI headers is C90.
We can't really force userspace to switch here.


Thomas

^ permalink raw reply

* Re: [PATCH net-next] net: uapi: Provide an UAPI definition of 'struct sockaddr'
From: Florian Weimer @ 2026-01-12 13:25 UTC (permalink / raw)
  To: Thomas Weißschuh
  Cc: Arnd Bergmann, Jakub Kicinski, Eric Dumazet, Kuniyuki Iwashima,
	Paolo Abeni, Willem de Bruijn, Netdev, linux-kernel, linux-api
In-Reply-To: <20260112124604-dbf7f68d-2182-438f-9495-2931cac02a81@linutronix.de>

* Thomas Weißschuh:

>> If you call the data member sa_data just like glibc, it will only fail
>> in C++, not C.  GCC considers the two definitions sufficiently
>> equivalent (even though glibc adds a may_alias attribute to meet POSIX
>> requirements), and duplicate definitions are permitted in C.
>
> clang is not so lenient and will error out.

It seems it accepts it if you switch to C23 mode.

Thanks,
Florian


^ permalink raw reply

* Re: [PATCH net-next] net: uapi: Provide an UAPI definition of 'struct sockaddr'
From: Thomas Weißschuh @ 2026-01-12 11:55 UTC (permalink / raw)
  To: Florian Weimer, Arnd Bergmann
  Cc: Jakub Kicinski, Eric Dumazet, Kuniyuki Iwashima, Paolo Abeni,
	Willem de Bruijn, Netdev, linux-kernel, linux-api
In-Reply-To: <lhu7btnkqg6.fsf@oldenburg.str.redhat.com>

On Mon, Jan 12, 2026 at 12:42:17PM +0100, Florian Weimer wrote:
> * Arnd Bergmann:
> 
> > On Wed, Jan 7, 2026, at 00:13, Jakub Kicinski wrote:
> >> On Tue, 6 Jan 2026 11:32:52 +0100 Thomas Weißschuh wrote:
> >>> As for the failure in netdev CI however I am not so sure.
> >>> Looking at net-next-2026-01-05--12-00, the only failures triggered by my
> >>> change are also the ones from the bpf-ci. Are these the ones you meant,
> >>> or am I missing some others?
> >>
> >> Multiple things broke at once so slightly hard to fish the relevant
> >> stuff out from here:
> >>
> >> https://netdev.bots.linux.dev/contest.html?branch=net-next-2026-01-05--15-00&pass=0&pw-n=0
> >>
> >> Here's one:
> >>
> >> make[1]: Entering directory 
> >> '/home/virtme/testing/wt-3/tools/testing/selftests/net'
> >>   CC       busy_poller
> >> In file included from /usr/include/sys/socket.h:33,
> >>                  from /usr/include/netinet/in.h:23,
> >>                  from /usr/include/arpa/inet.h:22,
> >>                  from busy_poller.c:14:
> >> /usr/include/bits/socket.h:182:8: error: 
> >> redefinition of 'struct sockaddr'
> >
> >>                  from busy_poller.c:12:
> >> /home/virtme/testing/wt-3/usr/include/linux/socket.h:37:8: 
> >> note: originally defined here
> >
> > Maybe we can change all the instances of 'struct sockaddr' in
> > include/uapi/ to reference a new 'struct __kernel_sockaddr',
> > and then redirect that one if the libc header got included
> > first?
> >
> > struct __kernel_sockaddr {
> >        __kernel_sa_family_t    sa_family;      /* address family, AF_xxx       */
> >        char sa_data_min[14];           /* Minimum 14 bytes of protocol address */
> > };
> > #ifdef _SYS_SOCKET_H
> > #define __kernel_sockaddr sockaddr
> > #endif

I'm not a big fan of such a define in a generic header.

I do have a v2 of this patch currently in 0day. It reorders the inclusions
in the affected selftests. While it feels like a hack, interspersing the
different types of headers may already break randomly due to issues in
libc-compat.h (see below)

> > This will still fail when a user application includes linux/if.h
> > before sys/socket.h and then expects the structures in linux/if.h
> > to contain the libc version of sockaddr, but hopefully that is
> > much rarer. A survey of codesearch.debian.net shows almost all
> > users of linux/if.h first including sys/socket.h, and most of
> > them not caring about struct sockaddr either.

The whole linux/libc-compat.h machinery is brittle when UAPI headers are
included before libc headers. It will only detect the included libc headers
on its first inclusion. If overlapping libc and UAPI headers are included
after that, they will run into symbol clashes.

> If you call the data member sa_data just like glibc, it will only fail
> in C++, not C.  GCC considers the two definitions sufficiently
> equivalent (even though glibc adds a may_alias attribute to meet POSIX
> requirements), and duplicate definitions are permitted in C.

clang is not so lenient and will error out.

> C++ with modules will probably support duplicate definitions, too, but I
> haven't checked if it's possible to get this to work with GCC 16.


Thomas

^ permalink raw reply

* Re: [PATCH net-next] net: uapi: Provide an UAPI definition of 'struct sockaddr'
From: Florian Weimer @ 2026-01-12 11:42 UTC (permalink / raw)
  To: Arnd Bergmann
  Cc: Jakub Kicinski, Thomas Weißschuh, Eric Dumazet,
	Kuniyuki Iwashima, Paolo Abeni, Willem de Bruijn, Netdev,
	linux-kernel, linux-api
In-Reply-To: <06cf1396-c100-45ba-8b46-edb4ed4feb62@app.fastmail.com>

* Arnd Bergmann:

> On Wed, Jan 7, 2026, at 00:13, Jakub Kicinski wrote:
>> On Tue, 6 Jan 2026 11:32:52 +0100 Thomas Weißschuh wrote:
>>> As for the failure in netdev CI however I am not so sure.
>>> Looking at net-next-2026-01-05--12-00, the only failures triggered by my
>>> change are also the ones from the bpf-ci. Are these the ones you meant,
>>> or am I missing some others?
>>
>> Multiple things broke at once so slightly hard to fish the relevant
>> stuff out from here:
>>
>> https://netdev.bots.linux.dev/contest.html?branch=net-next-2026-01-05--15-00&pass=0&pw-n=0
>>
>> Here's one:
>>
>> make[1]: Entering directory 
>> '/home/virtme/testing/wt-3/tools/testing/selftests/net'
>>   CC       busy_poller
>> In file included from /usr/include/sys/socket.h:33,
>>                  from /usr/include/netinet/in.h:23,
>>                  from /usr/include/arpa/inet.h:22,
>>                  from busy_poller.c:14:
>> /usr/include/bits/socket.h:182:8: error: 
>> redefinition of 'struct sockaddr'
>
>>                  from busy_poller.c:12:
>> /home/virtme/testing/wt-3/usr/include/linux/socket.h:37:8: 
>> note: originally defined here
>
> Maybe we can change all the instances of 'struct sockaddr' in
> include/uapi/ to reference a new 'struct __kernel_sockaddr',
> and then redirect that one if the libc header got included
> first?
>
> struct __kernel_sockaddr {
>        __kernel_sa_family_t    sa_family;      /* address family, AF_xxx       */
>        char sa_data_min[14];           /* Minimum 14 bytes of protocol address */
> };
> #ifdef _SYS_SOCKET_H
> #define __kernel_sockaddr sockaddr
> #endif
>
> This will still fail when a user application includes linux/if.h
> before sys/socket.h and then expects the structures in linux/if.h
> to contain the libc version of sockaddr, but hopefully that is
> much rarer. A survey of codesearch.debian.net shows almost all
> users of linux/if.h first including sys/socket.h, and most of
> them not caring about struct sockaddr either.

If you call the data member sa_data just like glibc, it will only fail
in C++, not C.  GCC considers the two definitions sufficiently
equivalent (even though glibc adds a may_alias attribute to meet POSIX
requirements), and duplicate definitions are permitted in C.

C++ with modules will probably support duplicate definitions, too, but I
haven't checked if it's possible to get this to work with GCC 16.

Thanks,
Florian


^ permalink raw reply

* Re: [RESEND PATCH bpf-next v4 7/9] bpf: Add common attr support for map_create
From: Alexei Starovoitov @ 2026-01-09 21:08 UTC (permalink / raw)
  To: Leon Hwang
  Cc: bot+bpf-ci, bpf, Alexei Starovoitov, Daniel Borkmann,
	Andrii Nakryiko, Martin KaFai Lau, Eduard, Song Liu,
	Yonghong Song, John Fastabend, KP Singh, Stanislav Fomichev,
	Hao Luo, Jiri Olsa, Shuah Khan, Christian Brauner, Oleg Nesterov,
	sforshee, yuichtsu, aalbersh, Willem de Bruijn, Jason Xing,
	Paul Chaignon, Mykyta Yatsenko, Kumar Kartikeya Dwivedi,
	Anton Protopopov, Amery Hung, Rong Tao, LKML, Linux API,
	open list:KERNEL SELFTEST FRAMEWORK, kernel-patches-bot,
	Martin KaFai Lau, Chris Mason, Ihor Solodrai
In-Reply-To: <a936f459-a7a1-431d-8ef8-cae5cf4d2196@linux.dev>

On Tue, Jan 6, 2026 at 10:32 PM Leon Hwang <leon.hwang@linux.dev> wrote:
>
>
>
> On 7/1/26 14:20, Alexei Starovoitov wrote:
> > On Tue, Jan 6, 2026 at 9:57 PM Leon Hwang <leon.hwang@linux.dev> wrote:
> >>
> >>
> >>
> >> On 7/1/26 01:44, bot+bpf-ci@kernel.org wrote:
> >>>> diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
> >>>> index 14fc5738f2b9..e64cc7504731 100644
> >>>> --- a/kernel/bpf/syscall.c
> >>>> +++ b/kernel/bpf/syscall.c
> >>>
> >>> [ ... ]
> >>>
> >>>> @@ -6279,7 +6345,11 @@ static int __sys_bpf(enum bpf_cmd cmd, bpfptr_t uattr, unsigned int size,
> >>>>
> >>>>      switch (cmd) {
> >>>>      case BPF_MAP_CREATE:
> >>>> -            err = map_create(&attr, uattr);
> >>>> +            common_attrs.log_true_size = 0;
> >>>> +            err = map_create(&attr, uattr, &common_attrs);
> >>>> +            ret = copy_common_attr_log_true_size(uattr_common, size_common,
> >>>> +                                                 &common_attrs.log_true_size);
> >>>> +            err = ret ? ret : err;
> >>>
> >>> When map_create() succeeds, it returns a file descriptor that is already
> >>> installed in the caller's fd table via bpf_map_new_fd(). If
> >>> copy_common_attr_log_true_size() then fails (e.g., user provided a
> >>> read-only buffer for uattr_common), the syscall returns -EFAULT but the
> >>> fd remains installed.
> >>>
> >>> Could this leak the file descriptor? The user gets an error and has no
> >>> way to know what fd number was allocated, so they cannot close it.
> >>>
> >>
> >> Good catch — you’re right.
> >>
> >> If 'map_create()' succeeds and 'copy_common_attr_log_true_size()' later
> >> fails (e.g. returning -EFAULT), the newly created file descriptor would
> >> remain installed and could be leaked.
> >>
> >> I’ll fix this in the next revision by explicitly closing the fd when
> >> ret is non-zero.
> >
> > No. The refactoring was wrong. Don't make the kernel do extra work.
> > Patch 3 introduced a bug and closing fd is not a solution.
> > Such a pattern can be exploited for DoS.
>
> You’re right — closing the fd after the fact is not the correct
> solution, and introducing extra work in the kernel is undesirable. Doing
> so could also open the door to DoS-style abuse.
>
> The correct approach is to copy log_true_size into common_attrs
> before allocating and installing the new fd, so that a failure in
> copying cannot leave behind a partially created object.

Why move it at all?
I don't think you should be moving
copy_to_bpfptr_offset(uattr, offsetof(union bpf_attr, log_true_size),
                                  &log_true_size, sizeof(log_true_size))

from where it is in verifier.c

^ permalink raw reply

* Re: [PATCH net-next] net: uapi: Provide an UAPI definition of 'struct sockaddr'
From: Arnd Bergmann @ 2026-01-09 12:56 UTC (permalink / raw)
  To: Jakub Kicinski, Thomas Weißschuh
  Cc: Eric Dumazet, Kuniyuki Iwashima, Paolo Abeni, Willem de Bruijn,
	Netdev, linux-kernel, linux-api
In-Reply-To: <20260106151313.1f8bd508@kernel.org>

On Wed, Jan 7, 2026, at 00:13, Jakub Kicinski wrote:
> On Tue, 6 Jan 2026 11:32:52 +0100 Thomas Weißschuh wrote:
>> As for the failure in netdev CI however I am not so sure.
>> Looking at net-next-2026-01-05--12-00, the only failures triggered by my
>> change are also the ones from the bpf-ci. Are these the ones you meant,
>> or am I missing some others?
>
> Multiple things broke at once so slightly hard to fish the relevant
> stuff out from here:
>
> https://netdev.bots.linux.dev/contest.html?branch=net-next-2026-01-05--15-00&pass=0&pw-n=0
>
> Here's one:
>
> make[1]: Entering directory 
> '/home/virtme/testing/wt-3/tools/testing/selftests/net'
>   CC       busy_poller
> In file included from /usr/include/sys/socket.h:33,
>                  from /usr/include/netinet/in.h:23,
>                  from /usr/include/arpa/inet.h:22,
>                  from busy_poller.c:14:
> /usr/include/bits/socket.h:182:8: error: 
> redefinition of 'struct sockaddr'

>                  from busy_poller.c:12:
> /home/virtme/testing/wt-3/usr/include/linux/socket.h:37:8: 
> note: originally defined here

Maybe we can change all the instances of 'struct sockaddr' in
include/uapi/ to reference a new 'struct __kernel_sockaddr',
and then redirect that one if the libc header got included
first?

struct __kernel_sockaddr {
       __kernel_sa_family_t    sa_family;      /* address family, AF_xxx       */
       char sa_data_min[14];           /* Minimum 14 bytes of protocol address */
};
#ifdef _SYS_SOCKET_H
#define __kernel_sockaddr sockaddr
#endif

This will still fail when a user application includes linux/if.h
before sys/socket.h and then expects the structures in linux/if.h
to contain the libc version of sockaddr, but hopefully that is
much rarer. A survey of codesearch.debian.net shows almost all
users of linux/if.h first including sys/socket.h, and most of
them not caring about struct sockaddr either.

      Arnd

^ permalink raw reply

* Re: [PATCH v8 00/18] Live Update Orchestrator
From: Jason Gunthorpe @ 2026-01-07 18:54 UTC (permalink / raw)
  To: Pasha Tatashin
  Cc: pratyush, jasonmiu, graf, rppt, dmatlack, rientjes, corbet,
	rdunlap, ilpo.jarvinen, kanie, ojeda, aliceryhl, masahiroy, akpm,
	tj, yoann.congal, mmaurer, roman.gushchin, chenridong, axboe,
	mark.rutland, jannh, vincent.guittot, hannes, dan.j.williams,
	david, joel.granados, rostedt, anna.schumaker, song, linux,
	linux-kernel, linux-doc, linux-mm, gregkh, tglx, mingo, bp,
	dave.hansen, x86, hpa, rafael, dakr, bartosz.golaszewski,
	cw00.choi, myungjoo.ham, yesanishhere, Jonathan.Cameron,
	quic_zijuhu, aleksander.lobakin, ira.weiny, andriy.shevchenko,
	leon, lukas, bhelgaas, wagi, djeffery, stuart.w.hayes, ptyadav,
	lennart, brauner, linux-api, linux-fsdevel, saeedm, ajayachandra,
	parav, leonro, witu, hughd, skhawaja, chrisl
In-Reply-To: <20251125165850.3389713-1-pasha.tatashin@soleen.com>

On Tue, Nov 25, 2025 at 11:58:30AM -0500, Pasha Tatashin wrote:
> Andrew: This series has been fully reviewed, and contains minimal
> changes compared to what is currently being tested in linux-next.
> The diff between v7 and v8 can be viewed here: [8]

I didn't closely audit everything, but the overall design and operation
looks right to me:

Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>

Jason

^ permalink raw reply

* Re: [PATCH v8 14/18] mm: memfd_luo: allow preserving memfd
From: Jason Gunthorpe @ 2026-01-07 18:54 UTC (permalink / raw)
  To: Pasha Tatashin
  Cc: pratyush, jasonmiu, graf, rppt, dmatlack, rientjes, corbet,
	rdunlap, ilpo.jarvinen, kanie, ojeda, aliceryhl, masahiroy, akpm,
	tj, yoann.congal, mmaurer, roman.gushchin, chenridong, axboe,
	mark.rutland, jannh, vincent.guittot, hannes, dan.j.williams,
	david, joel.granados, rostedt, anna.schumaker, song, linux,
	linux-kernel, linux-doc, linux-mm, gregkh, tglx, mingo, bp,
	dave.hansen, x86, hpa, rafael, dakr, bartosz.golaszewski,
	cw00.choi, myungjoo.ham, yesanishhere, Jonathan.Cameron,
	quic_zijuhu, aleksander.lobakin, ira.weiny, andriy.shevchenko,
	leon, lukas, bhelgaas, wagi, djeffery, stuart.w.hayes, ptyadav,
	lennart, brauner, linux-api, linux-fsdevel, saeedm, ajayachandra,
	parav, leonro, witu, hughd, skhawaja, chrisl
In-Reply-To: <20251125165850.3389713-15-pasha.tatashin@soleen.com>

On Tue, Nov 25, 2025 at 11:58:44AM -0500, Pasha Tatashin wrote:
> From: Pratyush Yadav <ptyadav@amazon.de>
> 
> The ability to preserve a memfd allows userspace to use KHO and LUO to
> transfer its memory contents to the next kernel. This is useful in many
> ways. For one, it can be used with IOMMUFD as the backing store for
> IOMMU page tables. Preserving IOMMUFD is essential for performing a
> hypervisor live update with passthrough devices. memfd support provides
> the first building block for making that possible.

I would lead with the use of memfd to back the guest memory pages for
use with KVM :)

Jason

^ permalink raw reply

* Re: [PATCH v8 05/18] liveupdate: luo_core: add user interface
From: Jason Gunthorpe @ 2026-01-07 18:27 UTC (permalink / raw)
  To: Pasha Tatashin
  Cc: pratyush, jasonmiu, graf, rppt, dmatlack, rientjes, corbet,
	rdunlap, ilpo.jarvinen, kanie, ojeda, aliceryhl, masahiroy, akpm,
	tj, yoann.congal, mmaurer, roman.gushchin, chenridong, axboe,
	mark.rutland, jannh, vincent.guittot, hannes, dan.j.williams,
	david, joel.granados, rostedt, anna.schumaker, song, linux,
	linux-kernel, linux-doc, linux-mm, gregkh, tglx, mingo, bp,
	dave.hansen, x86, hpa, rafael, dakr, bartosz.golaszewski,
	cw00.choi, myungjoo.ham, yesanishhere, Jonathan.Cameron,
	quic_zijuhu, aleksander.lobakin, ira.weiny, andriy.shevchenko,
	leon, lukas, bhelgaas, wagi, djeffery, stuart.w.hayes, ptyadav,
	lennart, brauner, linux-api, linux-fsdevel, saeedm, ajayachandra,
	parav, leonro, witu, hughd, skhawaja, chrisl
In-Reply-To: <20251125165850.3389713-6-pasha.tatashin@soleen.com>

On Tue, Nov 25, 2025 at 11:58:35AM -0500, Pasha Tatashin wrote:
> +struct liveupdate_ioctl_create_session {
> +	__u32		size;
> +	__s32		fd;
> +	__u8		name[LIVEUPDATE_SESSION_NAME_LENGTH];
> +};

IMHO I would use

 __u32 name_len;
 __u32 reserved;
 __aligned_u64 name_uptr;

And then have the kernel copy_from_user() the name into kernel
memory. That way you avoid making LIVEUPDATE_SESSION_NAME_LENGTH into
strict ABI.

I have also been marking the output members with out_, so:

	__s32		out_fd;

for example

Jason

^ permalink raw reply

* Re: [PATCH v8 04/18] liveupdate: luo_session: add sessions support
From: Jason Gunthorpe @ 2026-01-07 18:20 UTC (permalink / raw)
  To: Pasha Tatashin
  Cc: pratyush, jasonmiu, graf, rppt, dmatlack, rientjes, corbet,
	rdunlap, ilpo.jarvinen, kanie, ojeda, aliceryhl, masahiroy, akpm,
	tj, yoann.congal, mmaurer, roman.gushchin, chenridong, axboe,
	mark.rutland, jannh, vincent.guittot, hannes, dan.j.williams,
	david, joel.granados, rostedt, anna.schumaker, song, linux,
	linux-kernel, linux-doc, linux-mm, gregkh, tglx, mingo, bp,
	dave.hansen, x86, hpa, rafael, dakr, bartosz.golaszewski,
	cw00.choi, myungjoo.ham, yesanishhere, Jonathan.Cameron,
	quic_zijuhu, aleksander.lobakin, ira.weiny, andriy.shevchenko,
	leon, lukas, bhelgaas, wagi, djeffery, stuart.w.hayes, ptyadav,
	lennart, brauner, linux-api, linux-fsdevel, saeedm, ajayachandra,
	parav, leonro, witu, hughd, skhawaja, chrisl
In-Reply-To: <20251125165850.3389713-5-pasha.tatashin@soleen.com>

On Tue, Nov 25, 2025 at 11:58:34AM -0500, Pasha Tatashin wrote:
> +/* Create a "struct file" for session */
> +static int luo_session_getfile(struct luo_session *session, struct file **filep)
> +{
> +	char name_buf[128];
> +	struct file *file;
> +
> +	lockdep_assert_held(&session->mutex);
> +	snprintf(name_buf, sizeof(name_buf), "[luo_session] %s", session->name);
> +	file = anon_inode_getfile(name_buf, &luo_session_fops, session, O_RDWR);
> +	if (IS_ERR(file))
> +		return PTR_ERR(file);
> +
> +	*filep = file;
> +
> +	return 0;
> +}

This is a bit odd, I'd expect it to return the file * not int ?

> +int luo_session_create(const char *name, struct file **filep)
> +{

Here too

> +	struct luo_session *session;
> +	int err;
> +
> +	session = luo_session_alloc(name);
> +	if (IS_ERR(session))
> +		return PTR_ERR(session);
> +
> +	err = luo_session_insert(&luo_session_global.outgoing, session);
> +	if (err)
> +		goto err_free;
> +
> +	scoped_guard(mutex, &session->mutex)
> +		err = luo_session_getfile(session, filep);

Is it style guide to have {} around scoped_guard's body?

> +int luo_session_retrieve(const char *name, struct file **filep)
> +{

Also here

Jason

^ permalink raw reply

* Re: [PATCH 7/6] fs: improve comment in fserror_alloc_event
From: Jan Kara @ 2026-01-07  9:19 UTC (permalink / raw)
  To: Darrick J. Wong
  Cc: jack, brauner, linux-api, hch, linux-ext4, linux-xfs,
	linux-fsdevel, gabriel, amir73il
In-Reply-To: <20260106233349.GL191501@frogsfrogsfrogs>

On Tue 06-01-26 15:33:49, Darrick J. Wong wrote:
> From: Darrick J. Wong <djwong@kernel.org>
> 
> Document the ordering requirements between SB_ACTIVE and
> s_pending_errors in the new fserror code.
> 
> Cc: jack@suse.cz
> Signed-off-by: "Darrick J. Wong" <djwong@kernel.org>

Thanks! I guess this will be folded into the original patch but just in
case:

Reviewed-by: Jan Kara <jack@suse.cz>

								Honza

> ---
>  fs/fserror.c |    5 +++++
>  1 file changed, 5 insertions(+)
> 
> diff --git a/fs/fserror.c b/fs/fserror.c
> index ec92f5a6db59ce..06ca86adab9b76 100644
> --- a/fs/fserror.c
> +++ b/fs/fserror.c
> @@ -79,6 +79,11 @@ static inline struct fserror_event *fserror_alloc_event(struct super_block *sb,
>  	 * If pending_errors already reached zero or is no longer active,
>  	 * the superblock is being deactivated so there's no point in
>  	 * continuing.
> +	 *
> +	 * The order of the check of s_pending_errors and SB_ACTIVE are
> +	 * mandated by order of accesses in generic_shutdown_super and
> +	 * fserror_unmount.  Barriers are implicitly provided by the refcount
> +	 * manipulations in this function and fserror_unmount.
>  	 */
>  	if (!refcount_inc_not_zero(&sb->s_pending_errors))
>  		return NULL;
-- 
Jan Kara <jack@suse.com>
SUSE Labs, CR

^ permalink raw reply

* Re: [RESEND PATCH bpf-next v4 7/9] bpf: Add common attr support for map_create
From: Leon Hwang @ 2026-01-07  6:31 UTC (permalink / raw)
  To: Alexei Starovoitov
  Cc: bot+bpf-ci, bpf, Alexei Starovoitov, Daniel Borkmann,
	Andrii Nakryiko, Martin KaFai Lau, Eduard, Song Liu,
	Yonghong Song, John Fastabend, KP Singh, Stanislav Fomichev,
	Hao Luo, Jiri Olsa, Shuah Khan, Christian Brauner, Oleg Nesterov,
	sforshee, yuichtsu, aalbersh, Willem de Bruijn, Jason Xing,
	Paul Chaignon, Mykyta Yatsenko, Kumar Kartikeya Dwivedi,
	Anton Protopopov, Amery Hung, Rong Tao, LKML, Linux API,
	open list:KERNEL SELFTEST FRAMEWORK, kernel-patches-bot,
	Martin KaFai Lau, Chris Mason, Ihor Solodrai
In-Reply-To: <CAADnVQJLifBhcpe5ci7FBB2uzTR5OXPji5RPq2NLSoVXpTfScg@mail.gmail.com>



On 7/1/26 14:20, Alexei Starovoitov wrote:
> On Tue, Jan 6, 2026 at 9:57 PM Leon Hwang <leon.hwang@linux.dev> wrote:
>>
>>
>>
>> On 7/1/26 01:44, bot+bpf-ci@kernel.org wrote:
>>>> diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
>>>> index 14fc5738f2b9..e64cc7504731 100644
>>>> --- a/kernel/bpf/syscall.c
>>>> +++ b/kernel/bpf/syscall.c
>>>
>>> [ ... ]
>>>
>>>> @@ -6279,7 +6345,11 @@ static int __sys_bpf(enum bpf_cmd cmd, bpfptr_t uattr, unsigned int size,
>>>>
>>>>      switch (cmd) {
>>>>      case BPF_MAP_CREATE:
>>>> -            err = map_create(&attr, uattr);
>>>> +            common_attrs.log_true_size = 0;
>>>> +            err = map_create(&attr, uattr, &common_attrs);
>>>> +            ret = copy_common_attr_log_true_size(uattr_common, size_common,
>>>> +                                                 &common_attrs.log_true_size);
>>>> +            err = ret ? ret : err;
>>>
>>> When map_create() succeeds, it returns a file descriptor that is already
>>> installed in the caller's fd table via bpf_map_new_fd(). If
>>> copy_common_attr_log_true_size() then fails (e.g., user provided a
>>> read-only buffer for uattr_common), the syscall returns -EFAULT but the
>>> fd remains installed.
>>>
>>> Could this leak the file descriptor? The user gets an error and has no
>>> way to know what fd number was allocated, so they cannot close it.
>>>
>>
>> Good catch — you’re right.
>>
>> If 'map_create()' succeeds and 'copy_common_attr_log_true_size()' later
>> fails (e.g. returning -EFAULT), the newly created file descriptor would
>> remain installed and could be leaked.
>>
>> I’ll fix this in the next revision by explicitly closing the fd when
>> ret is non-zero.
> 
> No. The refactoring was wrong. Don't make the kernel do extra work.
> Patch 3 introduced a bug and closing fd is not a solution.
> Such a pattern can be exploited for DoS.

You’re right — closing the fd after the fact is not the correct
solution, and introducing extra work in the kernel is undesirable. Doing
so could also open the door to DoS-style abuse.

The correct approach is to copy log_true_size into common_attrs
before allocating and installing the new fd, so that a failure in
copying cannot leave behind a partially created object.

I’ll rework this accordingly in the next revision.

Thanks,
Leon



^ permalink raw reply

* Re: [RESEND PATCH bpf-next v4 7/9] bpf: Add common attr support for map_create
From: Alexei Starovoitov @ 2026-01-07  6:20 UTC (permalink / raw)
  To: Leon Hwang
  Cc: bot+bpf-ci, bpf, Alexei Starovoitov, Daniel Borkmann,
	Andrii Nakryiko, Martin KaFai Lau, Eduard, Song Liu,
	Yonghong Song, John Fastabend, KP Singh, Stanislav Fomichev,
	Hao Luo, Jiri Olsa, Shuah Khan, Christian Brauner, Oleg Nesterov,
	sforshee, yuichtsu, aalbersh, Willem de Bruijn, Jason Xing,
	Paul Chaignon, Mykyta Yatsenko, Kumar Kartikeya Dwivedi,
	Anton Protopopov, Amery Hung, Rong Tao, LKML, Linux API,
	open list:KERNEL SELFTEST FRAMEWORK, kernel-patches-bot,
	Martin KaFai Lau, Chris Mason, Ihor Solodrai
In-Reply-To: <28ace50f-9ae6-46da-a05d-eab31f33f9cb@linux.dev>

On Tue, Jan 6, 2026 at 9:57 PM Leon Hwang <leon.hwang@linux.dev> wrote:
>
>
>
> On 7/1/26 01:44, bot+bpf-ci@kernel.org wrote:
> >> diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
> >> index 14fc5738f2b9..e64cc7504731 100644
> >> --- a/kernel/bpf/syscall.c
> >> +++ b/kernel/bpf/syscall.c
> >
> > [ ... ]
> >
> >> @@ -6279,7 +6345,11 @@ static int __sys_bpf(enum bpf_cmd cmd, bpfptr_t uattr, unsigned int size,
> >>
> >>      switch (cmd) {
> >>      case BPF_MAP_CREATE:
> >> -            err = map_create(&attr, uattr);
> >> +            common_attrs.log_true_size = 0;
> >> +            err = map_create(&attr, uattr, &common_attrs);
> >> +            ret = copy_common_attr_log_true_size(uattr_common, size_common,
> >> +                                                 &common_attrs.log_true_size);
> >> +            err = ret ? ret : err;
> >
> > When map_create() succeeds, it returns a file descriptor that is already
> > installed in the caller's fd table via bpf_map_new_fd(). If
> > copy_common_attr_log_true_size() then fails (e.g., user provided a
> > read-only buffer for uattr_common), the syscall returns -EFAULT but the
> > fd remains installed.
> >
> > Could this leak the file descriptor? The user gets an error and has no
> > way to know what fd number was allocated, so they cannot close it.
> >
>
> Good catch — you’re right.
>
> If 'map_create()' succeeds and 'copy_common_attr_log_true_size()' later
> fails (e.g. returning -EFAULT), the newly created file descriptor would
> remain installed and could be leaked.
>
> I’ll fix this in the next revision by explicitly closing the fd when
> ret is non-zero.

No. The refactoring was wrong. Don't make the kernel do extra work.
Patch 3 introduced a bug and closing fd is not a solution.
Such a pattern can be exploited for DoS.

^ permalink raw reply

* Re: [RESEND PATCH bpf-next v4 7/9] bpf: Add common attr support for map_create
From: Leon Hwang @ 2026-01-07  5:56 UTC (permalink / raw)
  To: bot+bpf-ci, bpf
  Cc: ast, daniel, andrii, martin.lau, eddyz87, song, yonghong.song,
	john.fastabend, kpsingh, sdf, haoluo, jolsa, shuah, brauner, oleg,
	sforshee, yuichtsu, aalbersh, willemb, kerneljasonxing,
	paul.chaignon, yatsenko, memxor, a.s.protopopov, ameryhung,
	rongtao, linux-kernel, linux-api, linux-kselftest,
	kernel-patches-bot, martin.lau, clm, ihor.solodrai
In-Reply-To: <db428e27fdf0110ea05cdadf8980c7758d7f4dcd61289293394db7e34ce99601@mail.kernel.org>



On 7/1/26 01:44, bot+bpf-ci@kernel.org wrote:
>> diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
>> index 14fc5738f2b9..e64cc7504731 100644
>> --- a/kernel/bpf/syscall.c
>> +++ b/kernel/bpf/syscall.c
> 
> [ ... ]
> 
>> @@ -6279,7 +6345,11 @@ static int __sys_bpf(enum bpf_cmd cmd, bpfptr_t uattr, unsigned int size,
>>
>>  	switch (cmd) {
>>  	case BPF_MAP_CREATE:
>> -		err = map_create(&attr, uattr);
>> +		common_attrs.log_true_size = 0;
>> +		err = map_create(&attr, uattr, &common_attrs);
>> +		ret = copy_common_attr_log_true_size(uattr_common, size_common,
>> +						     &common_attrs.log_true_size);
>> +		err = ret ? ret : err;
> 
> When map_create() succeeds, it returns a file descriptor that is already
> installed in the caller's fd table via bpf_map_new_fd(). If
> copy_common_attr_log_true_size() then fails (e.g., user provided a
> read-only buffer for uattr_common), the syscall returns -EFAULT but the
> fd remains installed.
> 
> Could this leak the file descriptor? The user gets an error and has no
> way to know what fd number was allocated, so they cannot close it.
> 

Good catch — you’re right.

If 'map_create()' succeeds and 'copy_common_attr_log_true_size()' later
fails (e.g. returning -EFAULT), the newly created file descriptor would
remain installed and could be leaked.

I’ll fix this in the next revision by explicitly closing the fd when
ret is non-zero.

Thanks,
Leon

> The sequence would be:
>   1. map_create() succeeds, returns fd (e.g., 5)
>   2. copy_common_attr_log_true_size() fails, ret = -EFAULT
>   3. err = ret ? ret : err = -EFAULT
>   4. syscall returns -EFAULT
>   5. map and fd persist, but user cannot close fd 5
> 
>> +		break;
>>  	case BPF_MAP_LOOKUP_ELEM:
>>  		err = map_lookup_elem(&attr);
> 
> 
> ---
> AI reviewed your patch. Please fix the bug or email reply why it's not a bug.
> See: https://github.com/kernel-patches/vmtest/blob/master/ci/claude/README.md
> 
> CI run summary: https://github.com/kernel-patches/bpf/actions/runs/20756616585


^ permalink raw reply

* Re: [RESEND PATCH bpf-next v4 3/9] bpf: Refactor reporting log_true_size for prog_load
From: Leon Hwang @ 2026-01-07  5:52 UTC (permalink / raw)
  To: bot+bpf-ci, bpf
  Cc: ast, daniel, andrii, martin.lau, eddyz87, song, yonghong.song,
	john.fastabend, kpsingh, sdf, haoluo, jolsa, shuah, brauner, oleg,
	sforshee, yuichtsu, aalbersh, willemb, kerneljasonxing,
	paul.chaignon, yatsenko, memxor, a.s.protopopov, ameryhung,
	rongtao, linux-kernel, linux-api, linux-kselftest,
	kernel-patches-bot, martin.lau, clm, ihor.solodrai
In-Reply-To: <53542ad28c0d27ffcfcf9ea325fb6a77eeb3d569e17a6fad9d95685b5ea25467@mail.kernel.org>



On 7/1/26 01:44, bot+bpf-ci@kernel.org wrote:
>> diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
>> index 8f464b847405..1739601fb7bd 100644
>> --- a/kernel/bpf/syscall.c
>> +++ b/kernel/bpf/syscall.c
> 
> [ ... ]
> 
>> @@ -6160,12 +6160,22 @@ static int prog_assoc_struct_ops(union bpf_attr *attr)
>>  	return ret;
>>  }
>>
>> +static int copy_prog_load_log_true_size(union bpf_attr *attr, bpfptr_t uattr, unsigned int size)
>> +{
>> +	if (size >= offsetofend(union bpf_attr, log_true_size) &&
>> +	    copy_to_bpfptr_offset(uattr, offsetof(union bpf_attr, log_true_size),
>> +				  &attr->log_true_size, sizeof(attr->log_true_size)))
>> +		return -EFAULT;
>> +
>> +	return 0;
>> +}
>> +
>>  static int __sys_bpf(enum bpf_cmd cmd, bpfptr_t uattr, unsigned int size,
>>  		     bpfptr_t uattr_common, unsigned int size_common)
>>  {
>>  	struct bpf_common_attr common_attrs;
>>  	union bpf_attr attr;
>> -	int err;
>> +	int err, ret;
> 
> [ ... ]
> 
>> @@ -6215,7 +6225,10 @@ static int __sys_bpf(enum bpf_cmd cmd, bpfptr_t uattr, unsigned int size,
>>  		err = map_freeze(&attr);
>>  		break;
>>  	case BPF_PROG_LOAD:
>> -		err = bpf_prog_load(&attr, uattr, size);
>> +		attr.log_true_size = 0;
>> +		err = bpf_prog_load(&attr, uattr);
>> +		ret = copy_prog_load_log_true_size(&attr, uattr, size);
>> +		err = ret ? ret : err;
>                     ^^^^^^^^^^^^^^^^^^^^
> 
> If bpf_prog_load() succeeds, it returns a positive file descriptor.
> Then if copy_prog_load_log_true_size() fails with -EFAULT, this line
> overwrites err with -EFAULT, losing the FD value.
> 
> The BPF program has already been loaded and the FD allocated at this
> point. Since the FD is never returned to userspace, the program
> remains loaded but unreachable - could this leak the BPF program?
> 

Indeed, the normal FD could be leaked if 'ret == -EFAULT'.

I'll address it in the next revision.

Thanks,
Leon

> In the previous code, the copy_to_bpfptr_offset() happened inside
> bpf_check() before the program was fully committed:
> 
>     if (uattr_size >= offsetofend(...) &&
>         copy_to_bpfptr_offset(...)) {
>             ret = -EFAULT;
>             goto err_release_maps;
>         }
> 
> This goto caused bpf_check() to return an error, preventing the FD
> allocation in bpf_prog_load(). Moving the copy outside changes the
> semantics - the FD can now be allocated before a copy failure occurs.
> 
>> +		break;
> 
> 
> ---
> AI reviewed your patch. Please fix the bug or email reply why it's not a bug.
> See: https://github.com/kernel-patches/vmtest/blob/master/ci/claude/README.md
> 
> CI run summary: https://github.com/kernel-patches/bpf/actions/runs/20756616585


^ permalink raw reply

* [PATCH 7/6] fs: improve comment in fserror_alloc_event
From: Darrick J. Wong @ 2026-01-06 23:33 UTC (permalink / raw)
  To: jack, brauner
  Cc: linux-api, hch, linux-ext4, linux-xfs, linux-fsdevel, gabriel,
	amir73il
In-Reply-To: <176602332085.686273.7564676516217176769.stgit@frogsfrogsfrogs>

From: Darrick J. Wong <djwong@kernel.org>

Document the ordering requirements between SB_ACTIVE and
s_pending_errors in the new fserror code.

Cc: jack@suse.cz
Signed-off-by: "Darrick J. Wong" <djwong@kernel.org>
---
 fs/fserror.c |    5 +++++
 1 file changed, 5 insertions(+)

diff --git a/fs/fserror.c b/fs/fserror.c
index ec92f5a6db59ce..06ca86adab9b76 100644
--- a/fs/fserror.c
+++ b/fs/fserror.c
@@ -79,6 +79,11 @@ static inline struct fserror_event *fserror_alloc_event(struct super_block *sb,
 	 * If pending_errors already reached zero or is no longer active,
 	 * the superblock is being deactivated so there's no point in
 	 * continuing.
+	 *
+	 * The order of the check of s_pending_errors and SB_ACTIVE are
+	 * mandated by order of accesses in generic_shutdown_super and
+	 * fserror_unmount.  Barriers are implicitly provided by the refcount
+	 * manipulations in this function and fserror_unmount.
 	 */
 	if (!refcount_inc_not_zero(&sb->s_pending_errors))
 		return NULL;

^ permalink raw reply related

* Re: [PATCH net-next] net: uapi: Provide an UAPI definition of 'struct sockaddr'
From: Jakub Kicinski @ 2026-01-06 23:13 UTC (permalink / raw)
  To: Thomas Weißschuh
  Cc: Eric Dumazet, Kuniyuki Iwashima, Paolo Abeni, Willem de Bruijn,
	netdev, linux-kernel, linux-api, Arnd Bergmann
In-Reply-To: <20260106112714-d47c16e0-0020-4851-9c2a-f8849c9a0677@linutronix.de>

On Tue, 6 Jan 2026 11:32:52 +0100 Thomas Weißschuh wrote:
> As for the failure in netdev CI however I am not so sure.
> Looking at net-next-2026-01-05--12-00, the only failures triggered by my
> change are also the ones from the bpf-ci. Are these the ones you meant,
> or am I missing some others?

Multiple things broke at once so slightly hard to fish the relevant
stuff out from here:

https://netdev.bots.linux.dev/contest.html?branch=net-next-2026-01-05--15-00&pass=0&pw-n=0

Here's one:

make[1]: Entering directory '/home/virtme/testing/wt-3/tools/testing/selftests/net'
  CC       busy_poller
In file included from /usr/include/sys/socket.h:33,
                 from /usr/include/netinet/in.h:23,
                 from /usr/include/arpa/inet.h:22,
                 from busy_poller.c:14:
/usr/include/bits/socket.h:182:8: error: redefinition of 'struct sockaddr'
  182 | struct sockaddr
      |        ^~~~~~~~
In file included from /home/virtme/testing/wt-3/usr/include/linux/netlink.h:6,
                 from /home/virtme/testing/wt-3/usr/include/linux/genetlink.h:6,
                 from /home/virtme/testing/wt-3/tools/testing/selftests/../../../tools/net/ynl/lib/ynl.h:7,
                 from busy_poller.c:12:
/home/virtme/testing/wt-3/usr/include/linux/socket.h:37:8: note: originally defined here
   37 | struct sockaddr {
      |        ^~~~~~~~
make[1]: *** [../lib.mk:225: /home/virtme/testing/wt-3/tools/testing/selftests/net/busy_poller] Error 1

https://netdev-3.bots.linux.dev/vmksft-net-dbg/results/460421/7-xfrm-policy-sh/stdout


^ permalink raw reply

* Re: [RESEND PATCH bpf-next v4 7/9] bpf: Add common attr support for map_create
From: bot+bpf-ci @ 2026-01-06 17:44 UTC (permalink / raw)
  To: leon.hwang, bpf
  Cc: ast, daniel, andrii, martin.lau, eddyz87, song, yonghong.song,
	john.fastabend, kpsingh, sdf, haoluo, jolsa, shuah, brauner, oleg,
	leon.hwang, sforshee, yuichtsu, aalbersh, willemb,
	kerneljasonxing, paul.chaignon, yatsenko, memxor, a.s.protopopov,
	ameryhung, rongtao, linux-kernel, linux-api, linux-kselftest,
	kernel-patches-bot, ast, andrii, daniel, martin.lau, eddyz87,
	yonghong.song, clm, ihor.solodrai
In-Reply-To: <20260106172018.57757-8-leon.hwang@linux.dev>

[-- Attachment #1: Type: text/plain, Size: 1585 bytes --]

> diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
> index 14fc5738f2b9..e64cc7504731 100644
> --- a/kernel/bpf/syscall.c
> +++ b/kernel/bpf/syscall.c

[ ... ]

> @@ -6279,7 +6345,11 @@ static int __sys_bpf(enum bpf_cmd cmd, bpfptr_t uattr, unsigned int size,
>
>  	switch (cmd) {
>  	case BPF_MAP_CREATE:
> -		err = map_create(&attr, uattr);
> +		common_attrs.log_true_size = 0;
> +		err = map_create(&attr, uattr, &common_attrs);
> +		ret = copy_common_attr_log_true_size(uattr_common, size_common,
> +						     &common_attrs.log_true_size);
> +		err = ret ? ret : err;

When map_create() succeeds, it returns a file descriptor that is already
installed in the caller's fd table via bpf_map_new_fd(). If
copy_common_attr_log_true_size() then fails (e.g., user provided a
read-only buffer for uattr_common), the syscall returns -EFAULT but the
fd remains installed.

Could this leak the file descriptor? The user gets an error and has no
way to know what fd number was allocated, so they cannot close it.

The sequence would be:
  1. map_create() succeeds, returns fd (e.g., 5)
  2. copy_common_attr_log_true_size() fails, ret = -EFAULT
  3. err = ret ? ret : err = -EFAULT
  4. syscall returns -EFAULT
  5. map and fd persist, but user cannot close fd 5

> +		break;
>  	case BPF_MAP_LOOKUP_ELEM:
>  		err = map_lookup_elem(&attr);


---
AI reviewed your patch. Please fix the bug or email reply why it's not a bug.
See: https://github.com/kernel-patches/vmtest/blob/master/ci/claude/README.md

CI run summary: https://github.com/kernel-patches/bpf/actions/runs/20756616585

^ permalink raw reply

* Re: [RESEND PATCH bpf-next v4 3/9] bpf: Refactor reporting log_true_size for prog_load
From: bot+bpf-ci @ 2026-01-06 17:44 UTC (permalink / raw)
  To: leon.hwang, bpf
  Cc: ast, daniel, andrii, martin.lau, eddyz87, song, yonghong.song,
	john.fastabend, kpsingh, sdf, haoluo, jolsa, shuah, brauner, oleg,
	leon.hwang, sforshee, yuichtsu, aalbersh, willemb,
	kerneljasonxing, paul.chaignon, yatsenko, memxor, a.s.protopopov,
	ameryhung, rongtao, linux-kernel, linux-api, linux-kselftest,
	kernel-patches-bot, ast, andrii, daniel, martin.lau, eddyz87,
	yonghong.song, clm, ihor.solodrai
In-Reply-To: <20260106172018.57757-4-leon.hwang@linux.dev>

[-- Attachment #1: Type: text/plain, Size: 2405 bytes --]

> diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
> index 8f464b847405..1739601fb7bd 100644
> --- a/kernel/bpf/syscall.c
> +++ b/kernel/bpf/syscall.c

[ ... ]

> @@ -6160,12 +6160,22 @@ static int prog_assoc_struct_ops(union bpf_attr *attr)
>  	return ret;
>  }
>
> +static int copy_prog_load_log_true_size(union bpf_attr *attr, bpfptr_t uattr, unsigned int size)
> +{
> +	if (size >= offsetofend(union bpf_attr, log_true_size) &&
> +	    copy_to_bpfptr_offset(uattr, offsetof(union bpf_attr, log_true_size),
> +				  &attr->log_true_size, sizeof(attr->log_true_size)))
> +		return -EFAULT;
> +
> +	return 0;
> +}
> +
>  static int __sys_bpf(enum bpf_cmd cmd, bpfptr_t uattr, unsigned int size,
>  		     bpfptr_t uattr_common, unsigned int size_common)
>  {
>  	struct bpf_common_attr common_attrs;
>  	union bpf_attr attr;
> -	int err;
> +	int err, ret;

[ ... ]

> @@ -6215,7 +6225,10 @@ static int __sys_bpf(enum bpf_cmd cmd, bpfptr_t uattr, unsigned int size,
>  		err = map_freeze(&attr);
>  		break;
>  	case BPF_PROG_LOAD:
> -		err = bpf_prog_load(&attr, uattr, size);
> +		attr.log_true_size = 0;
> +		err = bpf_prog_load(&attr, uattr);
> +		ret = copy_prog_load_log_true_size(&attr, uattr, size);
> +		err = ret ? ret : err;
                    ^^^^^^^^^^^^^^^^^^^^

If bpf_prog_load() succeeds, it returns a positive file descriptor.
Then if copy_prog_load_log_true_size() fails with -EFAULT, this line
overwrites err with -EFAULT, losing the FD value.

The BPF program has already been loaded and the FD allocated at this
point. Since the FD is never returned to userspace, the program
remains loaded but unreachable - could this leak the BPF program?

In the previous code, the copy_to_bpfptr_offset() happened inside
bpf_check() before the program was fully committed:

    if (uattr_size >= offsetofend(...) &&
        copy_to_bpfptr_offset(...)) {
            ret = -EFAULT;
            goto err_release_maps;
        }

This goto caused bpf_check() to return an error, preventing the FD
allocation in bpf_prog_load(). Moving the copy outside changes the
semantics - the FD can now be allocated before a copy failure occurs.

> +		break;


---
AI reviewed your patch. Please fix the bug or email reply why it's not a bug.
See: https://github.com/kernel-patches/vmtest/blob/master/ci/claude/README.md

CI run summary: https://github.com/kernel-patches/bpf/actions/runs/20756616585

^ permalink raw reply

* [RESEND PATCH bpf-next v4 9/9] selftests/bpf: Add tests to verify map create failure log
From: Leon Hwang @ 2026-01-06 17:20 UTC (permalink / raw)
  To: bpf
  Cc: Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko,
	Martin KaFai Lau, Eduard Zingerman, Song Liu, Yonghong Song,
	John Fastabend, KP Singh, Stanislav Fomichev, Hao Luo, Jiri Olsa,
	Shuah Khan, Christian Brauner, Oleg Nesterov, Leon Hwang,
	Seth Forshee, Yuichiro Tsuji, Andrey Albershteyn,
	Willem de Bruijn, Jason Xing, Paul Chaignon, Mykyta Yatsenko,
	Kumar Kartikeya Dwivedi, Anton Protopopov, Amery Hung, Rong Tao,
	linux-kernel, linux-api, linux-kselftest, kernel-patches-bot
In-Reply-To: <20260106172018.57757-1-leon.hwang@linux.dev>

As the kernel is now able to report a log when it fails to create a map,
add tests to verify those logs.

Signed-off-by: Leon Hwang <leon.hwang@linux.dev>
---
 .../selftests/bpf/prog_tests/map_init.c       | 143 ++++++++++++++++++
 1 file changed, 143 insertions(+)

diff --git a/tools/testing/selftests/bpf/prog_tests/map_init.c b/tools/testing/selftests/bpf/prog_tests/map_init.c
index 14a31109dd0e..52bd4b1966c8 100644
--- a/tools/testing/selftests/bpf/prog_tests/map_init.c
+++ b/tools/testing/selftests/bpf/prog_tests/map_init.c
@@ -212,3 +212,146 @@ void test_map_init(void)
 	if (test__start_subtest("pcpu_lru_map_init"))
 		test_pcpu_lru_map_init();
 }
+
+#define BPF_LOG_FIXED	8
+
+static void test_map_create(enum bpf_map_type map_type, const char *map_name,
+			    struct bpf_map_create_opts *opts, const char *exp_msg)
+{
+	const int key_size = 4, value_size = 4, max_entries = 1;
+	char log_buf[128];
+	int fd;
+	LIBBPF_OPTS(bpf_syscall_common_attr_opts, copts);
+
+	log_buf[0] = '\0';
+	copts.log_buf = log_buf;
+	copts.log_size = sizeof(log_buf);
+	copts.log_level = BPF_LOG_FIXED;
+	opts->cattr_opts = &copts;
+	fd = bpf_map_create(map_type, map_name, key_size, value_size, max_entries, opts);
+	if (!ASSERT_LT(fd, 0, "bpf_map_create")) {
+		close(fd);
+		return;
+	}
+
+	ASSERT_STREQ(log_buf, exp_msg, "log_buf");
+	ASSERT_EQ(copts.log_true_size, strlen(exp_msg) + 1, "log_true_size");
+}
+
+static void test_map_create_array(struct bpf_map_create_opts *opts, const char *exp_msg)
+{
+	test_map_create(BPF_MAP_TYPE_ARRAY, "test_map_create", opts, exp_msg);
+}
+
+static void test_invalid_vmlinux_value_type_id_struct_ops(void)
+{
+	const char *msg = "btf_vmlinux_value_type_id can only be used with struct_ops maps.\n";
+	LIBBPF_OPTS(bpf_map_create_opts, opts,
+		    .btf_vmlinux_value_type_id = 1,
+	);
+
+	test_map_create_array(&opts, msg);
+}
+
+static void test_invalid_vmlinux_value_type_id_kv_type_id(void)
+{
+	const char *msg = "btf_vmlinux_value_type_id is mutually exclusive with btf_key_type_id and btf_value_type_id.\n";
+	LIBBPF_OPTS(bpf_map_create_opts, opts,
+		    .btf_vmlinux_value_type_id = 1,
+		    .btf_key_type_id = 1,
+	);
+
+	test_map_create(BPF_MAP_TYPE_STRUCT_OPS, "test_map_create", &opts, msg);
+}
+
+static void test_invalid_value_type_id(void)
+{
+	const char *msg = "Invalid btf_value_type_id.\n";
+	LIBBPF_OPTS(bpf_map_create_opts, opts,
+		    .btf_key_type_id = 1,
+	);
+
+	test_map_create_array(&opts, msg);
+}
+
+static void test_invalid_map_extra(void)
+{
+	const char *msg = "Invalid map_extra.\n";
+	LIBBPF_OPTS(bpf_map_create_opts, opts,
+		    .map_extra = 1,
+	);
+
+	test_map_create_array(&opts, msg);
+}
+
+static void test_invalid_numa_node(void)
+{
+	const char *msg = "Invalid numa_node.\n";
+	LIBBPF_OPTS(bpf_map_create_opts, opts,
+		    .map_flags = BPF_F_NUMA_NODE,
+		    .numa_node = 0xFF,
+	);
+
+	test_map_create_array(&opts, msg);
+}
+
+static void test_invalid_map_type(void)
+{
+	const char *msg = "Invalid map_type.\n";
+	LIBBPF_OPTS(bpf_map_create_opts, opts);
+
+	test_map_create(__MAX_BPF_MAP_TYPE, "test_map_create", &opts, msg);
+}
+
+static void test_invalid_token_fd(void)
+{
+	const char *msg = "Invalid map_token_fd.\n";
+	LIBBPF_OPTS(bpf_map_create_opts, opts,
+		    .map_flags = BPF_F_TOKEN_FD,
+		    .token_fd = 0xFF,
+	);
+
+	test_map_create_array(&opts, msg);
+}
+
+static void test_invalid_map_name(void)
+{
+	const char *msg = "Invalid map_name.\n";
+	LIBBPF_OPTS(bpf_map_create_opts, opts);
+
+	test_map_create(BPF_MAP_TYPE_ARRAY, "test-!@#", &opts, msg);
+}
+
+static void test_invalid_btf_fd(void)
+{
+	const char *msg = "Invalid btf_fd.\n";
+	LIBBPF_OPTS(bpf_map_create_opts, opts,
+		    .btf_fd = -1,
+		    .btf_key_type_id = 1,
+		    .btf_value_type_id = 1,
+	);
+
+	test_map_create_array(&opts, msg);
+}
+
+void test_map_create_failure(void)
+{
+	if (test__start_subtest("invalid_vmlinux_value_type_id_struct_ops"))
+		test_invalid_vmlinux_value_type_id_struct_ops();
+	if (test__start_subtest("invalid_vmlinux_value_type_id_kv_type_id"))
+		test_invalid_vmlinux_value_type_id_kv_type_id();
+	if (test__start_subtest("invalid_value_type_id"))
+		test_invalid_value_type_id();
+	if (test__start_subtest("invalid_map_extra"))
+		test_invalid_map_extra();
+	if (test__start_subtest("invalid_numa_node"))
+		test_invalid_numa_node();
+	if (test__start_subtest("invalid_map_type"))
+		test_invalid_map_type();
+	if (test__start_subtest("invalid_token_fd"))
+		test_invalid_token_fd();
+	if (test__start_subtest("invalid_map_name"))
+		test_invalid_map_name();
+	if (test__start_subtest("invalid_btf_fd"))
+		test_invalid_btf_fd();
+}
-- 
2.52.0


^ permalink raw reply related

* [RESEND PATCH bpf-next v4 8/9] libbpf: Add common attr support for map_create
From: Leon Hwang @ 2026-01-06 17:20 UTC (permalink / raw)
  To: bpf
  Cc: Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko,
	Martin KaFai Lau, Eduard Zingerman, Song Liu, Yonghong Song,
	John Fastabend, KP Singh, Stanislav Fomichev, Hao Luo, Jiri Olsa,
	Shuah Khan, Christian Brauner, Oleg Nesterov, Leon Hwang,
	Seth Forshee, Yuichiro Tsuji, Andrey Albershteyn,
	Willem de Bruijn, Jason Xing, Paul Chaignon, Mykyta Yatsenko,
	Kumar Kartikeya Dwivedi, Anton Protopopov, Amery Hung, Rong Tao,
	linux-kernel, linux-api, linux-kselftest, kernel-patches-bot
In-Reply-To: <20260106172018.57757-1-leon.hwang@linux.dev>

With the previous commit adding common attribute support for
BPF_MAP_CREATE, it is now possible to retrieve detailed error messages
when map creation fails by using the 'log_buf' field from the common
attributes.

Introduce a new struct bpf_syscall_common_attr_opts, which wraps these
new fields, 'log_buf', 'log_size', 'log_level', and 'log_true_size'.

Extend 'bpf_map_create_opts' with a new field, 'cattr_opts', allowing
users to capture and inspect those log messages.

Signed-off-by: Leon Hwang <leon.hwang@linux.dev>
---
 tools/lib/bpf/bpf.c | 15 ++++++++++++++-
 tools/lib/bpf/bpf.h | 17 ++++++++++++++++-
 2 files changed, 30 insertions(+), 2 deletions(-)

diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index 689ade4a822b..4a1ba4951785 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -207,6 +207,9 @@ int bpf_map_create(enum bpf_map_type map_type,
 		   const struct bpf_map_create_opts *opts)
 {
 	const size_t attr_sz = offsetofend(union bpf_attr, excl_prog_hash_size);
+	const size_t common_attrs_sz = sizeof(struct bpf_common_attr);
+	struct bpf_syscall_common_attr_opts *cattr_opts;
+	struct bpf_common_attr common_attrs;
 	union bpf_attr attr;
 	int fd;
 
@@ -240,7 +243,17 @@ int bpf_map_create(enum bpf_map_type map_type,
 	attr.excl_prog_hash = ptr_to_u64(OPTS_GET(opts, excl_prog_hash, NULL));
 	attr.excl_prog_hash_size = OPTS_GET(opts, excl_prog_hash_size, 0);
 
-	fd = sys_bpf_fd(BPF_MAP_CREATE, &attr, attr_sz);
+	cattr_opts = OPTS_GET(opts, cattr_opts, NULL);
+	if (cattr_opts && feat_supported(NULL, FEAT_EXTENDED_SYSCALL)) {
+		memset(&common_attrs, 0, common_attrs_sz);
+		common_attrs.log_buf = ptr_to_u64(OPTS_GET(cattr_opts, log_buf, NULL));
+		common_attrs.log_size = OPTS_GET(cattr_opts, log_size, 0);
+		common_attrs.log_level = OPTS_GET(cattr_opts, log_level, 0);
+		fd = sys_bpf_ext_fd(BPF_MAP_CREATE, &attr, attr_sz, &common_attrs, common_attrs_sz);
+		OPTS_SET(cattr_opts, log_true_size, common_attrs.log_true_size);
+	} else {
+		fd = sys_bpf_fd(BPF_MAP_CREATE, &attr, attr_sz);
+	}
 	return libbpf_err_errno(fd);
 }
 
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index 1f9c28d27795..6f544c205b89 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -37,6 +37,18 @@ extern "C" {
 
 LIBBPF_API int libbpf_set_memlock_rlim(size_t memlock_bytes);
 
+struct bpf_syscall_common_attr_opts {
+	size_t sz; /* size of this struct for forward/backward compatibility */
+
+	const char *log_buf;
+	__u32 log_size;
+	__u32 log_level;
+	__u32 log_true_size;
+
+	size_t :0;
+};
+#define bpf_syscall_common_attr_opts__last_field log_true_size
+
 struct bpf_map_create_opts {
 	size_t sz; /* size of this struct for forward/backward compatibility */
 
@@ -57,9 +69,12 @@ struct bpf_map_create_opts {
 
 	const void *excl_prog_hash;
 	__u32 excl_prog_hash_size;
+
+	struct bpf_syscall_common_attr_opts *cattr_opts;
+
 	size_t :0;
 };
-#define bpf_map_create_opts__last_field excl_prog_hash_size
+#define bpf_map_create_opts__last_field cattr_opts
 
 LIBBPF_API int bpf_map_create(enum bpf_map_type map_type,
 			      const char *map_name,
-- 
2.52.0


^ permalink raw reply related

* [RESEND PATCH bpf-next v4 7/9] bpf: Add common attr support for map_create
From: Leon Hwang @ 2026-01-06 17:20 UTC (permalink / raw)
  To: bpf
  Cc: Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko,
	Martin KaFai Lau, Eduard Zingerman, Song Liu, Yonghong Song,
	John Fastabend, KP Singh, Stanislav Fomichev, Hao Luo, Jiri Olsa,
	Shuah Khan, Christian Brauner, Oleg Nesterov, Leon Hwang,
	Seth Forshee, Yuichiro Tsuji, Andrey Albershteyn,
	Willem de Bruijn, Jason Xing, Paul Chaignon, Mykyta Yatsenko,
	Kumar Kartikeya Dwivedi, Anton Protopopov, Amery Hung, Rong Tao,
	linux-kernel, linux-api, linux-kselftest, kernel-patches-bot
In-Reply-To: <20260106172018.57757-1-leon.hwang@linux.dev>

Currently, many 'BPF_MAP_CREATE' failures return '-EINVAL' without
providing any explanation to user space.

With the extended BPF syscall support, detailed error messages can now be
reported through the log buffer supplied in the common attributes. This
allows users to understand the specific reason for a failed map creation,
rather than just receiving a generic '-EINVAL'.

Signed-off-by: Leon Hwang <leon.hwang@linux.dev>
---
 kernel/bpf/syscall.c | 88 +++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 79 insertions(+), 9 deletions(-)

diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 14fc5738f2b9..e64cc7504731 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1366,23 +1366,72 @@ static bool bpf_net_capable(void)
 	return capable(CAP_NET_ADMIN) || capable(CAP_SYS_ADMIN);
 }
 
+struct bpf_vlog_wrapper {
+	struct bpf_common_attr *attr;
+	struct bpf_verifier_log *log;
+};
+
+static void bpf_vlog_wrapper_destructor(struct bpf_vlog_wrapper *w)
+{
+	if (!w->log)
+		return;
+
+	(void) bpf_vlog_finalize(w->log, &w->attr->log_true_size);
+	kfree(w->log);
+}
+
+#define DEFINE_BPF_VLOG_WRAPPER(name, common_attrs)				\
+	struct bpf_vlog_wrapper name __cleanup(bpf_vlog_wrapper_destructor) = {	\
+		.attr = common_attrs,						\
+	}
+
+static int bpf_vlog_wrapper_init(struct bpf_vlog_wrapper *w)
+{
+	struct bpf_common_attr *attr = w->attr;
+	struct bpf_verifier_log *log;
+	int err;
+
+	if (!attr->log_buf)
+		return 0;
+
+	log = kzalloc(sizeof(*log), GFP_KERNEL);
+	if (!log)
+		return -ENOMEM;
+
+	err = bpf_vlog_init(log, attr->log_level, u64_to_user_ptr(attr->log_buf), attr->log_size);
+	if (err) {
+		kfree(log);
+		return err;
+	}
+
+	w->log = log;
+	return 0;
+}
+
 #define BPF_MAP_CREATE_LAST_FIELD excl_prog_hash_size
 /* called via syscall */
-static int map_create(union bpf_attr *attr, bpfptr_t uattr)
+static int map_create(union bpf_attr *attr, bpfptr_t uattr, struct bpf_common_attr *common_attrs)
 {
 	const struct bpf_map_ops *ops;
 	struct bpf_token *token = NULL;
 	int numa_node = bpf_map_attr_numa_node(attr);
 	u32 map_type = attr->map_type;
+	struct bpf_verifier_log *log;
 	struct bpf_map *map;
 	bool token_flag;
 	int f_flags;
 	int err;
+	DEFINE_BPF_VLOG_WRAPPER(log_wrapper, common_attrs);
 
 	err = CHECK_ATTR(BPF_MAP_CREATE);
 	if (err)
 		return -EINVAL;
 
+	err = bpf_vlog_wrapper_init(&log_wrapper);
+	if (err)
+		return err;
+	log = log_wrapper.log;
+
 	/* check BPF_F_TOKEN_FD flag, remember if it's set, and then clear it
 	 * to avoid per-map type checks tripping on unknown flag
 	 */
@@ -1390,17 +1439,25 @@ static int map_create(union bpf_attr *attr, bpfptr_t uattr)
 	attr->map_flags &= ~BPF_F_TOKEN_FD;
 
 	if (attr->btf_vmlinux_value_type_id) {
-		if (attr->map_type != BPF_MAP_TYPE_STRUCT_OPS ||
-		    attr->btf_key_type_id || attr->btf_value_type_id)
+		if (attr->map_type != BPF_MAP_TYPE_STRUCT_OPS) {
+			bpf_log(log, "btf_vmlinux_value_type_id can only be used with struct_ops maps.\n");
 			return -EINVAL;
+		}
+		if (attr->btf_key_type_id || attr->btf_value_type_id) {
+			bpf_log(log, "btf_vmlinux_value_type_id is mutually exclusive with btf_key_type_id and btf_value_type_id.\n");
+			return -EINVAL;
+		}
 	} else if (attr->btf_key_type_id && !attr->btf_value_type_id) {
+		bpf_log(log, "Invalid btf_value_type_id.\n");
 		return -EINVAL;
 	}
 
 	if (attr->map_type != BPF_MAP_TYPE_BLOOM_FILTER &&
 	    attr->map_type != BPF_MAP_TYPE_ARENA &&
-	    attr->map_extra != 0)
+	    attr->map_extra != 0) {
+		bpf_log(log, "Invalid map_extra.\n");
 		return -EINVAL;
+	}
 
 	f_flags = bpf_get_file_flag(attr->map_flags);
 	if (f_flags < 0)
@@ -1408,13 +1465,17 @@ static int map_create(union bpf_attr *attr, bpfptr_t uattr)
 
 	if (numa_node != NUMA_NO_NODE &&
 	    ((unsigned int)numa_node >= nr_node_ids ||
-	     !node_online(numa_node)))
+	     !node_online(numa_node))) {
+		bpf_log(log, "Invalid numa_node.\n");
 		return -EINVAL;
+	}
 
 	/* find map type and init map: hashtable vs rbtree vs bloom vs ... */
 	map_type = attr->map_type;
-	if (map_type >= ARRAY_SIZE(bpf_map_types))
+	if (map_type >= ARRAY_SIZE(bpf_map_types)) {
+		bpf_log(log, "Invalid map_type.\n");
 		return -EINVAL;
+	}
 	map_type = array_index_nospec(map_type, ARRAY_SIZE(bpf_map_types));
 	ops = bpf_map_types[map_type];
 	if (!ops)
@@ -1432,8 +1493,10 @@ static int map_create(union bpf_attr *attr, bpfptr_t uattr)
 
 	if (token_flag) {
 		token = bpf_token_get_from_fd(attr->map_token_fd);
-		if (IS_ERR(token))
+		if (IS_ERR(token)) {
+			bpf_log(log, "Invalid map_token_fd.\n");
 			return PTR_ERR(token);
+		}
 
 		/* if current token doesn't grant map creation permissions,
 		 * then we can't use this token, so ignore it and rely on
@@ -1516,8 +1579,10 @@ static int map_create(union bpf_attr *attr, bpfptr_t uattr)
 
 	err = bpf_obj_name_cpy(map->name, attr->map_name,
 			       sizeof(attr->map_name));
-	if (err < 0)
+	if (err < 0) {
+		bpf_log(log, "Invalid map_name.\n");
 		goto free_map;
+	}
 
 	preempt_disable();
 	map->cookie = gen_cookie_next(&bpf_map_cookie);
@@ -1540,6 +1605,7 @@ static int map_create(union bpf_attr *attr, bpfptr_t uattr)
 
 		btf = btf_get_by_fd(attr->btf_fd);
 		if (IS_ERR(btf)) {
+			bpf_log(log, "Invalid btf_fd.\n");
 			err = PTR_ERR(btf);
 			goto free_map;
 		}
@@ -6279,7 +6345,11 @@ static int __sys_bpf(enum bpf_cmd cmd, bpfptr_t uattr, unsigned int size,
 
 	switch (cmd) {
 	case BPF_MAP_CREATE:
-		err = map_create(&attr, uattr);
+		common_attrs.log_true_size = 0;
+		err = map_create(&attr, uattr, &common_attrs);
+		ret = copy_common_attr_log_true_size(uattr_common, size_common,
+						     &common_attrs.log_true_size);
+		err = ret ? ret : err;
 		break;
 	case BPF_MAP_LOOKUP_ELEM:
 		err = map_lookup_elem(&attr);
-- 
2.52.0


^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox