Linux userland API discussions
 help / color / mirror / Atom feed
* Re: O_CLOEXEC use for OPEN_TREE_CLOEXEC
From: Christian Brauner @ 2026-01-16 10:00 UTC (permalink / raw)
  To: Florian Weimer
  Cc: linux-fsdevel, linux-api, linux-kernel, Al Viro, David Howells,
	DJ Delorie
In-Reply-To: <lhuwm1ji7bl.fsf@oldenburg.str.redhat.com>

On Thu, Jan 15, 2026 at 09:55:10AM +0100, Florian Weimer wrote:
> * Christian Brauner:
> 
> > On Tue, Jan 13, 2026 at 11:40:55PM +0100, Florian Weimer wrote:
> >> In <linux/mount.h>, we have this:
> >> 
> >> #define OPEN_TREE_CLOEXEC      O_CLOEXEC       /* Close the file on execve() */
> >> 
> >> This causes a few pain points for us to on the glibc side when we mirror
> >> this into <linux/mount.h> becuse O_CLOEXEC is defined in <fcntl.h>,
> >> which is one of the headers that's completely incompatible with the UAPI
> >> headers.
> >> 
> >> The reason why this is painful is because O_CLOEXEC has at least three
> >> different values across architectures: 0x80000, 0x200000, 0x400000
> >> 
> >> Even for the UAPI this isn't ideal because it effectively burns three
> >> open_tree flags, unless the flags are made architecture-specific, too.
> >
> > I think that just got cargo-culted... A long time ago some API define as
> > O_CLOEXEC and now a lot of APIs have done the same.
> 
> Yes, it looks like inotify is in the same boat.

It's unfortunately nost just inotify...:

include/linux/net.h:#define SOCK_CLOEXEC        O_CLOEXEC
include/uapi/drm/drm.h:#define DRM_CLOEXEC O_CLOEXEC
include/uapi/linux/eventfd.h:#define EFD_CLOEXEC O_CLOEXEC
include/uapi/linux/eventpoll.h:#define EPOLL_CLOEXEC O_CLOEXEC
include/uapi/linux/inotify.h:#define IN_CLOEXEC O_CLOEXEC
include/uapi/linux/signalfd.h:#define SFD_CLOEXEC O_CLOEXEC
include/uapi/linux/timerfd.h:#define TFD_CLOEXEC O_CLOEXEC

> 
> > I'm pretty sure we can't change that now but we can document that this
> > shouldn't be ifdefed and instead be a separate per-syscall bit. But I
> > think that's the best we can do right now.
> 
> Maybe add something like this as a safety measure, to ensure that the
> flags don't overlap?
> 
> diff --git a/fs/namespace.c b/fs/namespace.c
> index c58674a20cad..5bbfd379ec44 100644
> --- a/fs/namespace.c
> +++ b/fs/namespace.c
> @@ -3069,6 +3069,9 @@ static struct file *vfs_open_tree(int dfd, const char __user *filename, unsigned
>  	bool detached = flags & OPEN_TREE_CLONE;
>  
>  	BUILD_BUG_ON(OPEN_TREE_CLOEXEC != O_CLOEXEC);
> +	BUILD_BUG_IN(!(O_CLOEXEC & OPEN_TREE_CLONE));
> +	BUILD_BUG_ON(!((AT_EMPTY_PATH | AT_NO_AUTOMOUNT | AT_RECURSIVE | AT_SYMLINK_NOFOLLOW) &
> +		       (O_CLOEXEC | OPEN_TREE_CLONE)));

Yeah, we can do something like that!

^ permalink raw reply

* Re: [PATCH bpf-next v5 8/9] libbpf: Add common attr support for map_create
From: Andrii Nakryiko @ 2026-01-16  1:03 UTC (permalink / raw)
  To: Leon Hwang
  Cc: bpf, Alexei Starovoitov, Daniel Borkmann, John Fastabend,
	Andrii Nakryiko, Martin KaFai Lau, Eduard Zingerman, Song Liu,
	Yonghong Song, KP Singh, Stanislav Fomichev, Hao Luo, Jiri Olsa,
	Shuah Khan, Christian Brauner, Seth Forshee, Yuichiro Tsuji,
	Andrey Albershteyn, Willem de Bruijn, Jason Xing, Tao Chen,
	Mykyta Yatsenko, Kumar Kartikeya Dwivedi, Anton Protopopov,
	Amery Hung, Rong Tao, linux-kernel, linux-api, linux-kselftest,
	kernel-patches-bot
In-Reply-To: <20260112145616.44195-9-leon.hwang@linux.dev>

On Mon, Jan 12, 2026 at 6:59 AM Leon Hwang <leon.hwang@linux.dev> wrote:
>
> With the previous commit adding common attribute support for
> BPF_MAP_CREATE, users can now retrieve detailed error messages when map
> creation fails via the log_buf field.
>
> Introduce struct bpf_syscall_common_attr_opts with the following fields:
> log_buf, log_size, log_level, and log_true_size.
>
> Extend bpf_map_create_opts with a new field common_attr_opts, allowing
> users to capture and inspect log messages on map creation failures.
>
> Signed-off-by: Leon Hwang <leon.hwang@linux.dev>
> ---
>  tools/lib/bpf/bpf.c | 15 ++++++++++++++-
>  tools/lib/bpf/bpf.h | 17 ++++++++++++++++-
>  2 files changed, 30 insertions(+), 2 deletions(-)
>
> diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
> index d44e667aaf02..d65df1b7b2be 100644
> --- a/tools/lib/bpf/bpf.c
> +++ b/tools/lib/bpf/bpf.c
> @@ -207,6 +207,9 @@ int bpf_map_create(enum bpf_map_type map_type,
>                    const struct bpf_map_create_opts *opts)
>  {
>         const size_t attr_sz = offsetofend(union bpf_attr, excl_prog_hash_size);
> +       const size_t common_attr_sz = sizeof(struct bpf_common_attr);
> +       struct bpf_syscall_common_attr_opts *common_attr_opts;
> +       struct bpf_common_attr common_attr;
>         union bpf_attr attr;
>         int fd;
>
> @@ -240,7 +243,17 @@ int bpf_map_create(enum bpf_map_type map_type,
>         attr.excl_prog_hash = ptr_to_u64(OPTS_GET(opts, excl_prog_hash, NULL));
>         attr.excl_prog_hash_size = OPTS_GET(opts, excl_prog_hash_size, 0);
>
> -       fd = sys_bpf_fd(BPF_MAP_CREATE, &attr, attr_sz);
> +       common_attr_opts = OPTS_GET(opts, common_attr_opts, NULL);
> +       if (common_attr_opts && feat_supported(NULL, FEAT_EXTENDED_SYSCALL)) {
> +               memset(&common_attr, 0, common_attr_sz);
> +               common_attr.log_buf = ptr_to_u64(OPTS_GET(common_attr_opts, log_buf, NULL));
> +               common_attr.log_size = OPTS_GET(common_attr_opts, log_size, 0);
> +               common_attr.log_level = OPTS_GET(common_attr_opts, log_level, 0);
> +               fd = sys_bpf_ext_fd(BPF_MAP_CREATE, &attr, attr_sz, &common_attr, common_attr_sz);
> +               OPTS_SET(common_attr_opts, log_true_size, common_attr.log_true_size);
> +       } else {
> +               fd = sys_bpf_fd(BPF_MAP_CREATE, &attr, attr_sz);

OPTS_SET(log_true_size) to zero here, maybe?

> +       }
>         return libbpf_err_errno(fd);
>  }
>
> diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
> index 2c8e88ddb674..c4a26e6b71ea 100644
> --- a/tools/lib/bpf/bpf.h
> +++ b/tools/lib/bpf/bpf.h
> @@ -37,6 +37,18 @@ extern "C" {
>
>  LIBBPF_API int libbpf_set_memlock_rlim(size_t memlock_bytes);
>
> +struct bpf_syscall_common_attr_opts {
> +       size_t sz; /* size of this struct for forward/backward compatibility */
> +
> +       char *log_buf;
> +       __u32 log_size;
> +       __u32 log_level;
> +       __u32 log_true_size;
> +
> +       size_t :0;
> +};
> +#define bpf_syscall_common_attr_opts__last_field log_true_size

see below, let's drop this struct and just add these 4 fields directly
to bpf_map_create_opts

> +
>  struct bpf_map_create_opts {
>         size_t sz; /* size of this struct for forward/backward compatibility */
>
> @@ -57,9 +69,12 @@ struct bpf_map_create_opts {
>
>         const void *excl_prog_hash;
>         __u32 excl_prog_hash_size;
> +
> +       struct bpf_syscall_common_attr_opts *common_attr_opts;

maybe let's just add those log_xxx fields here directly? This whole
extra bpf_syscall_common_attr_opts pointer and struct seems like a
cumbersome API.

> +
>         size_t :0;
>  };
> -#define bpf_map_create_opts__last_field excl_prog_hash_size
> +#define bpf_map_create_opts__last_field common_attr_opts
>
>  LIBBPF_API int bpf_map_create(enum bpf_map_type map_type,
>                               const char *map_name,
> --
> 2.52.0
>

^ permalink raw reply

* Re: [PATCH bpf-next v5 4/9] bpf: Add syscall common attributes support for prog_load
From: Andrii Nakryiko @ 2026-01-16  0:54 UTC (permalink / raw)
  To: Leon Hwang
  Cc: bpf, Alexei Starovoitov, Daniel Borkmann, John Fastabend,
	Andrii Nakryiko, Martin KaFai Lau, Eduard Zingerman, Song Liu,
	Yonghong Song, KP Singh, Stanislav Fomichev, Hao Luo, Jiri Olsa,
	Shuah Khan, Christian Brauner, Seth Forshee, Yuichiro Tsuji,
	Andrey Albershteyn, Willem de Bruijn, Jason Xing, Tao Chen,
	Mykyta Yatsenko, Kumar Kartikeya Dwivedi, Anton Protopopov,
	Amery Hung, Rong Tao, linux-kernel, linux-api, linux-kselftest,
	kernel-patches-bot
In-Reply-To: <20260112145616.44195-5-leon.hwang@linux.dev>

On Mon, Jan 12, 2026 at 6:59 AM Leon Hwang <leon.hwang@linux.dev> wrote:
>
> The log buffer of common attributes would be confusing with the one in
> 'union bpf_attr' for BPF_PROG_LOAD.
>
> In order to clarify the usage of these two log buffers, they both can be
> used for logging if:
>
> * They are same, including 'log_buf', 'log_level' and 'log_size'.
> * One of them is missing, then another one will be used for logging.
>
> If they both have 'log_buf' but they are not same totally, return -EUSERS.

why use this special error code that we don't seem to use in BPF
subsystem at all? What's wrong with -EINVAL. This shouldn't be an easy
mistake to do, tbh.

>
> Signed-off-by: Leon Hwang <leon.hwang@linux.dev>
> ---
>  include/linux/bpf_verifier.h |  4 +++-
>  kernel/bpf/log.c             | 29 ++++++++++++++++++++++++++---
>  kernel/bpf/syscall.c         |  9 ++++++---
>  3 files changed, 35 insertions(+), 7 deletions(-)
>
> diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
> index 4c9632c40059..da2d37ca60e7 100644
> --- a/include/linux/bpf_verifier.h
> +++ b/include/linux/bpf_verifier.h
> @@ -637,9 +637,11 @@ struct bpf_log_attr {
>         u32 log_level;
>         struct bpf_attrs *attrs;
>         u32 offsetof_log_true_size;
> +       struct bpf_attrs *attrs_common;
>  };
>
> -int bpf_prog_load_log_attr_init(struct bpf_log_attr *log_attr, struct bpf_attrs *attrs);
> +int bpf_prog_load_log_attr_init(struct bpf_log_attr *log_attr, struct bpf_attrs *attrs,
> +                               struct bpf_attrs *attrs_common);
>  int bpf_log_attr_finalize(struct bpf_log_attr *log_attr, struct bpf_verifier_log *log);
>
>  #define BPF_MAX_SUBPROGS 256
> diff --git a/kernel/bpf/log.c b/kernel/bpf/log.c
> index 457b724c4176..eba60a13e244 100644
> --- a/kernel/bpf/log.c
> +++ b/kernel/bpf/log.c
> @@ -865,23 +865,41 @@ void print_insn_state(struct bpf_verifier_env *env, const struct bpf_verifier_st
>  }
>
>  static int bpf_log_attr_init(struct bpf_log_attr *log_attr, struct bpf_attrs *attrs, u64 log_buf,
> -                            u32 log_size, u32 log_level, int offsetof_log_true_size)
> +                            u32 log_size, u32 log_level, int offsetof_log_true_size,
> +                            struct bpf_attrs *attrs_common)
>  {
> +       const struct bpf_common_attr *common_attr = attrs_common ? attrs_common->attr : NULL;
> +

There is something to be said about naming choices here :) it's easy
to get lost in attrs_common being actually bpf_attrs, which contains
attr field, which is actually of bpf_common_attr type... It's a bit
disorienting. :)

>         memset(log_attr, 0, sizeof(*log_attr));
>         log_attr->log_buf = log_buf;
>         log_attr->log_size = log_size;
>         log_attr->log_level = log_level;
>         log_attr->attrs = attrs;
>         log_attr->offsetof_log_true_size = offsetof_log_true_size;
> +       log_attr->attrs_common = attrs_common;
> +
> +       if (log_buf && common_attr && common_attr->log_buf &&
> +               (log_buf != common_attr->log_buf ||
> +                log_size != common_attr->log_size ||
> +                log_level != common_attr->log_level))
> +               return -EUSERS;
> +
> +       if (!log_buf && common_attr && common_attr->log_buf) {
> +               log_attr->log_buf = common_attr->log_buf;
> +               log_attr->log_size = common_attr->log_size;
> +               log_attr->log_level = common_attr->log_level;
> +       }
> +
>         return 0;
>  }
>

[...]

^ permalink raw reply

* Re: [PATCH bpf-next v5 2/9] libbpf: Add support for extended bpf syscall
From: Andrii Nakryiko @ 2026-01-16  0:42 UTC (permalink / raw)
  To: Leon Hwang
  Cc: bpf, Alexei Starovoitov, Daniel Borkmann, John Fastabend,
	Andrii Nakryiko, Martin KaFai Lau, Eduard Zingerman, Song Liu,
	Yonghong Song, KP Singh, Stanislav Fomichev, Hao Luo, Jiri Olsa,
	Shuah Khan, Christian Brauner, Seth Forshee, Yuichiro Tsuji,
	Andrey Albershteyn, Willem de Bruijn, Jason Xing, Tao Chen,
	Mykyta Yatsenko, Kumar Kartikeya Dwivedi, Anton Protopopov,
	Amery Hung, Rong Tao, linux-kernel, linux-api, linux-kselftest,
	kernel-patches-bot
In-Reply-To: <20260112145616.44195-3-leon.hwang@linux.dev>

On Mon, Jan 12, 2026 at 6:58 AM Leon Hwang <leon.hwang@linux.dev> wrote:
>
> To support the extended BPF syscall introduced in the previous commit,
> introduce the following internal APIs:
>
> * 'sys_bpf_ext()'
> * 'sys_bpf_ext_fd()'
>   They wrap the raw 'syscall()' interface to support passing extended
>   attributes.
> * 'probe_sys_bpf_ext()'
>   Check whether current kernel supports the extended attributes.
>
> Signed-off-by: Leon Hwang <leon.hwang@linux.dev>
> ---
>  tools/lib/bpf/bpf.c             | 34 +++++++++++++++++++++++++++++++++
>  tools/lib/bpf/features.c        |  8 ++++++++
>  tools/lib/bpf/libbpf_internal.h |  3 +++
>  3 files changed, 45 insertions(+)
>
> diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
> index 21b57a629916..d44e667aaf02 100644
> --- a/tools/lib/bpf/bpf.c
> +++ b/tools/lib/bpf/bpf.c
> @@ -69,6 +69,40 @@ static inline __u64 ptr_to_u64(const void *ptr)
>         return (__u64) (unsigned long) ptr;
>  }
>
> +static inline int sys_bpf_ext(enum bpf_cmd cmd, union bpf_attr *attr,
> +                             unsigned int size,
> +                             struct bpf_common_attr *common_attr,

nit: kernel uses consistent attr_common/size_common pattern, but here
you are inverting attr_common -> common_attr, let's not?

> +                             unsigned int size_common)
> +{
> +       cmd = common_attr ? (cmd | BPF_COMMON_ATTRS) : (cmd & ~BPF_COMMON_ATTRS);
> +       return syscall(__NR_bpf, cmd, attr, size, common_attr, size_common);
> +}
> +
> +static inline int sys_bpf_ext_fd(enum bpf_cmd cmd, union bpf_attr *attr,
> +                                unsigned int size,
> +                                struct bpf_common_attr *common_attr,
> +                                unsigned int size_common)
> +{
> +       int fd;
> +
> +       fd = sys_bpf_ext(cmd, attr, size, common_attr, size_common);
> +       return ensure_good_fd(fd);
> +}
> +
> +int probe_sys_bpf_ext(void)
> +{
> +       const size_t attr_sz = offsetofend(union bpf_attr, prog_token_fd);
> +       union bpf_attr attr;
> +       int fd;
> +
> +       memset(&attr, 0, attr_sz);
> +       fd = syscall(__NR_bpf, BPF_PROG_LOAD | BPF_COMMON_ATTRS, &attr, attr_sz, NULL,
> +                    sizeof(struct bpf_common_attr));
> +       if (fd >= 0)
> +               close(fd);

hm... close can change errno, this is fragile. If fd >= 0, something
is wrong with our detection, just return error right away?

> +       return errno == EFAULT;
> +}
> +
>  static inline int sys_bpf(enum bpf_cmd cmd, union bpf_attr *attr,
>                           unsigned int size)
>  {
> diff --git a/tools/lib/bpf/features.c b/tools/lib/bpf/features.c
> index b842b83e2480..d786a815f1ae 100644
> --- a/tools/lib/bpf/features.c
> +++ b/tools/lib/bpf/features.c
> @@ -506,6 +506,11 @@ static int probe_kern_arg_ctx_tag(int token_fd)
>         return probe_fd(prog_fd);
>  }
>
> +static int probe_kern_extended_syscall(int token_fd)
> +{
> +       return probe_sys_bpf_ext();
> +}
> +
>  typedef int (*feature_probe_fn)(int /* token_fd */);
>
>  static struct kern_feature_cache feature_cache;
> @@ -581,6 +586,9 @@ static struct kern_feature_desc {
>         [FEAT_BTF_QMARK_DATASEC] = {
>                 "BTF DATASEC names starting from '?'", probe_kern_btf_qmark_datasec,
>         },
> +       [FEAT_EXTENDED_SYSCALL] = {
> +               "Kernel supports extended syscall", probe_kern_extended_syscall,

"extended syscall" is a bit vague... We specifically detect common
attrs support, maybe say that?

> +       },
>  };
>
>  bool feat_supported(struct kern_feature_cache *cache, enum kern_feature_id feat_id)
> diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h
> index fc59b21b51b5..e2a6ef4b45ae 100644
> --- a/tools/lib/bpf/libbpf_internal.h
> +++ b/tools/lib/bpf/libbpf_internal.h
> @@ -392,6 +392,8 @@ enum kern_feature_id {
>         FEAT_ARG_CTX_TAG,
>         /* Kernel supports '?' at the front of datasec names */
>         FEAT_BTF_QMARK_DATASEC,
> +       /* Kernel supports extended syscall */
> +       FEAT_EXTENDED_SYSCALL,

FEAT_BPF_COMMON_ATTRS ?

>         __FEAT_CNT,
>  };
>
> @@ -757,4 +759,5 @@ int probe_fd(int fd);
>  #define SHA256_DWORD_SIZE SHA256_DIGEST_LENGTH / sizeof(__u64)
>
>  void libbpf_sha256(const void *data, size_t len, __u8 out[SHA256_DIGEST_LENGTH]);
> +int probe_sys_bpf_ext(void);
>  #endif /* __LIBBPF_LIBBPF_INTERNAL_H */
> --
> 2.52.0
>

^ permalink raw reply

* Re: O_CLOEXEC use for OPEN_TREE_CLOEXEC
From: Florian Weimer @ 2026-01-15  8:55 UTC (permalink / raw)
  To: Christian Brauner
  Cc: linux-fsdevel, linux-api, linux-kernel, Al Viro, David Howells,
	DJ Delorie
In-Reply-To: <20260114-alias-riefen-2cb8c09d0ded@brauner>

* Christian Brauner:

> On Tue, Jan 13, 2026 at 11:40:55PM +0100, Florian Weimer wrote:
>> In <linux/mount.h>, we have this:
>> 
>> #define OPEN_TREE_CLOEXEC      O_CLOEXEC       /* Close the file on execve() */
>> 
>> This causes a few pain points for us to on the glibc side when we mirror
>> this into <linux/mount.h> becuse O_CLOEXEC is defined in <fcntl.h>,
>> which is one of the headers that's completely incompatible with the UAPI
>> headers.
>> 
>> The reason why this is painful is because O_CLOEXEC has at least three
>> different values across architectures: 0x80000, 0x200000, 0x400000
>> 
>> Even for the UAPI this isn't ideal because it effectively burns three
>> open_tree flags, unless the flags are made architecture-specific, too.
>
> I think that just got cargo-culted... A long time ago some API define as
> O_CLOEXEC and now a lot of APIs have done the same.

Yes, it looks like inotify is in the same boat.

> I'm pretty sure we can't change that now but we can document that this
> shouldn't be ifdefed and instead be a separate per-syscall bit. But I
> think that's the best we can do right now.

Maybe add something like this as a safety measure, to ensure that the
flags don't overlap?

diff --git a/fs/namespace.c b/fs/namespace.c
index c58674a20cad..5bbfd379ec44 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -3069,6 +3069,9 @@ static struct file *vfs_open_tree(int dfd, const char __user *filename, unsigned
 	bool detached = flags & OPEN_TREE_CLONE;
 
 	BUILD_BUG_ON(OPEN_TREE_CLOEXEC != O_CLOEXEC);
+	BUILD_BUG_IN(!(O_CLOEXEC & OPEN_TREE_CLONE));
+	BUILD_BUG_ON(!((AT_EMPTY_PATH | AT_NO_AUTOMOUNT | AT_RECURSIVE | AT_SYMLINK_NOFOLLOW) &
+		       (O_CLOEXEC | OPEN_TREE_CLONE)));
 
 	if (flags & ~(AT_EMPTY_PATH | AT_NO_AUTOMOUNT | AT_RECURSIVE |
 		      AT_SYMLINK_NOFOLLOW | OPEN_TREE_CLONE |
@@ -3100,7 +3103,7 @@ static struct file *vfs_open_tree(int dfd, const char __user *filename, unsigned
 
 SYSCALL_DEFINE3(open_tree, int, dfd, const char __user *, filename, unsigned, flags)
 {
-	return FD_ADD(flags, vfs_open_tree(dfd, filename, flags));
+	return FD_ADD(flags & O_CLOEXEC, vfs_open_tree(dfd, filename, flags));
 }
 
 /*

(Completely untested.)

Passing the mix of flags to FD_ADD isn't really future-proof if FD_ADD
ever recognizes more than just O_CLOEXEC.

Thanks,
Florian


^ permalink raw reply related

* Re: O_CLOEXEC use for OPEN_TREE_CLOEXEC
From: Aleksa Sarai @ 2026-01-14 21:18 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Christian Brauner, Florian Weimer, linux-fsdevel, linux-api,
	linux-kernel, Al Viro, David Howells, DJ Delorie
In-Reply-To: <CALCETrWMWs3_G5JhJb7+h+JQjpqXxqOh2vNcQaG1HuXjaeCqQw@mail.gmail.com>

[-- Attachment #1: Type: text/plain, Size: 2010 bytes --]

On 2026-01-14, Andy Lutomirski <luto@amacapital.net> wrote:
> On Wed, Jan 14, 2026 at 8:09 AM Christian Brauner <brauner@kernel.org> wrote:
> >
> > On Tue, Jan 13, 2026 at 11:40:55PM +0100, Florian Weimer wrote:
> > > In <linux/mount.h>, we have this:
> > >
> > > #define OPEN_TREE_CLOEXEC      O_CLOEXEC       /* Close the file on execve() */
> > >
> > > This causes a few pain points for us to on the glibc side when we mirror
> > > this into <linux/mount.h> becuse O_CLOEXEC is defined in <fcntl.h>,
> > > which is one of the headers that's completely incompatible with the UAPI
> > > headers.
> > >
> > > The reason why this is painful is because O_CLOEXEC has at least three
> > > different values across architectures: 0x80000, 0x200000, 0x400000
> > >
> > > Even for the UAPI this isn't ideal because it effectively burns three
> > > open_tree flags, unless the flags are made architecture-specific, too.
> >
> > I think that just got cargo-culted... A long time ago some API define as
> > O_CLOEXEC and now a lot of APIs have done the same. I'm pretty sure we
> > can't change that now but we can document that this shouldn't be ifdefed
> > and instead be a separate per-syscall bit. But I think that's the best
> > we can do right now.
> >
> 
> How about, for future syscalls, we make CLOEXEC unconditional?  If
> anyone wants an ofd to get inherited across exec, they can F_SETFD it
> themselves.

I believe newer interfaces have already started doing that (e.g., all of
the pidfd stuff is O_CLOEXEC by default) but we should definitely update
the documentation in Documentation/process/adding-syscalls.rst to stop
recommending the inclusion of the O_CLOEXEC flag.

The funniest thing about open_tree(2) is that it actually borrows flag
bits from three distinct namespaces! It has an OPEN_TREE_* namespace,
the AT_* namespace (which now has a concept of "per-syscall flags"), and
O_CLOEXEC. What a fun interface!

-- 
Aleksa Sarai
https://www.cyphar.com/

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 265 bytes --]

^ permalink raw reply

* Re: O_CLOEXEC use for OPEN_TREE_CLOEXEC
From: Andy Lutomirski @ 2026-01-14 19:42 UTC (permalink / raw)
  To: Christian Brauner
  Cc: Florian Weimer, linux-fsdevel, linux-api, linux-kernel, Al Viro,
	David Howells, DJ Delorie
In-Reply-To: <20260114-alias-riefen-2cb8c09d0ded@brauner>

On Wed, Jan 14, 2026 at 8:09 AM Christian Brauner <brauner@kernel.org> wrote:
>
> On Tue, Jan 13, 2026 at 11:40:55PM +0100, Florian Weimer wrote:
> > In <linux/mount.h>, we have this:
> >
> > #define OPEN_TREE_CLOEXEC      O_CLOEXEC       /* Close the file on execve() */
> >
> > This causes a few pain points for us to on the glibc side when we mirror
> > this into <linux/mount.h> becuse O_CLOEXEC is defined in <fcntl.h>,
> > which is one of the headers that's completely incompatible with the UAPI
> > headers.
> >
> > The reason why this is painful is because O_CLOEXEC has at least three
> > different values across architectures: 0x80000, 0x200000, 0x400000
> >
> > Even for the UAPI this isn't ideal because it effectively burns three
> > open_tree flags, unless the flags are made architecture-specific, too.
>
> I think that just got cargo-culted... A long time ago some API define as
> O_CLOEXEC and now a lot of APIs have done the same. I'm pretty sure we
> can't change that now but we can document that this shouldn't be ifdefed
> and instead be a separate per-syscall bit. But I think that's the best
> we can do right now.
>

How about, for future syscalls, we make CLOEXEC unconditional?  If
anyone wants an ofd to get inherited across exec, they can F_SETFD it
themselves.

--Andy

^ permalink raw reply

* Re: [PATCH v8 14/18] mm: memfd_luo: allow preserving memfd
From: Pratyush Yadav @ 2026-01-14 19:02 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: Pasha Tatashin, pratyush, jasonmiu, graf, rppt, dmatlack,
	rientjes, corbet, rdunlap, ilpo.jarvinen, kanie, ojeda, aliceryhl,
	masahiroy, akpm, tj, yoann.congal, mmaurer, roman.gushchin,
	chenridong, axboe, mark.rutland, jannh, vincent.guittot, hannes,
	dan.j.williams, david, joel.granados, rostedt, anna.schumaker,
	song, linux, linux-kernel, linux-doc, linux-mm, gregkh, tglx,
	mingo, bp, dave.hansen, x86, hpa, rafael, dakr,
	bartosz.golaszewski, cw00.choi, myungjoo.ham, yesanishhere,
	Jonathan.Cameron, quic_zijuhu, aleksander.lobakin, ira.weiny,
	andriy.shevchenko, leon, lukas, bhelgaas, wagi, djeffery,
	stuart.w.hayes, lennart, brauner, linux-api, linux-fsdevel,
	saeedm, ajayachandra, parav, leonro, witu, hughd, skhawaja,
	chrisl
In-Reply-To: <20260107185414.GG293394@nvidia.com>

On Wed, Jan 07 2026, Jason Gunthorpe wrote:

> On Tue, Nov 25, 2025 at 11:58:44AM -0500, Pasha Tatashin wrote:
>> From: Pratyush Yadav <ptyadav@amazon.de>
>> 
>> The ability to preserve a memfd allows userspace to use KHO and LUO to
>> transfer its memory contents to the next kernel. This is useful in many
>> ways. For one, it can be used with IOMMUFD as the backing store for
>> IOMMU page tables. Preserving IOMMUFD is essential for performing a
>> hypervisor live update with passthrough devices. memfd support provides
>> the first building block for making that possible.
>
> I would lead with the use of memfd to back the guest memory pages for
> use with KVM :)

I would assume using 1G-page-backed memfd is the more common use case,
and this patch doesn't come with 1G page support.

Anyway, the patch is now already applied so we can't go back and fix
the commit message...

-- 
Regards,
Pratyush Yadav

^ permalink raw reply

* Re: [PATCH v8 14/18] mm: memfd_luo: allow preserving memfd
From: Pratyush Yadav @ 2026-01-14 18:59 UTC (permalink / raw)
  To: Mike Rapoport
  Cc: Chris Mason, Pratyush Yadav, Pasha Tatashin, jasonmiu, graf,
	dmatlack, rientjes, corbet, rdunlap, ilpo.jarvinen, kanie, ojeda,
	aliceryhl, masahiroy, akpm, tj, yoann.congal, mmaurer,
	roman.gushchin, chenridong, axboe, mark.rutland, jannh,
	vincent.guittot, hannes, dan.j.williams, david, joel.granados,
	rostedt, anna.schumaker, song, linux, linux-kernel, linux-doc,
	linux-mm, gregkh, tglx, mingo, bp, dave.hansen, x86, hpa, rafael,
	dakr, bartosz.golaszewski, cw00.choi, myungjoo.ham, yesanishhere,
	Jonathan.Cameron, quic_zijuhu, aleksander.lobakin, ira.weiny,
	andriy.shevchenko, leon, lukas, bhelgaas, wagi, djeffery,
	stuart.w.hayes, lennart, brauner, linux-api, linux-fsdevel,
	saeedm, ajayachandra, jgg, parav, leonro, witu, hughd, skhawaja,
	chrisl
In-Reply-To: <aWfLS48tG7XInpNN@kernel.org>

On Wed, Jan 14 2026, Mike Rapoport wrote:

> On Tue, Jan 13, 2026 at 06:09:23AM -0800, Chris Mason wrote:
>> On Tue, 25 Nov 2025 11:58:44 -0500 Pasha Tatashin <pasha.tatashin@soleen.com> wrote:
>> 
>> Hi everyone,
>> 
>> I'm running the v6.19 mm commits through patch review automation, and this
>> commit was flagged.  I don't know this code well, but it looks like it
>> might be a real bug.
>> 
>> [AI review output below]
>> 
>> > diff --git a/mm/memfd_luo.c b/mm/memfd_luo.c
>> > new file mode 100644
>> > index 0000000000000..4f6ba63b43105
>> > --- /dev/null
>> > +++ b/mm/memfd_luo.c
>> 
>> [ ... ]
>> 
>> > +static int memfd_luo_retrieve_folios(struct file *file,
>> > +				     struct memfd_luo_folio_ser *folios_ser,
>> > +				     u64 nr_folios)
>> > +{
>> > +	struct inode *inode = file_inode(file);
>> > +	struct address_space *mapping = inode->i_mapping;
>> > +	struct folio *folio;
>> > +	int err = -EIO;
>> > +	long i;
>> > +
>> > +	for (i = 0; i < nr_folios; i++) {
>> 
>> [ ... ]
>> 
>> > +		err = shmem_add_to_page_cache(folio, mapping, index, NULL,
>> > +					      mapping_gfp_mask(mapping));
>> > +		if (err) {
>> > +			pr_err("shmem: failed to add to page cache folio index %ld: %d\n",
>> > +			       i, err);
>> > +			goto unlock_folio;
>> > +		}
>> > +
>> > +		if (flags & MEMFD_LUO_FOLIO_UPTODATE)
>> > +			folio_mark_uptodate(folio);
>> > +		if (flags & MEMFD_LUO_FOLIO_DIRTY)
>> > +			folio_mark_dirty(folio);
>> > +
>> > +		err = shmem_inode_acct_blocks(inode, 1);
>> > +		if (err) {
>> > +			pr_err("shmem: failed to account folio index %ld: %d\n",
>> > +			       i, err);
>> > +			goto unlock_folio;
>> > +		}
>> 
>> When shmem_inode_acct_blocks() fails here, the folio has already been
>> added to the page cache by shmem_add_to_page_cache(). Should the folio be
>> removed from the page cache before going to unlock_folio?
>> 
>> Looking at shmem_alloc_and_add_folio() in mm/shmem.c, when
>> shmem_inode_acct_blocks() fails after the folio has been added, it calls
>> filemap_remove_folio() to remove it:
>> 
>>     error = shmem_inode_acct_blocks(inode, pages);
>>     if (error) {
>>         ...
>>         if (error) {
>>             filemap_remove_folio(folio);
>>             goto unlock;
>>         }
>>     }
>> 
>> Without this, the folio remains in the page cache (counted in
>> mapping->nrpages) but info->alloced is not incremented (since
>> shmem_recalc_inode is not called). This could cause shmem accounting
>> inconsistency.
>  
> My understanding that if anything fails in memfd_luo_retrieve_folios() the
> file is destroyed anyway and the accounting wouldn't matter.
>
> But to be on the safe side we should fix the error handling here.
> @Pratyush, what do you say?

Yeah, I don't think the inode's alloced accounting is a real issue here
since the file will be destroyed immediately after. This is why I didn't
want to add the extra complexity of the error handling.

But now that I think of it, perhaps the lingering unaccounted folio
might cause an underflow in vm_committed_as. shmem_inode_acct_blocks()
cleans up the vm_acct_memory() call in case of failure. But perhaps the
iput() triggers an extra shmem_unacct_memory() because of the lingering
folio.

I am not 100% sure that can actually happen since the code is a bit
complex. Let me check and get back to you.

-- 
Regards,
Pratyush Yadav

^ permalink raw reply

* Re: [PATCH v8 14/18] mm: memfd_luo: allow preserving memfd
From: Mike Rapoport @ 2026-01-14 16:58 UTC (permalink / raw)
  To: Chris Mason, Pratyush Yadav
  Cc: Pasha Tatashin, jasonmiu, graf, dmatlack, rientjes, corbet,
	rdunlap, ilpo.jarvinen, kanie, ojeda, aliceryhl, masahiroy, akpm,
	tj, yoann.congal, mmaurer, roman.gushchin, chenridong, axboe,
	mark.rutland, jannh, vincent.guittot, hannes, dan.j.williams,
	david, joel.granados, rostedt, anna.schumaker, song, linux,
	linux-kernel, linux-doc, linux-mm, gregkh, tglx, mingo, bp,
	dave.hansen, x86, hpa, rafael, dakr, bartosz.golaszewski,
	cw00.choi, myungjoo.ham, yesanishhere, Jonathan.Cameron,
	quic_zijuhu, aleksander.lobakin, ira.weiny, andriy.shevchenko,
	leon, lukas, bhelgaas, wagi, djeffery, stuart.w.hayes, ptyadav,
	lennart, brauner, linux-api, linux-fsdevel, saeedm, ajayachandra,
	jgg, parav, leonro, witu, hughd, skhawaja, chrisl
In-Reply-To: <20260113140927.1074142-1-clm@meta.com>

On Tue, Jan 13, 2026 at 06:09:23AM -0800, Chris Mason wrote:
> On Tue, 25 Nov 2025 11:58:44 -0500 Pasha Tatashin <pasha.tatashin@soleen.com> wrote:
> 
> Hi everyone,
> 
> I'm running the v6.19 mm commits through patch review automation, and this
> commit was flagged.  I don't know this code well, but it looks like it
> might be a real bug.
> 
> [AI review output below]
> 
> > diff --git a/mm/memfd_luo.c b/mm/memfd_luo.c
> > new file mode 100644
> > index 0000000000000..4f6ba63b43105
> > --- /dev/null
> > +++ b/mm/memfd_luo.c
> 
> [ ... ]
> 
> > +static int memfd_luo_retrieve_folios(struct file *file,
> > +				     struct memfd_luo_folio_ser *folios_ser,
> > +				     u64 nr_folios)
> > +{
> > +	struct inode *inode = file_inode(file);
> > +	struct address_space *mapping = inode->i_mapping;
> > +	struct folio *folio;
> > +	int err = -EIO;
> > +	long i;
> > +
> > +	for (i = 0; i < nr_folios; i++) {
> 
> [ ... ]
> 
> > +		err = shmem_add_to_page_cache(folio, mapping, index, NULL,
> > +					      mapping_gfp_mask(mapping));
> > +		if (err) {
> > +			pr_err("shmem: failed to add to page cache folio index %ld: %d\n",
> > +			       i, err);
> > +			goto unlock_folio;
> > +		}
> > +
> > +		if (flags & MEMFD_LUO_FOLIO_UPTODATE)
> > +			folio_mark_uptodate(folio);
> > +		if (flags & MEMFD_LUO_FOLIO_DIRTY)
> > +			folio_mark_dirty(folio);
> > +
> > +		err = shmem_inode_acct_blocks(inode, 1);
> > +		if (err) {
> > +			pr_err("shmem: failed to account folio index %ld: %d\n",
> > +			       i, err);
> > +			goto unlock_folio;
> > +		}
> 
> When shmem_inode_acct_blocks() fails here, the folio has already been
> added to the page cache by shmem_add_to_page_cache(). Should the folio be
> removed from the page cache before going to unlock_folio?
> 
> Looking at shmem_alloc_and_add_folio() in mm/shmem.c, when
> shmem_inode_acct_blocks() fails after the folio has been added, it calls
> filemap_remove_folio() to remove it:
> 
>     error = shmem_inode_acct_blocks(inode, pages);
>     if (error) {
>         ...
>         if (error) {
>             filemap_remove_folio(folio);
>             goto unlock;
>         }
>     }
> 
> Without this, the folio remains in the page cache (counted in
> mapping->nrpages) but info->alloced is not incremented (since
> shmem_recalc_inode is not called). This could cause shmem accounting
> inconsistency.
 
My understanding that if anything fails in memfd_luo_retrieve_folios() the
file is destroyed anyway and the accounting wouldn't matter.

But to be on the safe side we should fix the error handling here.
@Pratyush, what do you say?

-- 
Sincerely yours,
Mike.

^ permalink raw reply

* Re: O_CLOEXEC use for OPEN_TREE_CLOEXEC
From: Christian Brauner @ 2026-01-14 16:03 UTC (permalink / raw)
  To: Florian Weimer
  Cc: linux-fsdevel, linux-api, linux-kernel, Al Viro, David Howells,
	DJ Delorie
In-Reply-To: <lhupl7dcf0o.fsf@oldenburg.str.redhat.com>

On Tue, Jan 13, 2026 at 11:40:55PM +0100, Florian Weimer wrote:
> In <linux/mount.h>, we have this:
> 
> #define OPEN_TREE_CLOEXEC      O_CLOEXEC       /* Close the file on execve() */
> 
> This causes a few pain points for us to on the glibc side when we mirror
> this into <linux/mount.h> becuse O_CLOEXEC is defined in <fcntl.h>,
> which is one of the headers that's completely incompatible with the UAPI
> headers.
> 
> The reason why this is painful is because O_CLOEXEC has at least three
> different values across architectures: 0x80000, 0x200000, 0x400000
> 
> Even for the UAPI this isn't ideal because it effectively burns three
> open_tree flags, unless the flags are made architecture-specific, too.

I think that just got cargo-culted... A long time ago some API define as
O_CLOEXEC and now a lot of APIs have done the same. I'm pretty sure we
can't change that now but we can document that this shouldn't be ifdefed
and instead be a separate per-syscall bit. But I think that's the best
we can do right now.

^ permalink raw reply

* O_CLOEXEC use for OPEN_TREE_CLOEXEC
From: Florian Weimer @ 2026-01-13 22:40 UTC (permalink / raw)
  To: linux-fsdevel; +Cc: linux-api, linux-kernel, Al Viro, David Howells, DJ Delorie

In <linux/mount.h>, we have this:

#define OPEN_TREE_CLOEXEC      O_CLOEXEC       /* Close the file on execve() */

This causes a few pain points for us to on the glibc side when we mirror
this into <linux/mount.h> becuse O_CLOEXEC is defined in <fcntl.h>,
which is one of the headers that's completely incompatible with the UAPI
headers.

The reason why this is painful is because O_CLOEXEC has at least three
different values across architectures: 0x80000, 0x200000, 0x400000

Even for the UAPI this isn't ideal because it effectively burns three
open_tree flags, unless the flags are made architecture-specific, too.

Thanks,
Florian


^ permalink raw reply

* Re: [PATCH v8 14/18] mm: memfd_luo: allow preserving memfd
From: Chris Mason @ 2026-01-13 14:09 UTC (permalink / raw)
  To: Pasha Tatashin
  Cc: Chris Mason, jasonmiu, graf, rppt, dmatlack, rientjes, corbet,
	rdunlap, ilpo.jarvinen, kanie, ojeda, aliceryhl, masahiroy, akpm,
	tj, yoann.congal, mmaurer, roman.gushchin, chenridong, axboe,
	mark.rutland, jannh, vincent.guittot, hannes, dan.j.williams,
	david, joel.granados, rostedt, anna.schumaker, song, linux,
	linux-kernel, linux-doc, linux-mm, gregkh, tglx, mingo, bp,
	dave.hansen, x86, hpa, rafael, dakr, bartosz.golaszewski,
	cw00.choi, myungjoo.ham, yesanishhere, Jonathan.Cameron,
	quic_zijuhu, aleksander.lobakin, ira.weiny, andriy.shevchenko,
	leon, lukas, bhelgaas, wagi, djeffery, stuart.w.hayes, ptyadav,
	lennart, brauner, linux-api, linux-fsdevel, saeedm, ajayachandra,
	jgg, parav, leonro, witu, hughd, skhawaja, chrisl
In-Reply-To: <20251125165850.3389713-15-pasha.tatashin@soleen.com>

On Tue, 25 Nov 2025 11:58:44 -0500 Pasha Tatashin <pasha.tatashin@soleen.com> wrote:

Hi everyone,

I'm running the v6.19 mm commits through patch review automation, and this
commit was flagged.  I don't know this code well, but it looks like it
might be a real bug.

[AI review output below]

> diff --git a/mm/memfd_luo.c b/mm/memfd_luo.c
> new file mode 100644
> index 0000000000000..4f6ba63b43105
> --- /dev/null
> +++ b/mm/memfd_luo.c

[ ... ]

> +static int memfd_luo_retrieve_folios(struct file *file,
> +				     struct memfd_luo_folio_ser *folios_ser,
> +				     u64 nr_folios)
> +{
> +	struct inode *inode = file_inode(file);
> +	struct address_space *mapping = inode->i_mapping;
> +	struct folio *folio;
> +	int err = -EIO;
> +	long i;
> +
> +	for (i = 0; i < nr_folios; i++) {

[ ... ]

> +		err = shmem_add_to_page_cache(folio, mapping, index, NULL,
> +					      mapping_gfp_mask(mapping));
> +		if (err) {
> +			pr_err("shmem: failed to add to page cache folio index %ld: %d\n",
> +			       i, err);
> +			goto unlock_folio;
> +		}
> +
> +		if (flags & MEMFD_LUO_FOLIO_UPTODATE)
> +			folio_mark_uptodate(folio);
> +		if (flags & MEMFD_LUO_FOLIO_DIRTY)
> +			folio_mark_dirty(folio);
> +
> +		err = shmem_inode_acct_blocks(inode, 1);
> +		if (err) {
> +			pr_err("shmem: failed to account folio index %ld: %d\n",
> +			       i, err);
> +			goto unlock_folio;
> +		}

When shmem_inode_acct_blocks() fails here, the folio has already been
added to the page cache by shmem_add_to_page_cache(). Should the folio be
removed from the page cache before going to unlock_folio?

Looking at shmem_alloc_and_add_folio() in mm/shmem.c, when
shmem_inode_acct_blocks() fails after the folio has been added, it calls
filemap_remove_folio() to remove it:

    error = shmem_inode_acct_blocks(inode, pages);
    if (error) {
        ...
        if (error) {
            filemap_remove_folio(folio);
            goto unlock;
        }
    }

Without this, the folio remains in the page cache (counted in
mapping->nrpages) but info->alloced is not incremented (since
shmem_recalc_inode is not called). This could cause shmem accounting
inconsistency.



^ permalink raw reply

* Re: [PATCHSET v5] fs: generic file IO error reporting
From: Christian Brauner @ 2026-01-13  8:58 UTC (permalink / raw)
  To: Darrick J. Wong
  Cc: Christian Brauner, linux-api, jack, hch, linux-xfs, linux-ext4,
	linux-fsdevel, gabriel, amir73il, Gao Xiang
In-Reply-To: <176826402528.3490369.2415315475116356277.stgit@frogsfrogsfrogs>

On Mon, 12 Jan 2026 16:31:03 -0800, Darrick J. Wong wrote:
> This patchset adds some generic helpers so that filesystems can report
> errors to fsnotify in a standard way.  Then it adapts iomap to use the
> generic helpers so that any iomap-enabled filesystem can report I/O
> errors through this mechanism as well.  Finally, it makes XFS report
> metadata errors through this mechanism in much the same way that ext4
> does now.
> 
> [...]

Applied to the vfs-7.0.fserror branch of the vfs/vfs.git tree.
Patches in the vfs-7.0.fserror branch should appear in linux-next soon.

Please report any outstanding bugs that were missed during review in a
new review to the original patch series allowing us to drop it.

It's encouraged to provide Acked-bys and Reviewed-bys even though the
patch has now been applied. If possible patch trailers will be updated.

Note that commit hashes shown below are subject to change due to rebase,
trailer updates or similar. If in doubt, please check the listed branch.

tree:   https://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs.git
branch: vfs-7.0.fserror

[1/6] uapi: promote EFSCORRUPTED and EUCLEAN to errno.h
      https://git.kernel.org/vfs/vfs/c/602544773763
[2/6] fs: report filesystem and file I/O errors to fsnotify
      https://git.kernel.org/vfs/vfs/c/21945e6cb516
[3/6] iomap: report file I/O errors to the VFS
      https://git.kernel.org/vfs/vfs/c/a9d573ee88af
[4/6] xfs: report fs metadata errors via fsnotify
      https://git.kernel.org/vfs/vfs/c/efd87a100729
[5/6] xfs: translate fsdax media errors into file "data lost" errors when convenient
      https://git.kernel.org/vfs/vfs/c/94503211d2fd
[6/6] ext4: convert to new fserror helpers
      https://git.kernel.org/vfs/vfs/c/81d2e13a57c9

^ permalink raw reply

* [PATCH 1/6] uapi: promote EFSCORRUPTED and EUCLEAN to errno.h
From: Darrick J. Wong @ 2026-01-13  0:31 UTC (permalink / raw)
  To: djwong, brauner
  Cc: hch, hsiangkao, jack, linux-api, linux-xfs, jack, linux-ext4,
	linux-fsdevel, gabriel, hch, amir73il
In-Reply-To: <176826402528.3490369.2415315475116356277.stgit@frogsfrogsfrogs>

From: Darrick J. Wong <djwong@kernel.org>

Stop definining these privately and instead move them to the uapi
errno.h so that they become canonical instead of copy pasta.

Cc: linux-api@vger.kernel.org
Signed-off-by: "Darrick J. Wong" <djwong@kernel.org>
Reviewed-by: Gao Xiang <hsiangkao@linux.alibaba.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jan Kara <jack@suse.cz>
---
 arch/alpha/include/uapi/asm/errno.h        |    2 ++
 arch/mips/include/uapi/asm/errno.h         |    2 ++
 arch/parisc/include/uapi/asm/errno.h       |    2 ++
 arch/sparc/include/uapi/asm/errno.h        |    2 ++
 fs/erofs/internal.h                        |    2 --
 fs/ext2/ext2.h                             |    1 -
 fs/ext4/ext4.h                             |    3 ---
 fs/f2fs/f2fs.h                             |    3 ---
 fs/minix/minix.h                           |    2 --
 fs/udf/udf_sb.h                            |    2 --
 fs/xfs/xfs_linux.h                         |    2 --
 include/linux/jbd2.h                       |    3 ---
 include/uapi/asm-generic/errno.h           |    2 ++
 tools/arch/alpha/include/uapi/asm/errno.h  |    2 ++
 tools/arch/mips/include/uapi/asm/errno.h   |    2 ++
 tools/arch/parisc/include/uapi/asm/errno.h |    2 ++
 tools/arch/sparc/include/uapi/asm/errno.h  |    2 ++
 tools/include/uapi/asm-generic/errno.h     |    2 ++
 18 files changed, 20 insertions(+), 18 deletions(-)


diff --git a/arch/alpha/include/uapi/asm/errno.h b/arch/alpha/include/uapi/asm/errno.h
index 3d265f6babaf0a..6791f6508632ee 100644
--- a/arch/alpha/include/uapi/asm/errno.h
+++ b/arch/alpha/include/uapi/asm/errno.h
@@ -55,6 +55,7 @@
 #define	ENOSR		82	/* Out of streams resources */
 #define	ETIME		83	/* Timer expired */
 #define	EBADMSG		84	/* Not a data message */
+#define	EFSBADCRC	EBADMSG	/* Bad CRC detected */
 #define	EPROTO		85	/* Protocol error */
 #define	ENODATA		86	/* No data available */
 #define	ENOSTR		87	/* Device not a stream */
@@ -96,6 +97,7 @@
 #define	EREMCHG		115	/* Remote address changed */
 
 #define	EUCLEAN		117	/* Structure needs cleaning */
+#define	EFSCORRUPTED	EUCLEAN	/* Filesystem is corrupted */
 #define	ENOTNAM		118	/* Not a XENIX named type file */
 #define	ENAVAIL		119	/* No XENIX semaphores available */
 #define	EISNAM		120	/* Is a named type file */
diff --git a/arch/mips/include/uapi/asm/errno.h b/arch/mips/include/uapi/asm/errno.h
index 2fb714e2d6d8fc..c01ed91b1ef44b 100644
--- a/arch/mips/include/uapi/asm/errno.h
+++ b/arch/mips/include/uapi/asm/errno.h
@@ -50,6 +50,7 @@
 #define EDOTDOT		73	/* RFS specific error */
 #define EMULTIHOP	74	/* Multihop attempted */
 #define EBADMSG		77	/* Not a data message */
+#define EFSBADCRC	EBADMSG	/* Bad CRC detected */
 #define ENAMETOOLONG	78	/* File name too long */
 #define EOVERFLOW	79	/* Value too large for defined data type */
 #define ENOTUNIQ	80	/* Name not unique on network */
@@ -88,6 +89,7 @@
 #define EISCONN		133	/* Transport endpoint is already connected */
 #define ENOTCONN	134	/* Transport endpoint is not connected */
 #define EUCLEAN		135	/* Structure needs cleaning */
+#define EFSCORRUPTED	EUCLEAN	/* Filesystem is corrupted */
 #define ENOTNAM		137	/* Not a XENIX named type file */
 #define ENAVAIL		138	/* No XENIX semaphores available */
 #define EISNAM		139	/* Is a named type file */
diff --git a/arch/parisc/include/uapi/asm/errno.h b/arch/parisc/include/uapi/asm/errno.h
index 8d94739d75c67c..8cbc07c1903e4c 100644
--- a/arch/parisc/include/uapi/asm/errno.h
+++ b/arch/parisc/include/uapi/asm/errno.h
@@ -36,6 +36,7 @@
 
 #define	EDOTDOT		66	/* RFS specific error */
 #define	EBADMSG		67	/* Not a data message */
+#define	EFSBADCRC	EBADMSG	/* Bad CRC detected */
 #define	EUSERS		68	/* Too many users */
 #define	EDQUOT		69	/* Quota exceeded */
 #define	ESTALE		70	/* Stale file handle */
@@ -62,6 +63,7 @@
 #define	ERESTART	175	/* Interrupted system call should be restarted */
 #define	ESTRPIPE	176	/* Streams pipe error */
 #define	EUCLEAN		177	/* Structure needs cleaning */
+#define	EFSCORRUPTED	EUCLEAN	/* Filesystem is corrupted */
 #define	ENOTNAM		178	/* Not a XENIX named type file */
 #define	ENAVAIL		179	/* No XENIX semaphores available */
 #define	EISNAM		180	/* Is a named type file */
diff --git a/arch/sparc/include/uapi/asm/errno.h b/arch/sparc/include/uapi/asm/errno.h
index 81a732b902ee38..4a41e7835fd5b8 100644
--- a/arch/sparc/include/uapi/asm/errno.h
+++ b/arch/sparc/include/uapi/asm/errno.h
@@ -48,6 +48,7 @@
 #define	ENOSR		74	/* Out of streams resources */
 #define	ENOMSG		75	/* No message of desired type */
 #define	EBADMSG		76	/* Not a data message */
+#define	EFSBADCRC	EBADMSG	/* Bad CRC detected */
 #define	EIDRM		77	/* Identifier removed */
 #define	EDEADLK		78	/* Resource deadlock would occur */
 #define	ENOLCK		79	/* No record locks available */
@@ -91,6 +92,7 @@
 #define	ENOTUNIQ	115	/* Name not unique on network */
 #define	ERESTART	116	/* Interrupted syscall should be restarted */
 #define	EUCLEAN		117	/* Structure needs cleaning */
+#define	EFSCORRUPTED	EUCLEAN	/* Filesystem is corrupted */
 #define	ENOTNAM		118	/* Not a XENIX named type file */
 #define	ENAVAIL		119	/* No XENIX semaphores available */
 #define	EISNAM		120	/* Is a named type file */
diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h
index f7f622836198da..d06e99baf5d5ae 100644
--- a/fs/erofs/internal.h
+++ b/fs/erofs/internal.h
@@ -541,6 +541,4 @@ long erofs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
 long erofs_compat_ioctl(struct file *filp, unsigned int cmd,
 			unsigned long arg);
 
-#define EFSCORRUPTED    EUCLEAN         /* Filesystem is corrupted */
-
 #endif	/* __EROFS_INTERNAL_H */
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index cf97b76e9fd3e9..5e0c6c5fcb6cd6 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -357,7 +357,6 @@ struct ext2_inode {
  */
 #define	EXT2_VALID_FS			0x0001	/* Unmounted cleanly */
 #define	EXT2_ERROR_FS			0x0002	/* Errors detected */
-#define	EFSCORRUPTED			EUCLEAN	/* Filesystem is corrupted */
 
 /*
  * Mount flags
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 56112f201cace7..62c091b52bacdf 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -3938,7 +3938,4 @@ extern int ext4_block_write_begin(handle_t *handle, struct folio *folio,
 				  get_block_t *get_block);
 #endif	/* __KERNEL__ */
 
-#define EFSBADCRC	EBADMSG		/* Bad CRC detected */
-#define EFSCORRUPTED	EUCLEAN		/* Filesystem is corrupted */
-
 #endif	/* _EXT4_H */
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 20edbb99b814a7..9f3aa3c7f12613 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -5004,7 +5004,4 @@ static inline void f2fs_invalidate_internal_cache(struct f2fs_sb_info *sbi,
 	f2fs_invalidate_compress_pages_range(sbi, blkaddr, len);
 }
 
-#define EFSBADCRC	EBADMSG		/* Bad CRC detected */
-#define EFSCORRUPTED	EUCLEAN		/* Filesystem is corrupted */
-
 #endif /* _LINUX_F2FS_H */
diff --git a/fs/minix/minix.h b/fs/minix/minix.h
index 2bfaf377f2086c..7e1f652f16d311 100644
--- a/fs/minix/minix.h
+++ b/fs/minix/minix.h
@@ -175,6 +175,4 @@ static inline int minix_test_bit(int nr, const void *vaddr)
 	__minix_error_inode((inode), __func__, __LINE__,	\
 			    (fmt), ##__VA_ARGS__)
 
-#define EFSCORRUPTED	EUCLEAN		/* Filesystem is corrupted */
-
 #endif /* FS_MINIX_H */
diff --git a/fs/udf/udf_sb.h b/fs/udf/udf_sb.h
index 08ec8756b9487b..8399accc788dea 100644
--- a/fs/udf/udf_sb.h
+++ b/fs/udf/udf_sb.h
@@ -55,8 +55,6 @@
 #define MF_DUPLICATE_MD		0x01
 #define MF_MIRROR_FE_LOADED	0x02
 
-#define EFSCORRUPTED EUCLEAN
-
 struct udf_meta_data {
 	__u32	s_meta_file_loc;
 	__u32	s_mirror_file_loc;
diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h
index 4dd747bdbccab2..55064228c4d574 100644
--- a/fs/xfs/xfs_linux.h
+++ b/fs/xfs/xfs_linux.h
@@ -121,8 +121,6 @@ typedef __u32			xfs_nlink_t;
 
 #define ENOATTR		ENODATA		/* Attribute not found */
 #define EWRONGFS	EINVAL		/* Mount with wrong filesystem type */
-#define EFSCORRUPTED	EUCLEAN		/* Filesystem is corrupted */
-#define EFSBADCRC	EBADMSG		/* Bad CRC detected */
 
 #define __return_address __builtin_return_address(0)
 
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index f5eaf76198f377..a53a00d36228ce 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -1815,7 +1815,4 @@ static inline int jbd2_handle_buffer_credits(handle_t *handle)
 
 #endif	/* __KERNEL__ */
 
-#define EFSBADCRC	EBADMSG		/* Bad CRC detected */
-#define EFSCORRUPTED	EUCLEAN		/* Filesystem is corrupted */
-
 #endif	/* _LINUX_JBD2_H */
diff --git a/include/uapi/asm-generic/errno.h b/include/uapi/asm-generic/errno.h
index cf9c51ac49f97e..92e7ae493ee315 100644
--- a/include/uapi/asm-generic/errno.h
+++ b/include/uapi/asm-generic/errno.h
@@ -55,6 +55,7 @@
 #define	EMULTIHOP	72	/* Multihop attempted */
 #define	EDOTDOT		73	/* RFS specific error */
 #define	EBADMSG		74	/* Not a data message */
+#define	EFSBADCRC	EBADMSG	/* Bad CRC detected */
 #define	EOVERFLOW	75	/* Value too large for defined data type */
 #define	ENOTUNIQ	76	/* Name not unique on network */
 #define	EBADFD		77	/* File descriptor in bad state */
@@ -98,6 +99,7 @@
 #define	EINPROGRESS	115	/* Operation now in progress */
 #define	ESTALE		116	/* Stale file handle */
 #define	EUCLEAN		117	/* Structure needs cleaning */
+#define	EFSCORRUPTED	EUCLEAN	/* Filesystem is corrupted */
 #define	ENOTNAM		118	/* Not a XENIX named type file */
 #define	ENAVAIL		119	/* No XENIX semaphores available */
 #define	EISNAM		120	/* Is a named type file */
diff --git a/tools/arch/alpha/include/uapi/asm/errno.h b/tools/arch/alpha/include/uapi/asm/errno.h
index 3d265f6babaf0a..6791f6508632ee 100644
--- a/tools/arch/alpha/include/uapi/asm/errno.h
+++ b/tools/arch/alpha/include/uapi/asm/errno.h
@@ -55,6 +55,7 @@
 #define	ENOSR		82	/* Out of streams resources */
 #define	ETIME		83	/* Timer expired */
 #define	EBADMSG		84	/* Not a data message */
+#define	EFSBADCRC	EBADMSG	/* Bad CRC detected */
 #define	EPROTO		85	/* Protocol error */
 #define	ENODATA		86	/* No data available */
 #define	ENOSTR		87	/* Device not a stream */
@@ -96,6 +97,7 @@
 #define	EREMCHG		115	/* Remote address changed */
 
 #define	EUCLEAN		117	/* Structure needs cleaning */
+#define	EFSCORRUPTED	EUCLEAN	/* Filesystem is corrupted */
 #define	ENOTNAM		118	/* Not a XENIX named type file */
 #define	ENAVAIL		119	/* No XENIX semaphores available */
 #define	EISNAM		120	/* Is a named type file */
diff --git a/tools/arch/mips/include/uapi/asm/errno.h b/tools/arch/mips/include/uapi/asm/errno.h
index 2fb714e2d6d8fc..c01ed91b1ef44b 100644
--- a/tools/arch/mips/include/uapi/asm/errno.h
+++ b/tools/arch/mips/include/uapi/asm/errno.h
@@ -50,6 +50,7 @@
 #define EDOTDOT		73	/* RFS specific error */
 #define EMULTIHOP	74	/* Multihop attempted */
 #define EBADMSG		77	/* Not a data message */
+#define EFSBADCRC	EBADMSG	/* Bad CRC detected */
 #define ENAMETOOLONG	78	/* File name too long */
 #define EOVERFLOW	79	/* Value too large for defined data type */
 #define ENOTUNIQ	80	/* Name not unique on network */
@@ -88,6 +89,7 @@
 #define EISCONN		133	/* Transport endpoint is already connected */
 #define ENOTCONN	134	/* Transport endpoint is not connected */
 #define EUCLEAN		135	/* Structure needs cleaning */
+#define EFSCORRUPTED	EUCLEAN	/* Filesystem is corrupted */
 #define ENOTNAM		137	/* Not a XENIX named type file */
 #define ENAVAIL		138	/* No XENIX semaphores available */
 #define EISNAM		139	/* Is a named type file */
diff --git a/tools/arch/parisc/include/uapi/asm/errno.h b/tools/arch/parisc/include/uapi/asm/errno.h
index 8d94739d75c67c..8cbc07c1903e4c 100644
--- a/tools/arch/parisc/include/uapi/asm/errno.h
+++ b/tools/arch/parisc/include/uapi/asm/errno.h
@@ -36,6 +36,7 @@
 
 #define	EDOTDOT		66	/* RFS specific error */
 #define	EBADMSG		67	/* Not a data message */
+#define	EFSBADCRC	EBADMSG	/* Bad CRC detected */
 #define	EUSERS		68	/* Too many users */
 #define	EDQUOT		69	/* Quota exceeded */
 #define	ESTALE		70	/* Stale file handle */
@@ -62,6 +63,7 @@
 #define	ERESTART	175	/* Interrupted system call should be restarted */
 #define	ESTRPIPE	176	/* Streams pipe error */
 #define	EUCLEAN		177	/* Structure needs cleaning */
+#define	EFSCORRUPTED	EUCLEAN	/* Filesystem is corrupted */
 #define	ENOTNAM		178	/* Not a XENIX named type file */
 #define	ENAVAIL		179	/* No XENIX semaphores available */
 #define	EISNAM		180	/* Is a named type file */
diff --git a/tools/arch/sparc/include/uapi/asm/errno.h b/tools/arch/sparc/include/uapi/asm/errno.h
index 81a732b902ee38..4a41e7835fd5b8 100644
--- a/tools/arch/sparc/include/uapi/asm/errno.h
+++ b/tools/arch/sparc/include/uapi/asm/errno.h
@@ -48,6 +48,7 @@
 #define	ENOSR		74	/* Out of streams resources */
 #define	ENOMSG		75	/* No message of desired type */
 #define	EBADMSG		76	/* Not a data message */
+#define	EFSBADCRC	EBADMSG	/* Bad CRC detected */
 #define	EIDRM		77	/* Identifier removed */
 #define	EDEADLK		78	/* Resource deadlock would occur */
 #define	ENOLCK		79	/* No record locks available */
@@ -91,6 +92,7 @@
 #define	ENOTUNIQ	115	/* Name not unique on network */
 #define	ERESTART	116	/* Interrupted syscall should be restarted */
 #define	EUCLEAN		117	/* Structure needs cleaning */
+#define	EFSCORRUPTED	EUCLEAN	/* Filesystem is corrupted */
 #define	ENOTNAM		118	/* Not a XENIX named type file */
 #define	ENAVAIL		119	/* No XENIX semaphores available */
 #define	EISNAM		120	/* Is a named type file */
diff --git a/tools/include/uapi/asm-generic/errno.h b/tools/include/uapi/asm-generic/errno.h
index cf9c51ac49f97e..92e7ae493ee315 100644
--- a/tools/include/uapi/asm-generic/errno.h
+++ b/tools/include/uapi/asm-generic/errno.h
@@ -55,6 +55,7 @@
 #define	EMULTIHOP	72	/* Multihop attempted */
 #define	EDOTDOT		73	/* RFS specific error */
 #define	EBADMSG		74	/* Not a data message */
+#define	EFSBADCRC	EBADMSG	/* Bad CRC detected */
 #define	EOVERFLOW	75	/* Value too large for defined data type */
 #define	ENOTUNIQ	76	/* Name not unique on network */
 #define	EBADFD		77	/* File descriptor in bad state */
@@ -98,6 +99,7 @@
 #define	EINPROGRESS	115	/* Operation now in progress */
 #define	ESTALE		116	/* Stale file handle */
 #define	EUCLEAN		117	/* Structure needs cleaning */
+#define	EFSCORRUPTED	EUCLEAN	/* Filesystem is corrupted */
 #define	ENOTNAM		118	/* Not a XENIX named type file */
 #define	ENAVAIL		119	/* No XENIX semaphores available */
 #define	EISNAM		120	/* Is a named type file */


^ permalink raw reply related

* [PATCHSET v5] fs: generic file IO error reporting
From: Darrick J. Wong @ 2026-01-13  0:31 UTC (permalink / raw)
  To: djwong, brauner
  Cc: linux-api, jack, hch, hsiangkao, linux-xfs, jack, linux-ext4,
	linux-fsdevel, gabriel, hch, amir73il

Hi all,

This patchset adds some generic helpers so that filesystems can report
errors to fsnotify in a standard way.  Then it adapts iomap to use the
generic helpers so that any iomap-enabled filesystem can report I/O
errors through this mechanism as well.  Finally, it makes XFS report
metadata errors through this mechanism in much the same way that ext4
does now.

These are a prerequisite for the XFS self-healing series which will
come at a later time.

v5: tidy comments, un-inline the unmount function
v4: drag out of RFC status, finalize the sign of errnos that we accept

If you're going to start using this code, I strongly recommend pulling
from my git trees, which are linked below.

This has been running on the djcloud for months with no problems.  Enjoy!
Comments and questions are, as always, welcome.

--D

kernel git tree:
https://git.kernel.org/cgit/linux/kernel/git/djwong/xfs-linux.git/log/?h=filesystem-error-reporting

fstests git tree:
https://git.kernel.org/cgit/linux/kernel/git/djwong/xfstests-dev.git/log/?h=filesystem-error-reporting
---
Commits in this patchset:
 * uapi: promote EFSCORRUPTED and EUCLEAN to errno.h
 * fs: report filesystem and file I/O errors to fsnotify
 * iomap: report file I/O errors to the VFS
 * xfs: report fs metadata errors via fsnotify
 * xfs: translate fsdax media errors into file "data lost" errors when convenient
 * ext4: convert to new fserror helpers
---
 arch/alpha/include/uapi/asm/errno.h        |    2 
 arch/mips/include/uapi/asm/errno.h         |    2 
 arch/parisc/include/uapi/asm/errno.h       |    2 
 arch/sparc/include/uapi/asm/errno.h        |    2 
 fs/erofs/internal.h                        |    2 
 fs/ext2/ext2.h                             |    1 
 fs/ext4/ext4.h                             |    3 
 fs/f2fs/f2fs.h                             |    3 
 fs/minix/minix.h                           |    2 
 fs/udf/udf_sb.h                            |    2 
 fs/xfs/xfs_linux.h                         |    2 
 include/linux/fs/super_types.h             |    7 +
 include/linux/fserror.h                    |   75 +++++++++++
 include/linux/jbd2.h                       |    3 
 include/uapi/asm-generic/errno.h           |    2 
 tools/arch/alpha/include/uapi/asm/errno.h  |    2 
 tools/arch/mips/include/uapi/asm/errno.h   |    2 
 tools/arch/parisc/include/uapi/asm/errno.h |    2 
 tools/arch/sparc/include/uapi/asm/errno.h  |    2 
 tools/include/uapi/asm-generic/errno.h     |    2 
 fs/Makefile                                |    2 
 fs/ext4/ioctl.c                            |    2 
 fs/ext4/super.c                            |   13 +-
 fs/fserror.c                               |  194 ++++++++++++++++++++++++++++
 fs/iomap/buffered-io.c                     |   23 +++
 fs/iomap/direct-io.c                       |   12 ++
 fs/iomap/ioend.c                           |    6 +
 fs/super.c                                 |    3 
 fs/xfs/xfs_fsops.c                         |    4 +
 fs/xfs/xfs_health.c                        |   14 ++
 fs/xfs/xfs_notify_failure.c                |    4 +
 31 files changed, 373 insertions(+), 24 deletions(-)
 create mode 100644 include/linux/fserror.h
 create mode 100644 fs/fserror.c


^ permalink raw reply

* [PATCH bpf-next v5 9/9] selftests/bpf: Add tests to verify map create failure log
From: Leon Hwang @ 2026-01-12 14:56 UTC (permalink / raw)
  To: bpf
  Cc: Alexei Starovoitov, Daniel Borkmann, John Fastabend,
	Andrii Nakryiko, Martin KaFai Lau, Eduard Zingerman, Song Liu,
	Yonghong Song, KP Singh, Stanislav Fomichev, Hao Luo, Jiri Olsa,
	Shuah Khan, Christian Brauner, Seth Forshee, Yuichiro Tsuji,
	Andrey Albershteyn, Leon Hwang, Willem de Bruijn, Jason Xing,
	Tao Chen, Mykyta Yatsenko, Kumar Kartikeya Dwivedi,
	Anton Protopopov, Amery Hung, Rong Tao, linux-kernel, linux-api,
	linux-kselftest, kernel-patches-bot
In-Reply-To: <20260112145616.44195-1-leon.hwang@linux.dev>

Add tests to verify that the kernel reports the expected error messages
when map creation fails.

Signed-off-by: Leon Hwang <leon.hwang@linux.dev>
---
 .../selftests/bpf/prog_tests/map_init.c       | 168 ++++++++++++++++++
 1 file changed, 168 insertions(+)

diff --git a/tools/testing/selftests/bpf/prog_tests/map_init.c b/tools/testing/selftests/bpf/prog_tests/map_init.c
index 14a31109dd0e..824e2bea74bf 100644
--- a/tools/testing/selftests/bpf/prog_tests/map_init.c
+++ b/tools/testing/selftests/bpf/prog_tests/map_init.c
@@ -212,3 +212,171 @@ void test_map_init(void)
 	if (test__start_subtest("pcpu_lru_map_init"))
 		test_pcpu_lru_map_init();
 }
+
+#define BPF_LOG_FIXED	8
+
+static void test_map_create(enum bpf_map_type map_type, const char *map_name,
+			    struct bpf_map_create_opts *opts, const char *exp_msg)
+{
+	const int key_size = 4, value_size = 4, max_entries = 1;
+	char log_buf[128];
+	int fd;
+	LIBBPF_OPTS(bpf_syscall_common_attr_opts, copts);
+
+	log_buf[0] = '\0';
+	copts.log_buf = log_buf;
+	copts.log_size = sizeof(log_buf);
+	copts.log_level = BPF_LOG_FIXED;
+	opts->common_attr_opts = &copts;
+	fd = bpf_map_create(map_type, map_name, key_size, value_size, max_entries, opts);
+	if (!ASSERT_LT(fd, 0, "bpf_map_create")) {
+		close(fd);
+		return;
+	}
+
+	ASSERT_STREQ(log_buf, exp_msg, "log_buf");
+	ASSERT_EQ(copts.log_true_size, strlen(exp_msg) + 1, "log_true_size");
+}
+
+static void test_map_create_array(struct bpf_map_create_opts *opts, const char *exp_msg)
+{
+	test_map_create(BPF_MAP_TYPE_ARRAY, "test_map_create", opts, exp_msg);
+}
+
+static void test_invalid_vmlinux_value_type_id_struct_ops(void)
+{
+	const char *msg = "btf_vmlinux_value_type_id can only be used with struct_ops maps.\n";
+	LIBBPF_OPTS(bpf_map_create_opts, opts,
+		    .btf_vmlinux_value_type_id = 1,
+	);
+
+	test_map_create_array(&opts, msg);
+}
+
+static void test_invalid_vmlinux_value_type_id_kv_type_id(void)
+{
+	const char *msg = "btf_vmlinux_value_type_id is mutually exclusive with btf_key_type_id and btf_value_type_id.\n";
+	LIBBPF_OPTS(bpf_map_create_opts, opts,
+		    .btf_vmlinux_value_type_id = 1,
+		    .btf_key_type_id = 1,
+	);
+
+	test_map_create(BPF_MAP_TYPE_STRUCT_OPS, "test_map_create", &opts, msg);
+}
+
+static void test_invalid_value_type_id(void)
+{
+	const char *msg = "Invalid btf_value_type_id.\n";
+	LIBBPF_OPTS(bpf_map_create_opts, opts,
+		    .btf_key_type_id = 1,
+	);
+
+	test_map_create_array(&opts, msg);
+}
+
+static void test_invalid_map_extra(void)
+{
+	const char *msg = "Invalid map_extra.\n";
+	LIBBPF_OPTS(bpf_map_create_opts, opts,
+		    .map_extra = 1,
+	);
+
+	test_map_create_array(&opts, msg);
+}
+
+static void test_invalid_numa_node(void)
+{
+	const char *msg = "Invalid numa_node.\n";
+	LIBBPF_OPTS(bpf_map_create_opts, opts,
+		    .map_flags = BPF_F_NUMA_NODE,
+		    .numa_node = 0xFF,
+	);
+
+	test_map_create_array(&opts, msg);
+}
+
+static void test_invalid_map_type(void)
+{
+	const char *msg = "Invalid map_type.\n";
+	LIBBPF_OPTS(bpf_map_create_opts, opts);
+
+	test_map_create(__MAX_BPF_MAP_TYPE, "test_map_create", &opts, msg);
+}
+
+static void test_invalid_token_fd(void)
+{
+	const char *msg = "Invalid map_token_fd.\n";
+	LIBBPF_OPTS(bpf_map_create_opts, opts,
+		    .map_flags = BPF_F_TOKEN_FD,
+		    .token_fd = 0xFF,
+	);
+
+	test_map_create_array(&opts, msg);
+}
+
+static void test_invalid_map_name(void)
+{
+	const char *msg = "Invalid map_name.\n";
+	LIBBPF_OPTS(bpf_map_create_opts, opts);
+
+	test_map_create(BPF_MAP_TYPE_ARRAY, "test-!@#", &opts, msg);
+}
+
+static void test_invalid_btf_fd(void)
+{
+	const char *msg = "Invalid btf_fd.\n";
+	LIBBPF_OPTS(bpf_map_create_opts, opts,
+		    .btf_fd = -1,
+		    .btf_key_type_id = 1,
+		    .btf_value_type_id = 1,
+	);
+
+	test_map_create_array(&opts, msg);
+}
+
+static void test_excl_prog_hash_size_1(void)
+{
+	const char *msg = "Invalid excl_prog_hash_size.\n";
+	const char *hash = "DEADCODE";
+	LIBBPF_OPTS(bpf_map_create_opts, opts,
+		    .excl_prog_hash = hash,
+	);
+
+	test_map_create_array(&opts, msg);
+}
+
+static void test_excl_prog_hash_size_2(void)
+{
+	const char *msg = "Invalid excl_prog_hash_size.\n";
+	LIBBPF_OPTS(bpf_map_create_opts, opts,
+		    .excl_prog_hash_size = 1,
+	);
+
+	test_map_create_array(&opts, msg);
+}
+
+void test_map_create_failure(void)
+{
+	if (test__start_subtest("invalid_vmlinux_value_type_id_struct_ops"))
+		test_invalid_vmlinux_value_type_id_struct_ops();
+	if (test__start_subtest("invalid_vmlinux_value_type_id_kv_type_id"))
+		test_invalid_vmlinux_value_type_id_kv_type_id();
+	if (test__start_subtest("invalid_value_type_id"))
+		test_invalid_value_type_id();
+	if (test__start_subtest("invalid_map_extra"))
+		test_invalid_map_extra();
+	if (test__start_subtest("invalid_numa_node"))
+		test_invalid_numa_node();
+	if (test__start_subtest("invalid_map_type"))
+		test_invalid_map_type();
+	if (test__start_subtest("invalid_token_fd"))
+		test_invalid_token_fd();
+	if (test__start_subtest("invalid_map_name"))
+		test_invalid_map_name();
+	if (test__start_subtest("invalid_btf_fd"))
+		test_invalid_btf_fd();
+	if (test__start_subtest("invalid_excl_prog_hash_size_1"))
+		test_excl_prog_hash_size_1();
+	if (test__start_subtest("invalid_excl_prog_hash_size_2"))
+		test_excl_prog_hash_size_2();
+}
-- 
2.52.0


^ permalink raw reply related

* [PATCH bpf-next v5 8/9] libbpf: Add common attr support for map_create
From: Leon Hwang @ 2026-01-12 14:56 UTC (permalink / raw)
  To: bpf
  Cc: Alexei Starovoitov, Daniel Borkmann, John Fastabend,
	Andrii Nakryiko, Martin KaFai Lau, Eduard Zingerman, Song Liu,
	Yonghong Song, KP Singh, Stanislav Fomichev, Hao Luo, Jiri Olsa,
	Shuah Khan, Christian Brauner, Seth Forshee, Yuichiro Tsuji,
	Andrey Albershteyn, Leon Hwang, Willem de Bruijn, Jason Xing,
	Tao Chen, Mykyta Yatsenko, Kumar Kartikeya Dwivedi,
	Anton Protopopov, Amery Hung, Rong Tao, linux-kernel, linux-api,
	linux-kselftest, kernel-patches-bot
In-Reply-To: <20260112145616.44195-1-leon.hwang@linux.dev>

With the previous commit adding common attribute support for
BPF_MAP_CREATE, users can now retrieve detailed error messages when map
creation fails via the log_buf field.

Introduce struct bpf_syscall_common_attr_opts with the following fields:
log_buf, log_size, log_level, and log_true_size.

Extend bpf_map_create_opts with a new field common_attr_opts, allowing
users to capture and inspect log messages on map creation failures.

Signed-off-by: Leon Hwang <leon.hwang@linux.dev>
---
 tools/lib/bpf/bpf.c | 15 ++++++++++++++-
 tools/lib/bpf/bpf.h | 17 ++++++++++++++++-
 2 files changed, 30 insertions(+), 2 deletions(-)

diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index d44e667aaf02..d65df1b7b2be 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -207,6 +207,9 @@ int bpf_map_create(enum bpf_map_type map_type,
 		   const struct bpf_map_create_opts *opts)
 {
 	const size_t attr_sz = offsetofend(union bpf_attr, excl_prog_hash_size);
+	const size_t common_attr_sz = sizeof(struct bpf_common_attr);
+	struct bpf_syscall_common_attr_opts *common_attr_opts;
+	struct bpf_common_attr common_attr;
 	union bpf_attr attr;
 	int fd;
 
@@ -240,7 +243,17 @@ int bpf_map_create(enum bpf_map_type map_type,
 	attr.excl_prog_hash = ptr_to_u64(OPTS_GET(opts, excl_prog_hash, NULL));
 	attr.excl_prog_hash_size = OPTS_GET(opts, excl_prog_hash_size, 0);
 
-	fd = sys_bpf_fd(BPF_MAP_CREATE, &attr, attr_sz);
+	common_attr_opts = OPTS_GET(opts, common_attr_opts, NULL);
+	if (common_attr_opts && feat_supported(NULL, FEAT_EXTENDED_SYSCALL)) {
+		memset(&common_attr, 0, common_attr_sz);
+		common_attr.log_buf = ptr_to_u64(OPTS_GET(common_attr_opts, log_buf, NULL));
+		common_attr.log_size = OPTS_GET(common_attr_opts, log_size, 0);
+		common_attr.log_level = OPTS_GET(common_attr_opts, log_level, 0);
+		fd = sys_bpf_ext_fd(BPF_MAP_CREATE, &attr, attr_sz, &common_attr, common_attr_sz);
+		OPTS_SET(common_attr_opts, log_true_size, common_attr.log_true_size);
+	} else {
+		fd = sys_bpf_fd(BPF_MAP_CREATE, &attr, attr_sz);
+	}
 	return libbpf_err_errno(fd);
 }
 
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index 2c8e88ddb674..c4a26e6b71ea 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -37,6 +37,18 @@ extern "C" {
 
 LIBBPF_API int libbpf_set_memlock_rlim(size_t memlock_bytes);
 
+struct bpf_syscall_common_attr_opts {
+	size_t sz; /* size of this struct for forward/backward compatibility */
+
+	char *log_buf;
+	__u32 log_size;
+	__u32 log_level;
+	__u32 log_true_size;
+
+	size_t :0;
+};
+#define bpf_syscall_common_attr_opts__last_field log_true_size
+
 struct bpf_map_create_opts {
 	size_t sz; /* size of this struct for forward/backward compatibility */
 
@@ -57,9 +69,12 @@ struct bpf_map_create_opts {
 
 	const void *excl_prog_hash;
 	__u32 excl_prog_hash_size;
+
+	struct bpf_syscall_common_attr_opts *common_attr_opts;
+
 	size_t :0;
 };
-#define bpf_map_create_opts__last_field excl_prog_hash_size
+#define bpf_map_create_opts__last_field common_attr_opts
 
 LIBBPF_API int bpf_map_create(enum bpf_map_type map_type,
 			      const char *map_name,
-- 
2.52.0


^ permalink raw reply related

* [PATCH bpf-next v5 7/9] bpf: Add syscall common attributes support for map_create
From: Leon Hwang @ 2026-01-12 14:56 UTC (permalink / raw)
  To: bpf
  Cc: Alexei Starovoitov, Daniel Borkmann, John Fastabend,
	Andrii Nakryiko, Martin KaFai Lau, Eduard Zingerman, Song Liu,
	Yonghong Song, KP Singh, Stanislav Fomichev, Hao Luo, Jiri Olsa,
	Shuah Khan, Christian Brauner, Seth Forshee, Yuichiro Tsuji,
	Andrey Albershteyn, Leon Hwang, Willem de Bruijn, Jason Xing,
	Tao Chen, Mykyta Yatsenko, Kumar Kartikeya Dwivedi,
	Anton Protopopov, Amery Hung, Rong Tao, linux-kernel, linux-api,
	linux-kselftest, kernel-patches-bot
In-Reply-To: <20260112145616.44195-1-leon.hwang@linux.dev>

Currently, many BPF_MAP_CREATE failures return -EINVAL without providing
any explanation to userspace.

With extended BPF syscall support, detailed error messages can now be
reported via the log buffer, allowing users to understand the specific
reason for a failed map creation.

Signed-off-by: Leon Hwang <leon.hwang@linux.dev>
---
 include/linux/bpf_verifier.h |  2 ++
 kernel/bpf/log.c             | 30 +++++++++++++++++
 kernel/bpf/syscall.c         | 65 ++++++++++++++++++++++++++++++------
 3 files changed, 87 insertions(+), 10 deletions(-)

diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 9022e4f515f9..280beca480ea 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -644,6 +644,8 @@ int bpf_prog_load_log_attr_init(struct bpf_log_attr *log_attr, struct bpf_attrs
 				struct bpf_attrs *attrs_common);
 int bpf_btf_load_log_attr_init(struct bpf_log_attr *log_attr, struct bpf_attrs *attrs,
 			       struct bpf_attrs *attrs_common);
+struct bpf_verifier_log *bpf_log_attr_create_vlog(struct bpf_log_attr *log_attr,
+						  struct bpf_attrs *attrs_common);
 int bpf_log_attr_finalize(struct bpf_log_attr *log_attr, struct bpf_verifier_log *log);
 
 #define BPF_MAX_SUBPROGS 256
diff --git a/kernel/bpf/log.c b/kernel/bpf/log.c
index 0dba014ca055..6586d752970f 100644
--- a/kernel/bpf/log.c
+++ b/kernel/bpf/log.c
@@ -912,6 +912,36 @@ int bpf_btf_load_log_attr_init(struct bpf_log_attr *log_attr, struct bpf_attrs *
 				 attrs_common);
 }
 
+struct bpf_verifier_log *bpf_log_attr_create_vlog(struct bpf_log_attr *log_attr,
+						  struct bpf_attrs *attrs_common)
+{
+	const struct bpf_common_attr *common_attr = attrs_common->attr;
+	struct bpf_verifier_log *log;
+	int err;
+
+	memset(log_attr, 0, sizeof(*log_attr));
+	log_attr->log_buf = common_attr->log_buf;
+	log_attr->log_size = common_attr->log_size;
+	log_attr->log_level = common_attr->log_level;
+	log_attr->attrs_common = attrs_common;
+
+	if (!log_attr->log_buf)
+		return NULL;
+
+	log = kzalloc(sizeof(*log), GFP_KERNEL);
+	if (!log)
+		return ERR_PTR(-ENOMEM);
+
+	err = bpf_vlog_init(log, log_attr->log_level, u64_to_user_ptr(log_attr->log_buf),
+			    log_attr->log_size);
+	if (err) {
+		kfree(log);
+		return ERR_PTR(err);
+	}
+
+	return log;
+}
+
 int bpf_log_attr_finalize(struct bpf_log_attr *log_attr, struct bpf_verifier_log *log)
 {
 	u32 log_true_size, off;
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index d0440e640e40..52e1ab142da9 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1370,7 +1370,7 @@ static bool bpf_net_capable(void)
 
 #define BPF_MAP_CREATE_LAST_FIELD excl_prog_hash_size
 /* called via syscall */
-static int map_create(union bpf_attr *attr, bpfptr_t uattr)
+static int __map_create(union bpf_attr *attr, bpfptr_t uattr, struct bpf_verifier_log *log)
 {
 	const struct bpf_map_ops *ops;
 	struct bpf_token *token = NULL;
@@ -1382,8 +1382,10 @@ static int map_create(union bpf_attr *attr, bpfptr_t uattr)
 	int err;
 
 	err = CHECK_ATTR(BPF_MAP_CREATE);
-	if (err)
+	if (err) {
+		bpf_log(log, "Invalid attr.\n");
 		return -EINVAL;
+	}
 
 	/* check BPF_F_TOKEN_FD flag, remember if it's set, and then clear it
 	 * to avoid per-map type checks tripping on unknown flag
@@ -1392,17 +1394,25 @@ static int map_create(union bpf_attr *attr, bpfptr_t uattr)
 	attr->map_flags &= ~BPF_F_TOKEN_FD;
 
 	if (attr->btf_vmlinux_value_type_id) {
-		if (attr->map_type != BPF_MAP_TYPE_STRUCT_OPS ||
-		    attr->btf_key_type_id || attr->btf_value_type_id)
+		if (attr->map_type != BPF_MAP_TYPE_STRUCT_OPS) {
+			bpf_log(log, "btf_vmlinux_value_type_id can only be used with struct_ops maps.\n");
 			return -EINVAL;
+		}
+		if (attr->btf_key_type_id || attr->btf_value_type_id) {
+			bpf_log(log, "btf_vmlinux_value_type_id is mutually exclusive with btf_key_type_id and btf_value_type_id.\n");
+			return -EINVAL;
+		}
 	} else if (attr->btf_key_type_id && !attr->btf_value_type_id) {
+		bpf_log(log, "Invalid btf_value_type_id.\n");
 		return -EINVAL;
 	}
 
 	if (attr->map_type != BPF_MAP_TYPE_BLOOM_FILTER &&
 	    attr->map_type != BPF_MAP_TYPE_ARENA &&
-	    attr->map_extra != 0)
+	    attr->map_extra != 0) {
+		bpf_log(log, "Invalid map_extra.\n");
 		return -EINVAL;
+	}
 
 	f_flags = bpf_get_file_flag(attr->map_flags);
 	if (f_flags < 0)
@@ -1410,13 +1420,17 @@ static int map_create(union bpf_attr *attr, bpfptr_t uattr)
 
 	if (numa_node != NUMA_NO_NODE &&
 	    ((unsigned int)numa_node >= nr_node_ids ||
-	     !node_online(numa_node)))
+	     !node_online(numa_node))) {
+		bpf_log(log, "Invalid numa_node.\n");
 		return -EINVAL;
+	}
 
 	/* find map type and init map: hashtable vs rbtree vs bloom vs ... */
 	map_type = attr->map_type;
-	if (map_type >= ARRAY_SIZE(bpf_map_types))
+	if (map_type >= ARRAY_SIZE(bpf_map_types)) {
+		bpf_log(log, "Invalid map_type.\n");
 		return -EINVAL;
+	}
 	map_type = array_index_nospec(map_type, ARRAY_SIZE(bpf_map_types));
 	ops = bpf_map_types[map_type];
 	if (!ops)
@@ -1434,8 +1448,10 @@ static int map_create(union bpf_attr *attr, bpfptr_t uattr)
 
 	if (token_flag) {
 		token = bpf_token_get_from_fd(attr->map_token_fd);
-		if (IS_ERR(token))
+		if (IS_ERR(token)) {
+			bpf_log(log, "Invalid map_token_fd.\n");
 			return PTR_ERR(token);
+		}
 
 		/* if current token doesn't grant map creation permissions,
 		 * then we can't use this token, so ignore it and rely on
@@ -1518,8 +1534,10 @@ static int map_create(union bpf_attr *attr, bpfptr_t uattr)
 
 	err = bpf_obj_name_cpy(map->name, attr->map_name,
 			       sizeof(attr->map_name));
-	if (err < 0)
+	if (err < 0) {
+		bpf_log(log, "Invalid map_name.\n");
 		goto free_map;
+	}
 
 	preempt_disable();
 	map->cookie = gen_cookie_next(&bpf_map_cookie);
@@ -1542,6 +1560,7 @@ static int map_create(union bpf_attr *attr, bpfptr_t uattr)
 
 		btf = btf_get_by_fd(attr->btf_fd);
 		if (IS_ERR(btf)) {
+			bpf_log(log, "Invalid btf_fd.\n");
 			err = PTR_ERR(btf);
 			goto free_map;
 		}
@@ -1569,6 +1588,7 @@ static int map_create(union bpf_attr *attr, bpfptr_t uattr)
 		bpfptr_t uprog_hash = make_bpfptr(attr->excl_prog_hash, uattr.is_kernel);
 
 		if (attr->excl_prog_hash_size != SHA256_DIGEST_SIZE) {
+			bpf_log(log, "Invalid excl_prog_hash_size.\n");
 			err = -EINVAL;
 			goto free_map;
 		}
@@ -1584,6 +1604,7 @@ static int map_create(union bpf_attr *attr, bpfptr_t uattr)
 			goto free_map;
 		}
 	} else if (attr->excl_prog_hash_size) {
+		bpf_log(log, "Invalid excl_prog_hash_size.\n");
 		err = -EINVAL;
 		goto free_map;
 	}
@@ -1622,6 +1643,29 @@ static int map_create(union bpf_attr *attr, bpfptr_t uattr)
 	return err;
 }
 
+static int map_create(union bpf_attr *attr, bpfptr_t uattr, struct bpf_attrs *common_attrs)
+{
+	struct bpf_verifier_log *log;
+	struct bpf_log_attr log_attr;
+	int err, ret;
+
+	log = bpf_log_attr_create_vlog(&log_attr, common_attrs);
+	if (IS_ERR(log))
+		return PTR_ERR(log);
+
+	err = __map_create(attr, uattr, log);
+	if (err >= 0)
+		goto free;
+
+	ret = bpf_log_attr_finalize(&log_attr, log);
+	if (ret)
+		err = ret;
+
+free:
+	kfree(log);
+	return err;
+}
+
 void bpf_map_inc(struct bpf_map *map)
 {
 	atomic64_inc(&map->refcnt);
@@ -6218,7 +6262,8 @@ static int __sys_bpf(enum bpf_cmd cmd, bpfptr_t uattr, unsigned int size,
 
 	switch (cmd) {
 	case BPF_MAP_CREATE:
-		err = map_create(&attr, uattr);
+		bpf_attrs_init(&common_attrs, &common_attr, uattr_common, size_common);
+		err = map_create(&attr, uattr, &common_attrs);
 		break;
 	case BPF_MAP_LOOKUP_ELEM:
 		err = map_lookup_elem(&attr);
-- 
2.52.0


^ permalink raw reply related

* [PATCH bpf-next v5 6/9] bpf: Add syscall common attributes support for btf_load
From: Leon Hwang @ 2026-01-12 14:56 UTC (permalink / raw)
  To: bpf
  Cc: Alexei Starovoitov, Daniel Borkmann, John Fastabend,
	Andrii Nakryiko, Martin KaFai Lau, Eduard Zingerman, Song Liu,
	Yonghong Song, KP Singh, Stanislav Fomichev, Hao Luo, Jiri Olsa,
	Shuah Khan, Christian Brauner, Seth Forshee, Yuichiro Tsuji,
	Andrey Albershteyn, Leon Hwang, Willem de Bruijn, Jason Xing,
	Tao Chen, Mykyta Yatsenko, Kumar Kartikeya Dwivedi,
	Anton Protopopov, Amery Hung, Rong Tao, linux-kernel, linux-api,
	linux-kselftest, kernel-patches-bot
In-Reply-To: <20260112145616.44195-1-leon.hwang@linux.dev>

Since bpf_log_attr_init() now supports struct bpf_common_attr, pass the
common attributes to it to enable syscall common attributes support for
BPF_BTF_LOAD.

Signed-off-by: Leon Hwang <leon.hwang@linux.dev>
---
 include/linux/bpf_verifier.h | 3 ++-
 kernel/bpf/log.c             | 5 +++--
 kernel/bpf/syscall.c         | 8 +++++---
 3 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 44dd60de1966..9022e4f515f9 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -642,7 +642,8 @@ struct bpf_log_attr {
 
 int bpf_prog_load_log_attr_init(struct bpf_log_attr *log_attr, struct bpf_attrs *attrs,
 				struct bpf_attrs *attrs_common);
-int bpf_btf_load_log_attr_init(struct bpf_log_attr *log_attr, struct bpf_attrs *attrs);
+int bpf_btf_load_log_attr_init(struct bpf_log_attr *log_attr, struct bpf_attrs *attrs,
+			       struct bpf_attrs *attrs_common);
 int bpf_log_attr_finalize(struct bpf_log_attr *log_attr, struct bpf_verifier_log *log);
 
 #define BPF_MAX_SUBPROGS 256
diff --git a/kernel/bpf/log.c b/kernel/bpf/log.c
index 96df089a2c89..0dba014ca055 100644
--- a/kernel/bpf/log.c
+++ b/kernel/bpf/log.c
@@ -902,13 +902,14 @@ int bpf_prog_load_log_attr_init(struct bpf_log_attr *log_attr, struct bpf_attrs
 				 offsetof(union bpf_attr, log_true_size), attrs_common);
 }
 
-int bpf_btf_load_log_attr_init(struct bpf_log_attr *log_attr, struct bpf_attrs *attrs)
+int bpf_btf_load_log_attr_init(struct bpf_log_attr *log_attr, struct bpf_attrs *attrs,
+			       struct bpf_attrs *attrs_common)
 {
 	const union bpf_attr *attr = attrs->attr;
 
 	return bpf_log_attr_init(log_attr, attrs, attr->btf_log_buf, attr->btf_log_size,
 				 attr->btf_log_level, offsetof(union bpf_attr, btf_log_true_size),
-				 NULL);
+				 attrs_common);
 }
 
 int bpf_log_attr_finalize(struct bpf_log_attr *log_attr, struct bpf_verifier_log *log)
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 8382fafc8d17..d0440e640e40 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -5433,7 +5433,8 @@ static int bpf_obj_get_info_by_fd(const union bpf_attr *attr,
 
 #define BPF_BTF_LOAD_LAST_FIELD btf_token_fd
 
-static int bpf_btf_load(const union bpf_attr *attr, bpfptr_t uattr, __u32 uattr_size)
+static int bpf_btf_load(const union bpf_attr *attr, bpfptr_t uattr, __u32 uattr_size,
+			struct bpf_attrs *common_attrs)
 {
 	struct bpf_token *token = NULL;
 	struct bpf_log_attr log_attr;
@@ -5447,7 +5448,7 @@ static int bpf_btf_load(const union bpf_attr *attr, bpfptr_t uattr, __u32 uattr_
 		return -EINVAL;
 
 	bpf_attrs_init(&attrs, attr, uattr, uattr_size);
-	err = bpf_btf_load_log_attr_init(&log_attr, &attrs);
+	err = bpf_btf_load_log_attr_init(&log_attr, &attrs, common_attrs);
 	if (err)
 		return err;
 
@@ -6281,7 +6282,8 @@ static int __sys_bpf(enum bpf_cmd cmd, bpfptr_t uattr, unsigned int size,
 		err = bpf_raw_tracepoint_open(&attr);
 		break;
 	case BPF_BTF_LOAD:
-		err = bpf_btf_load(&attr, uattr, size);
+		bpf_attrs_init(&common_attrs, &common_attr, uattr_common, size_common);
+		err = bpf_btf_load(&attr, uattr, size, &common_attrs);
 		break;
 	case BPF_BTF_GET_FD_BY_ID:
 		err = bpf_btf_get_fd_by_id(&attr);
-- 
2.52.0


^ permalink raw reply related

* [PATCH bpf-next v5 5/9] bpf: Refactor reporting btf_log_true_size for btf_load
From: Leon Hwang @ 2026-01-12 14:56 UTC (permalink / raw)
  To: bpf
  Cc: Alexei Starovoitov, Daniel Borkmann, John Fastabend,
	Andrii Nakryiko, Martin KaFai Lau, Eduard Zingerman, Song Liu,
	Yonghong Song, KP Singh, Stanislav Fomichev, Hao Luo, Jiri Olsa,
	Shuah Khan, Christian Brauner, Seth Forshee, Yuichiro Tsuji,
	Andrey Albershteyn, Leon Hwang, Willem de Bruijn, Jason Xing,
	Tao Chen, Mykyta Yatsenko, Kumar Kartikeya Dwivedi,
	Anton Protopopov, Amery Hung, Rong Tao, linux-kernel, linux-api,
	linux-kselftest, kernel-patches-bot
In-Reply-To: <20260112145616.44195-1-leon.hwang@linux.dev>

In the next commit, it will be able to report logs via extended common
attributes, which will report 'log_true_size' via the extended common
attributes meanwhile.

Therefore, refactor the way of 'btf_log_true_size' reporting in order to
report 'log_true_size' via the extended common attributes easily.

Signed-off-by: Leon Hwang <leon.hwang@linux.dev>
---
 include/linux/bpf_verifier.h |  1 +
 include/linux/btf.h          |  3 ++-
 kernel/bpf/btf.c             | 32 +++++++++-----------------------
 kernel/bpf/log.c             |  9 +++++++++
 kernel/bpf/syscall.c         | 10 +++++++++-
 5 files changed, 30 insertions(+), 25 deletions(-)

diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index da2d37ca60e7..44dd60de1966 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -642,6 +642,7 @@ struct bpf_log_attr {
 
 int bpf_prog_load_log_attr_init(struct bpf_log_attr *log_attr, struct bpf_attrs *attrs,
 				struct bpf_attrs *attrs_common);
+int bpf_btf_load_log_attr_init(struct bpf_log_attr *log_attr, struct bpf_attrs *attrs);
 int bpf_log_attr_finalize(struct bpf_log_attr *log_attr, struct bpf_verifier_log *log);
 
 #define BPF_MAX_SUBPROGS 256
diff --git a/include/linux/btf.h b/include/linux/btf.h
index 691f09784933..df04843a4635 100644
--- a/include/linux/btf.h
+++ b/include/linux/btf.h
@@ -144,7 +144,8 @@ const char *btf_get_name(const struct btf *btf);
 void btf_get(struct btf *btf);
 void btf_put(struct btf *btf);
 const struct btf_header *btf_header(const struct btf *btf);
-int btf_new_fd(const union bpf_attr *attr, bpfptr_t uattr, u32 uattr_sz);
+struct bpf_log_attr;
+int btf_new_fd(const union bpf_attr *attr, bpfptr_t uattr, struct bpf_log_attr *log_attr);
 struct btf *btf_get_by_fd(int fd);
 int btf_get_info_by_fd(const struct btf *btf,
 		       const union bpf_attr *attr,
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 539c9fdea41d..f9973f12a482 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -5745,25 +5745,11 @@ static int btf_check_type_tags(struct btf_verifier_env *env,
 	return 0;
 }
 
-static int finalize_log(struct bpf_verifier_log *log, bpfptr_t uattr, u32 uattr_size)
-{
-	u32 log_true_size;
-	int err;
-
-	err = bpf_vlog_finalize(log, &log_true_size);
-
-	if (uattr_size >= offsetofend(union bpf_attr, btf_log_true_size) &&
-	    copy_to_bpfptr_offset(uattr, offsetof(union bpf_attr, btf_log_true_size),
-				  &log_true_size, sizeof(log_true_size)))
-		err = -EFAULT;
-
-	return err;
-}
-
-static struct btf *btf_parse(const union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size)
+static struct btf *btf_parse(const union bpf_attr *attr, bpfptr_t uattr,
+			     struct bpf_log_attr *log_attr)
 {
 	bpfptr_t btf_data = make_bpfptr(attr->btf, uattr.is_kernel);
-	char __user *log_ubuf = u64_to_user_ptr(attr->btf_log_buf);
+	char __user *log_ubuf = u64_to_user_ptr(log_attr->log_buf);
 	struct btf_struct_metas *struct_meta_tab;
 	struct btf_verifier_env *env = NULL;
 	struct btf *btf = NULL;
@@ -5780,8 +5766,8 @@ static struct btf *btf_parse(const union bpf_attr *attr, bpfptr_t uattr, u32 uat
 	/* user could have requested verbose verifier output
 	 * and supplied buffer to store the verification trace
 	 */
-	err = bpf_vlog_init(&env->log, attr->btf_log_level,
-			    log_ubuf, attr->btf_log_size);
+	err = bpf_vlog_init(&env->log, log_attr->log_level,
+			    log_ubuf, log_attr->log_size);
 	if (err)
 		goto errout_free;
 
@@ -5841,7 +5827,7 @@ static struct btf *btf_parse(const union bpf_attr *attr, bpfptr_t uattr, u32 uat
 		}
 	}
 
-	err = finalize_log(&env->log, uattr, uattr_size);
+	err = bpf_log_attr_finalize(log_attr, &env->log);
 	if (err)
 		goto errout_free;
 
@@ -5853,7 +5839,7 @@ static struct btf *btf_parse(const union bpf_attr *attr, bpfptr_t uattr, u32 uat
 	btf_free_struct_meta_tab(btf);
 errout:
 	/* overwrite err with -ENOSPC or -EFAULT */
-	ret = finalize_log(&env->log, uattr, uattr_size);
+	ret = bpf_log_attr_finalize(log_attr, &env->log);
 	if (ret)
 		err = ret;
 errout_free:
@@ -8017,12 +8003,12 @@ static int __btf_new_fd(struct btf *btf)
 	return anon_inode_getfd("btf", &btf_fops, btf, O_RDONLY | O_CLOEXEC);
 }
 
-int btf_new_fd(const union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size)
+int btf_new_fd(const union bpf_attr *attr, bpfptr_t uattr, struct bpf_log_attr *log_attr)
 {
 	struct btf *btf;
 	int ret;
 
-	btf = btf_parse(attr, uattr, uattr_size);
+	btf = btf_parse(attr, uattr, log_attr);
 	if (IS_ERR(btf))
 		return PTR_ERR(btf);
 
diff --git a/kernel/bpf/log.c b/kernel/bpf/log.c
index eba60a13e244..96df089a2c89 100644
--- a/kernel/bpf/log.c
+++ b/kernel/bpf/log.c
@@ -902,6 +902,15 @@ int bpf_prog_load_log_attr_init(struct bpf_log_attr *log_attr, struct bpf_attrs
 				 offsetof(union bpf_attr, log_true_size), attrs_common);
 }
 
+int bpf_btf_load_log_attr_init(struct bpf_log_attr *log_attr, struct bpf_attrs *attrs)
+{
+	const union bpf_attr *attr = attrs->attr;
+
+	return bpf_log_attr_init(log_attr, attrs, attr->btf_log_buf, attr->btf_log_size,
+				 attr->btf_log_level, offsetof(union bpf_attr, btf_log_true_size),
+				 NULL);
+}
+
 int bpf_log_attr_finalize(struct bpf_log_attr *log_attr, struct bpf_verifier_log *log)
 {
 	u32 log_true_size, off;
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index f369b9ec9d60..8382fafc8d17 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -5436,6 +5436,9 @@ static int bpf_obj_get_info_by_fd(const union bpf_attr *attr,
 static int bpf_btf_load(const union bpf_attr *attr, bpfptr_t uattr, __u32 uattr_size)
 {
 	struct bpf_token *token = NULL;
+	struct bpf_log_attr log_attr;
+	struct bpf_attrs attrs;
+	int err;
 
 	if (CHECK_ATTR(BPF_BTF_LOAD))
 		return -EINVAL;
@@ -5443,6 +5446,11 @@ static int bpf_btf_load(const union bpf_attr *attr, bpfptr_t uattr, __u32 uattr_
 	if (attr->btf_flags & ~BPF_F_TOKEN_FD)
 		return -EINVAL;
 
+	bpf_attrs_init(&attrs, attr, uattr, uattr_size);
+	err = bpf_btf_load_log_attr_init(&log_attr, &attrs);
+	if (err)
+		return err;
+
 	if (attr->btf_flags & BPF_F_TOKEN_FD) {
 		token = bpf_token_get_from_fd(attr->btf_token_fd);
 		if (IS_ERR(token))
@@ -5460,7 +5468,7 @@ static int bpf_btf_load(const union bpf_attr *attr, bpfptr_t uattr, __u32 uattr_
 
 	bpf_token_put(token);
 
-	return btf_new_fd(attr, uattr, uattr_size);
+	return btf_new_fd(attr, uattr, &log_attr);
 }
 
 #define BPF_BTF_GET_FD_BY_ID_LAST_FIELD fd_by_id_token_fd
-- 
2.52.0


^ permalink raw reply related

* [PATCH bpf-next v5 4/9] bpf: Add syscall common attributes support for prog_load
From: Leon Hwang @ 2026-01-12 14:56 UTC (permalink / raw)
  To: bpf
  Cc: Alexei Starovoitov, Daniel Borkmann, John Fastabend,
	Andrii Nakryiko, Martin KaFai Lau, Eduard Zingerman, Song Liu,
	Yonghong Song, KP Singh, Stanislav Fomichev, Hao Luo, Jiri Olsa,
	Shuah Khan, Christian Brauner, Seth Forshee, Yuichiro Tsuji,
	Andrey Albershteyn, Leon Hwang, Willem de Bruijn, Jason Xing,
	Tao Chen, Mykyta Yatsenko, Kumar Kartikeya Dwivedi,
	Anton Protopopov, Amery Hung, Rong Tao, linux-kernel, linux-api,
	linux-kselftest, kernel-patches-bot
In-Reply-To: <20260112145616.44195-1-leon.hwang@linux.dev>

The log buffer of common attributes would be confusing with the one in
'union bpf_attr' for BPF_PROG_LOAD.

In order to clarify the usage of these two log buffers, they both can be
used for logging if:

* They are same, including 'log_buf', 'log_level' and 'log_size'.
* One of them is missing, then another one will be used for logging.

If they both have 'log_buf' but they are not same totally, return -EUSERS.

Signed-off-by: Leon Hwang <leon.hwang@linux.dev>
---
 include/linux/bpf_verifier.h |  4 +++-
 kernel/bpf/log.c             | 29 ++++++++++++++++++++++++++---
 kernel/bpf/syscall.c         |  9 ++++++---
 3 files changed, 35 insertions(+), 7 deletions(-)

diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 4c9632c40059..da2d37ca60e7 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -637,9 +637,11 @@ struct bpf_log_attr {
 	u32 log_level;
 	struct bpf_attrs *attrs;
 	u32 offsetof_log_true_size;
+	struct bpf_attrs *attrs_common;
 };
 
-int bpf_prog_load_log_attr_init(struct bpf_log_attr *log_attr, struct bpf_attrs *attrs);
+int bpf_prog_load_log_attr_init(struct bpf_log_attr *log_attr, struct bpf_attrs *attrs,
+				struct bpf_attrs *attrs_common);
 int bpf_log_attr_finalize(struct bpf_log_attr *log_attr, struct bpf_verifier_log *log);
 
 #define BPF_MAX_SUBPROGS 256
diff --git a/kernel/bpf/log.c b/kernel/bpf/log.c
index 457b724c4176..eba60a13e244 100644
--- a/kernel/bpf/log.c
+++ b/kernel/bpf/log.c
@@ -865,23 +865,41 @@ void print_insn_state(struct bpf_verifier_env *env, const struct bpf_verifier_st
 }
 
 static int bpf_log_attr_init(struct bpf_log_attr *log_attr, struct bpf_attrs *attrs, u64 log_buf,
-			     u32 log_size, u32 log_level, int offsetof_log_true_size)
+			     u32 log_size, u32 log_level, int offsetof_log_true_size,
+			     struct bpf_attrs *attrs_common)
 {
+	const struct bpf_common_attr *common_attr = attrs_common ? attrs_common->attr : NULL;
+
 	memset(log_attr, 0, sizeof(*log_attr));
 	log_attr->log_buf = log_buf;
 	log_attr->log_size = log_size;
 	log_attr->log_level = log_level;
 	log_attr->attrs = attrs;
 	log_attr->offsetof_log_true_size = offsetof_log_true_size;
+	log_attr->attrs_common = attrs_common;
+
+	if (log_buf && common_attr && common_attr->log_buf &&
+		(log_buf != common_attr->log_buf ||
+		 log_size != common_attr->log_size ||
+		 log_level != common_attr->log_level))
+		return -EUSERS;
+
+	if (!log_buf && common_attr && common_attr->log_buf) {
+		log_attr->log_buf = common_attr->log_buf;
+		log_attr->log_size = common_attr->log_size;
+		log_attr->log_level = common_attr->log_level;
+	}
+
 	return 0;
 }
 
-int bpf_prog_load_log_attr_init(struct bpf_log_attr *log_attr, struct bpf_attrs *attrs)
+int bpf_prog_load_log_attr_init(struct bpf_log_attr *log_attr, struct bpf_attrs *attrs,
+				struct bpf_attrs *attrs_common)
 {
 	const union bpf_attr *attr = attrs->attr;
 
 	return bpf_log_attr_init(log_attr, attrs, attr->log_buf, attr->log_size, attr->log_level,
-				 offsetof(union bpf_attr, log_true_size));
+				 offsetof(union bpf_attr, log_true_size), attrs_common);
 }
 
 int bpf_log_attr_finalize(struct bpf_log_attr *log_attr, struct bpf_verifier_log *log)
@@ -901,5 +919,10 @@ int bpf_log_attr_finalize(struct bpf_log_attr *log_attr, struct bpf_verifier_log
 	    copy_to_bpfptr_offset(log_attr->attrs->uattr, off, &log_true_size, size))
 		err = -EFAULT;
 
+	off = offsetof(struct bpf_common_attr, log_true_size);
+	if (log_attr->attrs_common && log_attr->attrs_common->size >= off + size &&
+	    copy_to_bpfptr_offset(log_attr->attrs_common->uattr, off, &log_true_size, size))
+		err = -EFAULT;
+
 	return err;
 }
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 0b389bc6add8..f369b9ec9d60 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -2865,7 +2865,8 @@ static int bpf_prog_mark_insn_arrays_ready(struct bpf_prog *prog)
 /* last field in 'union bpf_attr' used by this command */
 #define BPF_PROG_LOAD_LAST_FIELD keyring_id
 
-static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size)
+static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size,
+			 struct bpf_attrs *common_attrs)
 {
 	enum bpf_prog_type type = attr->prog_type;
 	struct bpf_prog *prog, *dst_prog = NULL;
@@ -3085,7 +3086,7 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size)
 		goto free_prog_sec;
 
 	bpf_attrs_init(&attrs, attr, uattr, uattr_size);
-	err = bpf_prog_load_log_attr_init(&log_attr, &attrs);
+	err = bpf_prog_load_log_attr_init(&log_attr, &attrs, common_attrs);
 	if (err < 0)
 		goto free_used_maps;
 
@@ -6174,6 +6175,7 @@ static int __sys_bpf(enum bpf_cmd cmd, bpfptr_t uattr, unsigned int size,
 		     bpfptr_t uattr_common, unsigned int size_common)
 {
 	struct bpf_common_attr common_attr;
+	struct bpf_attrs common_attrs;
 	union bpf_attr attr;
 	int err;
 
@@ -6225,7 +6227,8 @@ static int __sys_bpf(enum bpf_cmd cmd, bpfptr_t uattr, unsigned int size,
 		err = map_freeze(&attr);
 		break;
 	case BPF_PROG_LOAD:
-		err = bpf_prog_load(&attr, uattr, size);
+		bpf_attrs_init(&common_attrs, &common_attr, uattr_common, size_common);
+		err = bpf_prog_load(&attr, uattr, size, &common_attrs);
 		break;
 	case BPF_OBJ_PIN:
 		err = bpf_obj_pin(&attr);
-- 
2.52.0


^ permalink raw reply related

* [PATCH bpf-next v5 3/9] bpf: Refactor reporting log_true_size for prog_load
From: Leon Hwang @ 2026-01-12 14:56 UTC (permalink / raw)
  To: bpf
  Cc: Alexei Starovoitov, Daniel Borkmann, John Fastabend,
	Andrii Nakryiko, Martin KaFai Lau, Eduard Zingerman, Song Liu,
	Yonghong Song, KP Singh, Stanislav Fomichev, Hao Luo, Jiri Olsa,
	Shuah Khan, Christian Brauner, Seth Forshee, Yuichiro Tsuji,
	Andrey Albershteyn, Leon Hwang, Willem de Bruijn, Jason Xing,
	Tao Chen, Mykyta Yatsenko, Kumar Kartikeya Dwivedi,
	Anton Protopopov, Amery Hung, Rong Tao, linux-kernel, linux-api,
	linux-kselftest, kernel-patches-bot
In-Reply-To: <20260112145616.44195-1-leon.hwang@linux.dev>

The next commit will add support for reporting logs via extended common
attributes, including 'log_true_size'.

To prepare for that, refactor the 'log_true_size' reporting logic by
introducing a new struct bpf_log_attr to encapsulate log-related behavior:

 * bpf_prog_load_log_attr_init(): initialize the log fields, which will
   support extended common attributes in the next commit.
 * bpf_log_attr_finalize(): handle log finalization and write back
   'log_true_size' to userspace.

Signed-off-by: Leon Hwang <leon.hwang@linux.dev>
---
 include/linux/bpf.h          | 19 ++++++++++++++++-
 include/linux/bpf_verifier.h | 11 ++++++++++
 kernel/bpf/log.c             | 40 ++++++++++++++++++++++++++++++++++++
 kernel/bpf/syscall.c         |  9 +++++++-
 kernel/bpf/verifier.c        | 19 ++++++-----------
 5 files changed, 83 insertions(+), 15 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 5936f8e2996f..3a525a7e8747 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -2867,8 +2867,25 @@ int bpf_get_file_flag(int flags);
 int bpf_check_uarg_tail_zero(bpfptr_t uaddr, size_t expected_size,
 			     size_t actual_size);
 
+struct bpf_attrs {
+	const void *attr;
+	bpfptr_t uattr;
+	u32 size;
+};
+
+static inline void bpf_attrs_init(struct bpf_attrs *attrs, const void *attr, bpfptr_t uattr,
+				  u32 size)
+{
+	memset(attrs, 0, sizeof(*attrs));
+	attrs->attr = attr;
+	attrs->uattr = uattr;
+	attrs->size = size;
+}
+
 /* verify correctness of eBPF program */
-int bpf_check(struct bpf_prog **fp, union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size);
+struct bpf_log_attr;
+int bpf_check(struct bpf_prog **fp, union bpf_attr *attr, bpfptr_t uattr,
+	      struct bpf_log_attr *log_attr);
 
 #ifndef CONFIG_BPF_JIT_ALWAYS_ON
 void bpf_patch_call_args(struct bpf_insn *insn, u32 stack_depth);
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 130bcbd66f60..4c9632c40059 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -631,6 +631,17 @@ static inline bool bpf_verifier_log_needed(const struct bpf_verifier_log *log)
 	return log && log->level;
 }
 
+struct bpf_log_attr {
+	u64 log_buf;
+	u32 log_size;
+	u32 log_level;
+	struct bpf_attrs *attrs;
+	u32 offsetof_log_true_size;
+};
+
+int bpf_prog_load_log_attr_init(struct bpf_log_attr *log_attr, struct bpf_attrs *attrs);
+int bpf_log_attr_finalize(struct bpf_log_attr *log_attr, struct bpf_verifier_log *log);
+
 #define BPF_MAX_SUBPROGS 256
 
 struct bpf_subprog_arg_info {
diff --git a/kernel/bpf/log.c b/kernel/bpf/log.c
index a0c3b35de2ce..457b724c4176 100644
--- a/kernel/bpf/log.c
+++ b/kernel/bpf/log.c
@@ -863,3 +863,43 @@ void print_insn_state(struct bpf_verifier_env *env, const struct bpf_verifier_st
 	}
 	print_verifier_state(env, vstate, frameno, false);
 }
+
+static int bpf_log_attr_init(struct bpf_log_attr *log_attr, struct bpf_attrs *attrs, u64 log_buf,
+			     u32 log_size, u32 log_level, int offsetof_log_true_size)
+{
+	memset(log_attr, 0, sizeof(*log_attr));
+	log_attr->log_buf = log_buf;
+	log_attr->log_size = log_size;
+	log_attr->log_level = log_level;
+	log_attr->attrs = attrs;
+	log_attr->offsetof_log_true_size = offsetof_log_true_size;
+	return 0;
+}
+
+int bpf_prog_load_log_attr_init(struct bpf_log_attr *log_attr, struct bpf_attrs *attrs)
+{
+	const union bpf_attr *attr = attrs->attr;
+
+	return bpf_log_attr_init(log_attr, attrs, attr->log_buf, attr->log_size, attr->log_level,
+				 offsetof(union bpf_attr, log_true_size));
+}
+
+int bpf_log_attr_finalize(struct bpf_log_attr *log_attr, struct bpf_verifier_log *log)
+{
+	u32 log_true_size, off;
+	size_t size;
+	int err;
+
+	if (!log)
+		return 0;
+
+	err = bpf_vlog_finalize(log, &log_true_size);
+
+	size = sizeof(log_true_size);
+	off = log_attr->offsetof_log_true_size;
+	if (log_attr->attrs && log_attr->attrs->size >= off + size &&
+	    copy_to_bpfptr_offset(log_attr->attrs->uattr, off, &log_true_size, size))
+		err = -EFAULT;
+
+	return err;
+}
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index af703f7ea58e..0b389bc6add8 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -2871,6 +2871,8 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size)
 	struct bpf_prog *prog, *dst_prog = NULL;
 	struct btf *attach_btf = NULL;
 	struct bpf_token *token = NULL;
+	struct bpf_log_attr log_attr;
+	struct bpf_attrs attrs;
 	bool bpf_cap;
 	int err;
 	char license[128];
@@ -3082,8 +3084,13 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size)
 	if (err)
 		goto free_prog_sec;
 
+	bpf_attrs_init(&attrs, attr, uattr, uattr_size);
+	err = bpf_prog_load_log_attr_init(&log_attr, &attrs);
+	if (err < 0)
+		goto free_used_maps;
+
 	/* run eBPF verifier */
-	err = bpf_check(&prog, attr, uattr, uattr_size);
+	err = bpf_check(&prog, attr, uattr, &log_attr);
 	if (err < 0)
 		goto free_used_maps;
 
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 53635ea2e41b..921a7b6a6686 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -25106,12 +25106,12 @@ static int compute_scc(struct bpf_verifier_env *env)
 	return err;
 }
 
-int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u32 uattr_size)
+int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr,
+	      struct bpf_log_attr *log_attr)
 {
 	u64 start_time = ktime_get_ns();
 	struct bpf_verifier_env *env;
 	int i, len, ret = -EINVAL, err;
-	u32 log_true_size;
 	bool is_priv;
 
 	BTF_TYPE_EMIT(enum bpf_features);
@@ -25158,9 +25158,9 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3
 	/* user could have requested verbose verifier output
 	 * and supplied buffer to store the verification trace
 	 */
-	ret = bpf_vlog_init(&env->log, attr->log_level,
-			    (char __user *) (unsigned long) attr->log_buf,
-			    attr->log_size);
+	ret = bpf_vlog_init(&env->log, log_attr->log_level,
+			    u64_to_user_ptr(log_attr->log_buf),
+			    log_attr->log_size);
 	if (ret)
 		goto err_unlock;
 
@@ -25310,17 +25310,10 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3
 	env->prog->aux->verified_insns = env->insn_processed;
 
 	/* preserve original error even if log finalization is successful */
-	err = bpf_vlog_finalize(&env->log, &log_true_size);
+	err = bpf_log_attr_finalize(log_attr, &env->log);
 	if (err)
 		ret = err;
 
-	if (uattr_size >= offsetofend(union bpf_attr, log_true_size) &&
-	    copy_to_bpfptr_offset(uattr, offsetof(union bpf_attr, log_true_size),
-				  &log_true_size, sizeof(log_true_size))) {
-		ret = -EFAULT;
-		goto err_release_maps;
-	}
-
 	if (ret)
 		goto err_release_maps;
 
-- 
2.52.0


^ permalink raw reply related

* [PATCH bpf-next v5 2/9] libbpf: Add support for extended bpf syscall
From: Leon Hwang @ 2026-01-12 14:56 UTC (permalink / raw)
  To: bpf
  Cc: Alexei Starovoitov, Daniel Borkmann, John Fastabend,
	Andrii Nakryiko, Martin KaFai Lau, Eduard Zingerman, Song Liu,
	Yonghong Song, KP Singh, Stanislav Fomichev, Hao Luo, Jiri Olsa,
	Shuah Khan, Christian Brauner, Seth Forshee, Yuichiro Tsuji,
	Andrey Albershteyn, Leon Hwang, Willem de Bruijn, Jason Xing,
	Tao Chen, Mykyta Yatsenko, Kumar Kartikeya Dwivedi,
	Anton Protopopov, Amery Hung, Rong Tao, linux-kernel, linux-api,
	linux-kselftest, kernel-patches-bot
In-Reply-To: <20260112145616.44195-1-leon.hwang@linux.dev>

To support the extended BPF syscall introduced in the previous commit,
introduce the following internal APIs:

* 'sys_bpf_ext()'
* 'sys_bpf_ext_fd()'
  They wrap the raw 'syscall()' interface to support passing extended
  attributes.
* 'probe_sys_bpf_ext()'
  Check whether current kernel supports the extended attributes.

Signed-off-by: Leon Hwang <leon.hwang@linux.dev>
---
 tools/lib/bpf/bpf.c             | 34 +++++++++++++++++++++++++++++++++
 tools/lib/bpf/features.c        |  8 ++++++++
 tools/lib/bpf/libbpf_internal.h |  3 +++
 3 files changed, 45 insertions(+)

diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index 21b57a629916..d44e667aaf02 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -69,6 +69,40 @@ static inline __u64 ptr_to_u64(const void *ptr)
 	return (__u64) (unsigned long) ptr;
 }
 
+static inline int sys_bpf_ext(enum bpf_cmd cmd, union bpf_attr *attr,
+			      unsigned int size,
+			      struct bpf_common_attr *common_attr,
+			      unsigned int size_common)
+{
+	cmd = common_attr ? (cmd | BPF_COMMON_ATTRS) : (cmd & ~BPF_COMMON_ATTRS);
+	return syscall(__NR_bpf, cmd, attr, size, common_attr, size_common);
+}
+
+static inline int sys_bpf_ext_fd(enum bpf_cmd cmd, union bpf_attr *attr,
+				 unsigned int size,
+				 struct bpf_common_attr *common_attr,
+				 unsigned int size_common)
+{
+	int fd;
+
+	fd = sys_bpf_ext(cmd, attr, size, common_attr, size_common);
+	return ensure_good_fd(fd);
+}
+
+int probe_sys_bpf_ext(void)
+{
+	const size_t attr_sz = offsetofend(union bpf_attr, prog_token_fd);
+	union bpf_attr attr;
+	int fd;
+
+	memset(&attr, 0, attr_sz);
+	fd = syscall(__NR_bpf, BPF_PROG_LOAD | BPF_COMMON_ATTRS, &attr, attr_sz, NULL,
+		     sizeof(struct bpf_common_attr));
+	if (fd >= 0)
+		close(fd);
+	return errno == EFAULT;
+}
+
 static inline int sys_bpf(enum bpf_cmd cmd, union bpf_attr *attr,
 			  unsigned int size)
 {
diff --git a/tools/lib/bpf/features.c b/tools/lib/bpf/features.c
index b842b83e2480..d786a815f1ae 100644
--- a/tools/lib/bpf/features.c
+++ b/tools/lib/bpf/features.c
@@ -506,6 +506,11 @@ static int probe_kern_arg_ctx_tag(int token_fd)
 	return probe_fd(prog_fd);
 }
 
+static int probe_kern_extended_syscall(int token_fd)
+{
+	return probe_sys_bpf_ext();
+}
+
 typedef int (*feature_probe_fn)(int /* token_fd */);
 
 static struct kern_feature_cache feature_cache;
@@ -581,6 +586,9 @@ static struct kern_feature_desc {
 	[FEAT_BTF_QMARK_DATASEC] = {
 		"BTF DATASEC names starting from '?'", probe_kern_btf_qmark_datasec,
 	},
+	[FEAT_EXTENDED_SYSCALL] = {
+		"Kernel supports extended syscall", probe_kern_extended_syscall,
+	},
 };
 
 bool feat_supported(struct kern_feature_cache *cache, enum kern_feature_id feat_id)
diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h
index fc59b21b51b5..e2a6ef4b45ae 100644
--- a/tools/lib/bpf/libbpf_internal.h
+++ b/tools/lib/bpf/libbpf_internal.h
@@ -392,6 +392,8 @@ enum kern_feature_id {
 	FEAT_ARG_CTX_TAG,
 	/* Kernel supports '?' at the front of datasec names */
 	FEAT_BTF_QMARK_DATASEC,
+	/* Kernel supports extended syscall */
+	FEAT_EXTENDED_SYSCALL,
 	__FEAT_CNT,
 };
 
@@ -757,4 +759,5 @@ int probe_fd(int fd);
 #define SHA256_DWORD_SIZE SHA256_DIGEST_LENGTH / sizeof(__u64)
 
 void libbpf_sha256(const void *data, size_t len, __u8 out[SHA256_DIGEST_LENGTH]);
+int probe_sys_bpf_ext(void);
 #endif /* __LIBBPF_LIBBPF_INTERNAL_H */
-- 
2.52.0


^ permalink raw reply related

* [PATCH bpf-next v5 1/9] bpf: Extend BPF syscall with common attributes support
From: Leon Hwang @ 2026-01-12 14:56 UTC (permalink / raw)
  To: bpf
  Cc: Alexei Starovoitov, Daniel Borkmann, John Fastabend,
	Andrii Nakryiko, Martin KaFai Lau, Eduard Zingerman, Song Liu,
	Yonghong Song, KP Singh, Stanislav Fomichev, Hao Luo, Jiri Olsa,
	Shuah Khan, Christian Brauner, Seth Forshee, Yuichiro Tsuji,
	Andrey Albershteyn, Leon Hwang, Willem de Bruijn, Jason Xing,
	Tao Chen, Mykyta Yatsenko, Kumar Kartikeya Dwivedi,
	Anton Protopopov, Amery Hung, Rong Tao, linux-kernel, linux-api,
	linux-kselftest, kernel-patches-bot
In-Reply-To: <20260112145616.44195-1-leon.hwang@linux.dev>

Extend the BPF syscall to support a set of common attributes shared
across all BPF commands:

1. 'log_buf': User-provided buffer for storing logs.
2. 'log_size': Size of the log buffer.
3. 'log_level': Log verbosity level.
4. 'log_true_size': The size of log reported by kernel.

These common attributes are passed as the 4th argument to the BPF
syscall, with the 5th argument specifying the size of this structure.

To indicate the use of these common attributes from userspace, a new flag
'BPF_COMMON_ATTRS' ('1 << 16') is introduced. This flag is OR-ed into the
'cmd' field of the syscall.

When 'cmd & BPF_COMMON_ATTRS' is set, the kernel will copy the common
attributes from userspace into kernel space for use.

Signed-off-by: Leon Hwang <leon.hwang@linux.dev>
---
 include/linux/syscalls.h       |  3 ++-
 include/uapi/linux/bpf.h       |  8 ++++++++
 kernel/bpf/syscall.c           | 25 +++++++++++++++++++++----
 tools/include/uapi/linux/bpf.h |  8 ++++++++
 4 files changed, 39 insertions(+), 5 deletions(-)

diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index cf84d98964b2..729659202d77 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -937,7 +937,8 @@ asmlinkage long sys_seccomp(unsigned int op, unsigned int flags,
 asmlinkage long sys_getrandom(char __user *buf, size_t count,
 			      unsigned int flags);
 asmlinkage long sys_memfd_create(const char __user *uname_ptr, unsigned int flags);
-asmlinkage long sys_bpf(int cmd, union bpf_attr __user *attr, unsigned int size);
+asmlinkage long sys_bpf(int cmd, union bpf_attr __user *attr, unsigned int size,
+			struct bpf_common_attr __user *attr_common, unsigned int size_common);
 asmlinkage long sys_execveat(int dfd, const char __user *filename,
 			const char __user *const __user *argv,
 			const char __user *const __user *envp, int flags);
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 2a2ade4be60f..2f83eca0a357 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -986,6 +986,7 @@ enum bpf_cmd {
 	BPF_PROG_STREAM_READ_BY_FD,
 	BPF_PROG_ASSOC_STRUCT_OPS,
 	__MAX_BPF_CMD,
+	BPF_COMMON_ATTRS = 1 << 16, /* Indicate carrying bpf_common_attr. */
 };
 
 enum bpf_map_type {
@@ -1491,6 +1492,13 @@ struct bpf_stack_build_id {
 	};
 };
 
+struct bpf_common_attr {
+	__u64 log_buf;
+	__u32 log_size;
+	__u32 log_level;
+	__u32 log_true_size;
+};
+
 #define BPF_OBJ_NAME_LEN 16U
 
 enum {
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index ecc0929ce462..af703f7ea58e 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -6163,8 +6163,10 @@ static int prog_assoc_struct_ops(union bpf_attr *attr)
 	return ret;
 }
 
-static int __sys_bpf(enum bpf_cmd cmd, bpfptr_t uattr, unsigned int size)
+static int __sys_bpf(enum bpf_cmd cmd, bpfptr_t uattr, unsigned int size,
+		     bpfptr_t uattr_common, unsigned int size_common)
 {
+	struct bpf_common_attr common_attr;
 	union bpf_attr attr;
 	int err;
 
@@ -6178,6 +6180,20 @@ static int __sys_bpf(enum bpf_cmd cmd, bpfptr_t uattr, unsigned int size)
 	if (copy_from_bpfptr(&attr, uattr, size) != 0)
 		return -EFAULT;
 
+	memset(&common_attr, 0, sizeof(common_attr));
+	if (cmd & BPF_COMMON_ATTRS) {
+		err = bpf_check_uarg_tail_zero(uattr_common, sizeof(common_attr), size_common);
+		if (err)
+			return err;
+
+		cmd &= ~BPF_COMMON_ATTRS;
+		size_common = min_t(u32, size_common, sizeof(common_attr));
+		if (copy_from_bpfptr(&common_attr, uattr_common, size_common) != 0)
+			return -EFAULT;
+	} else {
+		size_common = 0;
+	}
+
 	err = security_bpf(cmd, &attr, size, uattr.is_kernel);
 	if (err < 0)
 		return err;
@@ -6313,9 +6329,10 @@ static int __sys_bpf(enum bpf_cmd cmd, bpfptr_t uattr, unsigned int size)
 	return err;
 }
 
-SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
+SYSCALL_DEFINE5(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size,
+		struct bpf_common_attr __user *, uattr_common, unsigned int, size_common)
 {
-	return __sys_bpf(cmd, USER_BPFPTR(uattr), size);
+	return __sys_bpf(cmd, USER_BPFPTR(uattr), size, USER_BPFPTR(uattr_common), size_common);
 }
 
 static bool syscall_prog_is_valid_access(int off, int size,
@@ -6346,7 +6363,7 @@ BPF_CALL_3(bpf_sys_bpf, int, cmd, union bpf_attr *, attr, u32, attr_size)
 	default:
 		return -EINVAL;
 	}
-	return __sys_bpf(cmd, KERNEL_BPFPTR(attr), attr_size);
+	return __sys_bpf(cmd, KERNEL_BPFPTR(attr), attr_size, KERNEL_BPFPTR(NULL), 0);
 }
 
 
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index b816bc53d2e1..2b05c689d51a 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -986,6 +986,7 @@ enum bpf_cmd {
 	BPF_PROG_STREAM_READ_BY_FD,
 	BPF_PROG_ASSOC_STRUCT_OPS,
 	__MAX_BPF_CMD,
+	BPF_COMMON_ATTRS = 1 << 16, /* Indicate carrying bpf_common_attr. */
 };
 
 enum bpf_map_type {
@@ -1491,6 +1492,13 @@ struct bpf_stack_build_id {
 	};
 };
 
+struct bpf_common_attr {
+	__u64 log_buf;
+	__u32 log_size;
+	__u32 log_level;
+	__u32 log_true_size;
+};
+
 #define BPF_OBJ_NAME_LEN 16U
 
 enum {
-- 
2.52.0


^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox