* [RESEND PATCH bpf-next v4 3/9] bpf: Refactor reporting log_true_size for prog_load
From: Leon Hwang @ 2026-01-06 17:20 UTC (permalink / raw)
To: bpf
Cc: Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko,
Martin KaFai Lau, Eduard Zingerman, Song Liu, Yonghong Song,
John Fastabend, KP Singh, Stanislav Fomichev, Hao Luo, Jiri Olsa,
Shuah Khan, Christian Brauner, Oleg Nesterov, Leon Hwang,
Seth Forshee, Yuichiro Tsuji, Andrey Albershteyn,
Willem de Bruijn, Jason Xing, Paul Chaignon, Mykyta Yatsenko,
Kumar Kartikeya Dwivedi, Anton Protopopov, Amery Hung, Rong Tao,
linux-kernel, linux-api, linux-kselftest, kernel-patches-bot
In-Reply-To: <20260106172018.57757-1-leon.hwang@linux.dev>
The next commit will make it possible to report logs via the extended
common attributes, and 'log_true_size' will be reported via the extended
common attributes as well.
Therefore, refactor how 'log_true_size' is reported so that it can easily
be reported via the extended common attributes, too.
Signed-off-by: Leon Hwang <leon.hwang@linux.dev>
---
include/linux/bpf.h | 2 +-
kernel/bpf/syscall.c | 21 +++++++++++++++++----
kernel/bpf/verifier.c | 12 ++----------
3 files changed, 20 insertions(+), 15 deletions(-)
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index a63e47d2109c..26fbc550e5aa 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -2868,7 +2868,7 @@ int bpf_check_uarg_tail_zero(bpfptr_t uaddr, size_t expected_size,
size_t actual_size);
/* verify correctness of eBPF program */
-int bpf_check(struct bpf_prog **fp, union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size);
+int bpf_check(struct bpf_prog **fp, union bpf_attr *attr, bpfptr_t uattr);
#ifndef CONFIG_BPF_JIT_ALWAYS_ON
void bpf_patch_call_args(struct bpf_insn *insn, u32 stack_depth);
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 8f464b847405..1739601fb7bd 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -2862,7 +2862,7 @@ static int bpf_prog_mark_insn_arrays_ready(struct bpf_prog *prog)
/* last field in 'union bpf_attr' used by this command */
#define BPF_PROG_LOAD_LAST_FIELD keyring_id
-static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size)
+static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr)
{
enum bpf_prog_type type = attr->prog_type;
struct bpf_prog *prog, *dst_prog = NULL;
@@ -3080,7 +3080,7 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size)
goto free_prog_sec;
/* run eBPF verifier */
- err = bpf_check(&prog, attr, uattr, uattr_size);
+ err = bpf_check(&prog, attr, uattr);
if (err < 0)
goto free_used_maps;
@@ -6160,12 +6160,22 @@ static int prog_assoc_struct_ops(union bpf_attr *attr)
return ret;
}
+static int copy_prog_load_log_true_size(union bpf_attr *attr, bpfptr_t uattr, unsigned int size)
+{
+ if (size >= offsetofend(union bpf_attr, log_true_size) &&
+ copy_to_bpfptr_offset(uattr, offsetof(union bpf_attr, log_true_size),
+ &attr->log_true_size, sizeof(attr->log_true_size)))
+ return -EFAULT;
+
+ return 0;
+}
+
static int __sys_bpf(enum bpf_cmd cmd, bpfptr_t uattr, unsigned int size,
bpfptr_t uattr_common, unsigned int size_common)
{
struct bpf_common_attr common_attrs;
union bpf_attr attr;
- int err;
+ int err, ret;
err = bpf_check_uarg_tail_zero(uattr, sizeof(attr), size);
if (err)
@@ -6215,7 +6225,10 @@ static int __sys_bpf(enum bpf_cmd cmd, bpfptr_t uattr, unsigned int size,
err = map_freeze(&attr);
break;
case BPF_PROG_LOAD:
- err = bpf_prog_load(&attr, uattr, size);
+ attr.log_true_size = 0;
+ err = bpf_prog_load(&attr, uattr);
+ ret = copy_prog_load_log_true_size(&attr, uattr, size);
+ err = ret ? ret : err;
break;
case BPF_OBJ_PIN:
err = bpf_obj_pin(&attr);
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 9394b0de2ef0..ab5eacdde92c 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -25096,12 +25096,11 @@ static int compute_scc(struct bpf_verifier_env *env)
return err;
}
-int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u32 uattr_size)
+int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr)
{
u64 start_time = ktime_get_ns();
struct bpf_verifier_env *env;
int i, len, ret = -EINVAL, err;
- u32 log_true_size;
bool is_priv;
BTF_TYPE_EMIT(enum bpf_features);
@@ -25300,17 +25299,10 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3
env->prog->aux->verified_insns = env->insn_processed;
/* preserve original error even if log finalization is successful */
- err = bpf_vlog_finalize(&env->log, &log_true_size);
+ err = bpf_vlog_finalize(&env->log, &attr->log_true_size);
if (err)
ret = err;
- if (uattr_size >= offsetofend(union bpf_attr, log_true_size) &&
- copy_to_bpfptr_offset(uattr, offsetof(union bpf_attr, log_true_size),
- &log_true_size, sizeof(log_true_size))) {
- ret = -EFAULT;
- goto err_release_maps;
- }
-
if (ret)
goto err_release_maps;
--
2.52.0
^ permalink raw reply related
* [RESEND PATCH bpf-next v4 2/9] libbpf: Add support for extended bpf syscall
From: Leon Hwang @ 2026-01-06 17:20 UTC (permalink / raw)
To: bpf
Cc: Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko,
Martin KaFai Lau, Eduard Zingerman, Song Liu, Yonghong Song,
John Fastabend, KP Singh, Stanislav Fomichev, Hao Luo, Jiri Olsa,
Shuah Khan, Christian Brauner, Oleg Nesterov, Leon Hwang,
Seth Forshee, Yuichiro Tsuji, Andrey Albershteyn,
Willem de Bruijn, Jason Xing, Paul Chaignon, Mykyta Yatsenko,
Kumar Kartikeya Dwivedi, Anton Protopopov, Amery Hung, Rong Tao,
linux-kernel, linux-api, linux-kselftest, kernel-patches-bot
In-Reply-To: <20260106172018.57757-1-leon.hwang@linux.dev>
To support the extended BPF syscall introduced in the previous commit,
introduce the following internal APIs:
* 'sys_bpf_ext()'
* 'sys_bpf_ext_fd()'
They wrap the raw 'syscall()' interface to support passing extended
attributes.
* 'probe_sys_bpf_ext()'
Checks whether the current kernel supports the extended attributes.
Signed-off-by: Leon Hwang <leon.hwang@linux.dev>
---
tools/lib/bpf/bpf.c | 34 +++++++++++++++++++++++++++++++++
tools/lib/bpf/features.c | 8 ++++++++
tools/lib/bpf/libbpf_internal.h | 3 +++
3 files changed, 45 insertions(+)
diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index 21b57a629916..689ade4a822b 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -69,6 +69,40 @@ static inline __u64 ptr_to_u64(const void *ptr)
return (__u64) (unsigned long) ptr;
}
+static inline int sys_bpf_ext(enum bpf_cmd cmd, union bpf_attr *attr,
+ unsigned int size,
+ struct bpf_common_attr *common_attrs,
+ unsigned int size_common)
+{
+ cmd = common_attrs ? (cmd | BPF_COMMON_ATTRS) : (cmd & ~BPF_COMMON_ATTRS);
+ return syscall(__NR_bpf, cmd, attr, size, common_attrs, size_common);
+}
+
+static inline int sys_bpf_ext_fd(enum bpf_cmd cmd, union bpf_attr *attr,
+ unsigned int size,
+ struct bpf_common_attr *common_attrs,
+ unsigned int size_common)
+{
+ int fd;
+
+ fd = sys_bpf_ext(cmd, attr, size, common_attrs, size_common);
+ return ensure_good_fd(fd);
+}
+
+int probe_sys_bpf_ext(void)
+{
+ const size_t attr_sz = offsetofend(union bpf_attr, prog_token_fd);
+ union bpf_attr attr;
+ int fd;
+
+ memset(&attr, 0, attr_sz);
+ fd = syscall(__NR_bpf, BPF_PROG_LOAD | BPF_COMMON_ATTRS, &attr, attr_sz, NULL,
+ sizeof(struct bpf_common_attr));
+ if (fd >= 0)
+ close(fd);
+ return errno == EFAULT;
+}
+
static inline int sys_bpf(enum bpf_cmd cmd, union bpf_attr *attr,
unsigned int size)
{
diff --git a/tools/lib/bpf/features.c b/tools/lib/bpf/features.c
index b842b83e2480..d786a815f1ae 100644
--- a/tools/lib/bpf/features.c
+++ b/tools/lib/bpf/features.c
@@ -506,6 +506,11 @@ static int probe_kern_arg_ctx_tag(int token_fd)
return probe_fd(prog_fd);
}
+static int probe_kern_extended_syscall(int token_fd)
+{
+ return probe_sys_bpf_ext();
+}
+
typedef int (*feature_probe_fn)(int /* token_fd */);
static struct kern_feature_cache feature_cache;
@@ -581,6 +586,9 @@ static struct kern_feature_desc {
[FEAT_BTF_QMARK_DATASEC] = {
"BTF DATASEC names starting from '?'", probe_kern_btf_qmark_datasec,
},
+ [FEAT_EXTENDED_SYSCALL] = {
+ "Kernel supports extended syscall", probe_kern_extended_syscall,
+ },
};
bool feat_supported(struct kern_feature_cache *cache, enum kern_feature_id feat_id)
diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h
index fc59b21b51b5..e2a6ef4b45ae 100644
--- a/tools/lib/bpf/libbpf_internal.h
+++ b/tools/lib/bpf/libbpf_internal.h
@@ -392,6 +392,8 @@ enum kern_feature_id {
FEAT_ARG_CTX_TAG,
/* Kernel supports '?' at the front of datasec names */
FEAT_BTF_QMARK_DATASEC,
+ /* Kernel supports extended syscall */
+ FEAT_EXTENDED_SYSCALL,
__FEAT_CNT,
};
@@ -757,4 +759,5 @@ int probe_fd(int fd);
#define SHA256_DWORD_SIZE SHA256_DIGEST_LENGTH / sizeof(__u64)
void libbpf_sha256(const void *data, size_t len, __u8 out[SHA256_DIGEST_LENGTH]);
+int probe_sys_bpf_ext(void);
#endif /* __LIBBPF_LIBBPF_INTERNAL_H */
--
2.52.0
^ permalink raw reply related
* [RESEND PATCH bpf-next v4 1/9] bpf: Extend bpf syscall with common attributes support
From: Leon Hwang @ 2026-01-06 17:20 UTC (permalink / raw)
To: bpf
Cc: Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko,
Martin KaFai Lau, Eduard Zingerman, Song Liu, Yonghong Song,
John Fastabend, KP Singh, Stanislav Fomichev, Hao Luo, Jiri Olsa,
Shuah Khan, Christian Brauner, Oleg Nesterov, Leon Hwang,
Seth Forshee, Yuichiro Tsuji, Andrey Albershteyn,
Willem de Bruijn, Jason Xing, Paul Chaignon, Mykyta Yatsenko,
Kumar Kartikeya Dwivedi, Anton Protopopov, Amery Hung, Rong Tao,
linux-kernel, linux-api, linux-kselftest, kernel-patches-bot
In-Reply-To: <20260106172018.57757-1-leon.hwang@linux.dev>
Extend the BPF syscall to support a set of common attributes shared
across all BPF commands:
1. 'log_buf': User-provided buffer for storing logs.
2. 'log_size': Size of the log buffer.
3. 'log_level': Log verbosity level.
4. 'log_true_size': The size of log reported by kernel.
These common attributes are passed as the 4th argument to the BPF
syscall, with the 5th argument specifying the size of this structure.
To indicate the use of these common attributes from userspace, a new flag
'BPF_COMMON_ATTRS' ('1 << 16') is introduced. This flag is OR-ed into the
'cmd' field of the syscall.
When 'cmd & BPF_COMMON_ATTRS' is set, the kernel will copy the common
attributes from userspace into kernel space for use.
Signed-off-by: Leon Hwang <leon.hwang@linux.dev>
---
include/linux/syscalls.h | 3 ++-
include/uapi/linux/bpf.h | 8 ++++++++
kernel/bpf/syscall.c | 25 +++++++++++++++++++++----
tools/include/uapi/linux/bpf.h | 8 ++++++++
4 files changed, 39 insertions(+), 5 deletions(-)
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index cf84d98964b2..729659202d77 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -937,7 +937,8 @@ asmlinkage long sys_seccomp(unsigned int op, unsigned int flags,
asmlinkage long sys_getrandom(char __user *buf, size_t count,
unsigned int flags);
asmlinkage long sys_memfd_create(const char __user *uname_ptr, unsigned int flags);
-asmlinkage long sys_bpf(int cmd, union bpf_attr __user *attr, unsigned int size);
+asmlinkage long sys_bpf(int cmd, union bpf_attr __user *attr, unsigned int size,
+ struct bpf_common_attr __user *attr_common, unsigned int size_common);
asmlinkage long sys_execveat(int dfd, const char __user *filename,
const char __user *const __user *argv,
const char __user *const __user *envp, int flags);
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 84ced3ed2d21..dcae1f3e50b7 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -986,6 +986,7 @@ enum bpf_cmd {
BPF_PROG_STREAM_READ_BY_FD,
BPF_PROG_ASSOC_STRUCT_OPS,
__MAX_BPF_CMD,
+ BPF_COMMON_ATTRS = 1 << 16, /* Indicate carrying bpf_common_attr. */
};
enum bpf_map_type {
@@ -1489,6 +1490,13 @@ struct bpf_stack_build_id {
};
};
+struct bpf_common_attr {
+ __u64 log_buf;
+ __u32 log_size;
+ __u32 log_level;
+ __u32 log_true_size;
+};
+
#define BPF_OBJ_NAME_LEN 16U
enum {
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 6dd2ad2f9e81..8f464b847405 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -6160,8 +6160,10 @@ static int prog_assoc_struct_ops(union bpf_attr *attr)
return ret;
}
-static int __sys_bpf(enum bpf_cmd cmd, bpfptr_t uattr, unsigned int size)
+static int __sys_bpf(enum bpf_cmd cmd, bpfptr_t uattr, unsigned int size,
+ bpfptr_t uattr_common, unsigned int size_common)
{
+ struct bpf_common_attr common_attrs;
union bpf_attr attr;
int err;
@@ -6175,6 +6177,20 @@ static int __sys_bpf(enum bpf_cmd cmd, bpfptr_t uattr, unsigned int size)
if (copy_from_bpfptr(&attr, uattr, size) != 0)
return -EFAULT;
+ memset(&common_attrs, 0, sizeof(common_attrs));
+ if (cmd & BPF_COMMON_ATTRS) {
+ err = bpf_check_uarg_tail_zero(uattr_common, sizeof(common_attrs), size_common);
+ if (err)
+ return err;
+
+ cmd &= ~BPF_COMMON_ATTRS;
+ size_common = min_t(u32, size_common, sizeof(common_attrs));
+ if (copy_from_bpfptr(&common_attrs, uattr_common, size_common) != 0)
+ return -EFAULT;
+ } else {
+ size_common = 0;
+ }
+
err = security_bpf(cmd, &attr, size, uattr.is_kernel);
if (err < 0)
return err;
@@ -6310,9 +6326,10 @@ static int __sys_bpf(enum bpf_cmd cmd, bpfptr_t uattr, unsigned int size)
return err;
}
-SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
+SYSCALL_DEFINE5(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size,
+ struct bpf_common_attr __user *, uattr_common, unsigned int, size_common)
{
- return __sys_bpf(cmd, USER_BPFPTR(uattr), size);
+ return __sys_bpf(cmd, USER_BPFPTR(uattr), size, USER_BPFPTR(uattr_common), size_common);
}
static bool syscall_prog_is_valid_access(int off, int size,
@@ -6343,7 +6360,7 @@ BPF_CALL_3(bpf_sys_bpf, int, cmd, union bpf_attr *, attr, u32, attr_size)
default:
return -EINVAL;
}
- return __sys_bpf(cmd, KERNEL_BPFPTR(attr), attr_size);
+ return __sys_bpf(cmd, KERNEL_BPFPTR(attr), attr_size, KERNEL_BPFPTR(NULL), 0);
}
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 6b92b0847ec2..2cb847b38f20 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -986,6 +986,7 @@ enum bpf_cmd {
BPF_PROG_STREAM_READ_BY_FD,
BPF_PROG_ASSOC_STRUCT_OPS,
__MAX_BPF_CMD,
+ BPF_COMMON_ATTRS = 1 << 16, /* Indicate carrying bpf_common_attr. */
};
enum bpf_map_type {
@@ -1489,6 +1490,13 @@ struct bpf_stack_build_id {
};
};
+struct bpf_common_attr {
+ __u64 log_buf;
+ __u32 log_size;
+ __u32 log_level;
+ __u32 log_true_size;
+};
+
#define BPF_OBJ_NAME_LEN 16U
enum {
--
2.52.0
^ permalink raw reply related
* [RESEND PATCH bpf-next v4 0/9] bpf: Extend bpf syscall with common attributes support
From: Leon Hwang @ 2026-01-06 17:20 UTC (permalink / raw)
To: bpf
Cc: Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko,
Martin KaFai Lau, Eduard Zingerman, Song Liu, Yonghong Song,
John Fastabend, KP Singh, Stanislav Fomichev, Hao Luo, Jiri Olsa,
Shuah Khan, Christian Brauner, Oleg Nesterov, Leon Hwang,
Seth Forshee, Yuichiro Tsuji, Andrey Albershteyn,
Willem de Bruijn, Jason Xing, Paul Chaignon, Mykyta Yatsenko,
Kumar Kartikeya Dwivedi, Anton Protopopov, Amery Hung, Rong Tao,
linux-kernel, linux-api, linux-kselftest, kernel-patches-bot
Resending the patch series due to a previous "4.7.1 Error: too many recipients"
failure.
===
This patch series builds upon the discussion in
"[PATCH bpf-next v4 0/4] bpf: Improve error reporting for freplace attachment failure" [1].
This patch series introduces support for *common attributes* in the BPF
syscall, providing a unified mechanism for passing shared metadata across
all BPF commands.
The initial set of common attributes includes:
1. 'log_buf': User-provided buffer for storing log output.
2. 'log_size': Size of the provided log buffer.
3. 'log_level': Verbosity level for logging.
4. 'log_true_size': The size of log reported by kernel.
With this extension, the BPF syscall will be able to return meaningful
error messages (e.g., failures of creating map), improving debuggability
and user experience.
Changes:
RFC v3 -> v4:
* Drop RFC.
* Address comments from Andrii:
* Add parentheses in 'sys_bpf_ext()'.
* Avoid creating new fd in 'probe_sys_bpf_ext()'.
* Add a new struct to wrap log fields in libbpf.
* Address comments from Alexei:
* Do not skip writing to user space when log_true_size is zero.
* Do not use 'bool' arguments.
* Drop the added WARN_ON_ONCE()s.
RFC v2 -> RFC v3:
* Rename probe_sys_bpf_extended to probe_sys_bpf_ext.
* Refactor reporting 'log_true_size' for prog_load.
* Refactor reporting 'btf_log_true_size' for btf_load.
* Add warnings for internal bugs in map_create.
* Check log_true_size in test cases.
* Address comment from Alexei:
* Change kvzalloc/kvfree to kzalloc/kfree.
* Address comments from Andrii:
* Move BPF_COMMON_ATTRS to 'enum bpf_cmd' alongside brief comment.
* Add bpf_check_uarg_tail_zero() for extra checks.
* Rename sys_bpf_extended to sys_bpf_ext.
* Rename sys_bpf_fd_extended to sys_bpf_ext_fd.
* Probe the new feature using NULL and -EFAULT.
* Move probe_sys_bpf_ext to libbpf_internal.h and drop LIBBPF_API.
* Return -EUSERS when log attrs conflict between bpf_attr and
bpf_common_attr.
* Avoid touching bpf_vlog_init().
* Update the reason messages in map_create.
* Finalize the log using __cleanup().
* Report log size to users.
* Change type of log_buf from '__u64' to 'const char *' and cast type
using ptr_to_u64() in bpf_map_create().
* Do not return -EOPNOTSUPP when kernel doesn't support this feature
in bpf_map_create().
* Add log_level support for map creation for consistency.
* Address comment from Eduard:
* Use common_attrs->log_level instead of BPF_LOG_FIXED.
RFC v1 -> RFC v2:
* Fix build error reported by test bot.
* Address comments from Alexei:
* Drop new uapi for freplace.
* Add common attributes support for prog_load and btf_load.
* Add common attributes support for map_create.
Links:
[1] https://lore.kernel.org/bpf/20250224153352.64689-1-leon.hwang@linux.dev/
Leon Hwang (9):
bpf: Extend bpf syscall with common attributes support
libbpf: Add support for extended bpf syscall
bpf: Refactor reporting log_true_size for prog_load
bpf: Add common attr support for prog_load
bpf: Refactor reporting btf_log_true_size for btf_load
bpf: Add common attr support for btf_load
bpf: Add common attr support for map_create
libbpf: Add common attr support for map_create
selftests/bpf: Add tests to verify map create failure log
include/linux/bpf.h | 2 +-
include/linux/btf.h | 2 +-
include/linux/syscalls.h | 3 +-
include/uapi/linux/bpf.h | 8 +
kernel/bpf/btf.c | 25 +-
kernel/bpf/syscall.c | 223 ++++++++++++++++--
kernel/bpf/verifier.c | 12 +-
tools/include/uapi/linux/bpf.h | 8 +
tools/lib/bpf/bpf.c | 49 +++-
tools/lib/bpf/bpf.h | 17 +-
tools/lib/bpf/features.c | 8 +
tools/lib/bpf/libbpf_internal.h | 3 +
.../selftests/bpf/prog_tests/map_init.c | 143 +++++++++++
13 files changed, 448 insertions(+), 55 deletions(-)
--
2.52.0
^ permalink raw reply
* [PATCH bpf-next v4 5/9] bpf: Refactor reporting btf_log_true_size for btf_load
From: Leon Hwang @ 2026-01-06 16:59 UTC (permalink / raw)
To: bpf
Cc: Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko,
Martin KaFai Lau, Eduard Zingerman, Song Liu, Yonghong Song,
John Fastabend, KP Singh, Stanislav Fomichev, Hao Luo, Jiri Olsa,
Shuah Khan, Christian Brauner, Oleg Nesterov, Leon Hwang,
Seth Forshee, Yuichiro Tsuji, Andrey Albershteyn,
Willem de Bruijn, Jason Xing, Paul Chaignon, Mykyta Yatsenko,
Kumar Kartikeya Dwivedi, Anton Protopopov, Amery Hung, Rong Tao,
linux-kernel, linux-api, linux-kselftest, kernel-patches-bot
In-Reply-To: <20260106165907.53631-1-leon.hwang@linux.dev>
The next commit will make it possible to report logs via the extended
common attributes, and 'log_true_size' will be reported via the extended
common attributes as well.
Therefore, refactor how 'btf_log_true_size' is reported so that
'log_true_size' can easily be reported via the extended common attributes,
too.
Signed-off-by: Leon Hwang <leon.hwang@linux.dev>
---
include/linux/btf.h | 2 +-
kernel/bpf/btf.c | 25 +++++--------------------
kernel/bpf/syscall.c | 19 ++++++++++++++++---
3 files changed, 22 insertions(+), 24 deletions(-)
diff --git a/include/linux/btf.h b/include/linux/btf.h
index 691f09784933..2b27fdd567f5 100644
--- a/include/linux/btf.h
+++ b/include/linux/btf.h
@@ -144,7 +144,7 @@ const char *btf_get_name(const struct btf *btf);
void btf_get(struct btf *btf);
void btf_put(struct btf *btf);
const struct btf_header *btf_header(const struct btf *btf);
-int btf_new_fd(const union bpf_attr *attr, bpfptr_t uattr, u32 uattr_sz);
+int btf_new_fd(union bpf_attr *attr, bpfptr_t uattr);
struct btf *btf_get_by_fd(int fd);
int btf_get_info_by_fd(const struct btf *btf,
const union bpf_attr *attr,
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 539c9fdea41d..9efcbb489edb 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -5745,22 +5745,7 @@ static int btf_check_type_tags(struct btf_verifier_env *env,
return 0;
}
-static int finalize_log(struct bpf_verifier_log *log, bpfptr_t uattr, u32 uattr_size)
-{
- u32 log_true_size;
- int err;
-
- err = bpf_vlog_finalize(log, &log_true_size);
-
- if (uattr_size >= offsetofend(union bpf_attr, btf_log_true_size) &&
- copy_to_bpfptr_offset(uattr, offsetof(union bpf_attr, btf_log_true_size),
- &log_true_size, sizeof(log_true_size)))
- err = -EFAULT;
-
- return err;
-}
-
-static struct btf *btf_parse(const union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size)
+static struct btf *btf_parse(union bpf_attr *attr, bpfptr_t uattr)
{
bpfptr_t btf_data = make_bpfptr(attr->btf, uattr.is_kernel);
char __user *log_ubuf = u64_to_user_ptr(attr->btf_log_buf);
@@ -5841,7 +5826,7 @@ static struct btf *btf_parse(const union bpf_attr *attr, bpfptr_t uattr, u32 uat
}
}
- err = finalize_log(&env->log, uattr, uattr_size);
+ err = bpf_vlog_finalize(&env->log, &attr->btf_log_true_size);
if (err)
goto errout_free;
@@ -5853,7 +5838,7 @@ static struct btf *btf_parse(const union bpf_attr *attr, bpfptr_t uattr, u32 uat
btf_free_struct_meta_tab(btf);
errout:
/* overwrite err with -ENOSPC or -EFAULT */
- ret = finalize_log(&env->log, uattr, uattr_size);
+ ret = bpf_vlog_finalize(&env->log, &attr->btf_log_true_size);
if (ret)
err = ret;
errout_free:
@@ -8017,12 +8002,12 @@ static int __btf_new_fd(struct btf *btf)
return anon_inode_getfd("btf", &btf_fops, btf, O_RDONLY | O_CLOEXEC);
}
-int btf_new_fd(const union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size)
+int btf_new_fd(union bpf_attr *attr, bpfptr_t uattr)
{
struct btf *btf;
int ret;
- btf = btf_parse(attr, uattr, uattr_size);
+ btf = btf_parse(attr, uattr);
if (IS_ERR(btf))
return PTR_ERR(btf);
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index ad565f569a4f..ce349a059d5d 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -5422,7 +5422,7 @@ static int bpf_obj_get_info_by_fd(const union bpf_attr *attr,
#define BPF_BTF_LOAD_LAST_FIELD btf_token_fd
-static int bpf_btf_load(const union bpf_attr *attr, bpfptr_t uattr, __u32 uattr_size)
+static int bpf_btf_load(union bpf_attr *attr, bpfptr_t uattr)
{
struct bpf_token *token = NULL;
@@ -5449,7 +5449,7 @@ static int bpf_btf_load(const union bpf_attr *attr, bpfptr_t uattr, __u32 uattr_
bpf_token_put(token);
- return btf_new_fd(attr, uattr, uattr_size);
+ return btf_new_fd(attr, uattr);
}
#define BPF_BTF_GET_FD_BY_ID_LAST_FIELD fd_by_id_token_fd
@@ -6211,6 +6211,16 @@ static int copy_prog_load_log_true_size(union bpf_attr *attr, bpfptr_t uattr, un
&attr->log_true_size);
}
+static int copy_btf_load_log_true_size(union bpf_attr *attr, bpfptr_t uattr, unsigned int size)
+{
+ if (size >= offsetofend(union bpf_attr, btf_log_true_size) &&
+ copy_to_bpfptr_offset(uattr, offsetof(union bpf_attr, btf_log_true_size),
+ &attr->btf_log_true_size, sizeof(attr->btf_log_true_size)))
+ return -EFAULT;
+
+ return 0;
+}
+
static int __sys_bpf(enum bpf_cmd cmd, bpfptr_t uattr, unsigned int size,
bpfptr_t uattr_common, unsigned int size_common)
{
@@ -6318,7 +6328,10 @@ static int __sys_bpf(enum bpf_cmd cmd, bpfptr_t uattr, unsigned int size,
err = bpf_raw_tracepoint_open(&attr);
break;
case BPF_BTF_LOAD:
- err = bpf_btf_load(&attr, uattr, size);
+ attr.btf_log_true_size = 0;
+ err = bpf_btf_load(&attr, uattr);
+ ret = copy_btf_load_log_true_size(&attr, uattr, size);
+ err = ret ? ret : err;
break;
case BPF_BTF_GET_FD_BY_ID:
err = bpf_btf_get_fd_by_id(&attr);
--
2.52.0
^ permalink raw reply related
* [PATCH bpf-next v4 4/9] bpf: Add common attr support for prog_load
From: Leon Hwang @ 2026-01-06 16:59 UTC (permalink / raw)
To: bpf
Cc: Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko,
Martin KaFai Lau, Eduard Zingerman, Song Liu, Yonghong Song,
John Fastabend, KP Singh, Stanislav Fomichev, Hao Luo, Jiri Olsa,
Shuah Khan, Christian Brauner, Oleg Nesterov, Leon Hwang,
Seth Forshee, Yuichiro Tsuji, Andrey Albershteyn,
Willem de Bruijn, Jason Xing, Paul Chaignon, Mykyta Yatsenko,
Kumar Kartikeya Dwivedi, Anton Protopopov, Amery Hung, Rong Tao,
linux-kernel, linux-api, linux-kselftest, kernel-patches-bot
In-Reply-To: <20260106165907.53631-1-leon.hwang@linux.dev>
The log buffer in the common attributes could be confused with the one in
'union bpf_attr' for BPF_PROG_LOAD.
To clarify how these two log buffers are used, logging is allowed if:
* They are identical, i.e. 'log_buf', 'log_size' and 'log_level' all match.
* Only one of them is provided, in which case that one is used for logging.
If both provide 'log_buf' but they are not identical, return -EUSERS.
Signed-off-by: Leon Hwang <leon.hwang@linux.dev>
---
kernel/bpf/syscall.c | 51 +++++++++++++++++++++++++++++++++++++++++---
1 file changed, 48 insertions(+), 3 deletions(-)
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 1739601fb7bd..ad565f569a4f 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -6160,14 +6160,55 @@ static int prog_assoc_struct_ops(union bpf_attr *attr)
return ret;
}
-static int copy_prog_load_log_true_size(union bpf_attr *attr, bpfptr_t uattr, unsigned int size)
+static int check_log_attrs(u64 log_buf, u32 log_size, u32 log_level,
+ struct bpf_common_attr *common_attrs)
+{
+ if (log_buf && common_attrs->log_buf && (log_buf != common_attrs->log_buf ||
+ log_size != common_attrs->log_size ||
+ log_level != common_attrs->log_level))
+ return -EUSERS;
+
+ return 0;
+}
+
+static int check_prog_load_log_attrs(union bpf_attr *attr, struct bpf_common_attr *common_attrs)
+{
+ int err;
+
+ err = check_log_attrs(attr->log_buf, attr->log_size, attr->log_level, common_attrs);
+ if (err)
+ return err;
+
+ if (!attr->log_buf && common_attrs->log_buf) {
+ attr->log_buf = common_attrs->log_buf;
+ attr->log_size = common_attrs->log_size;
+ attr->log_level = common_attrs->log_level;
+ }
+
+ return 0;
+}
+
+static int copy_common_attr_log_true_size(bpfptr_t uattr, unsigned int size, u32 *log_true_size)
+{
+ if (size >= offsetofend(struct bpf_common_attr, log_true_size) &&
+ copy_to_bpfptr_offset(uattr, offsetof(struct bpf_common_attr, log_true_size),
+ log_true_size, sizeof(*log_true_size)))
+ return -EFAULT;
+
+ return 0;
+}
+
+static int copy_prog_load_log_true_size(union bpf_attr *attr, bpfptr_t uattr, unsigned int size,
+ struct bpf_common_attr *common_attrs, bpfptr_t uattr_common,
+ unsigned int size_common)
{
if (size >= offsetofend(union bpf_attr, log_true_size) &&
copy_to_bpfptr_offset(uattr, offsetof(union bpf_attr, log_true_size),
&attr->log_true_size, sizeof(attr->log_true_size)))
return -EFAULT;
- return 0;
+ return copy_common_attr_log_true_size(uattr_common, size_common,
+ &attr->log_true_size);
}
static int __sys_bpf(enum bpf_cmd cmd, bpfptr_t uattr, unsigned int size,
@@ -6225,9 +6266,13 @@ static int __sys_bpf(enum bpf_cmd cmd, bpfptr_t uattr, unsigned int size,
err = map_freeze(&attr);
break;
case BPF_PROG_LOAD:
+ err = check_prog_load_log_attrs(&attr, &common_attrs);
+ if (err)
+ break;
attr.log_true_size = 0;
err = bpf_prog_load(&attr, uattr);
- ret = copy_prog_load_log_true_size(&attr, uattr, size);
+ ret = copy_prog_load_log_true_size(&attr, uattr, size, &common_attrs, uattr_common,
+ size_common);
err = ret ? ret : err;
break;
case BPF_OBJ_PIN:
--
2.52.0
^ permalink raw reply related
* [PATCH bpf-next v4 3/9] bpf: Refactor reporting log_true_size for prog_load
From: Leon Hwang @ 2026-01-06 16:59 UTC (permalink / raw)
To: bpf
Cc: Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko,
Martin KaFai Lau, Eduard Zingerman, Song Liu, Yonghong Song,
John Fastabend, KP Singh, Stanislav Fomichev, Hao Luo, Jiri Olsa,
Shuah Khan, Christian Brauner, Oleg Nesterov, Leon Hwang,
Seth Forshee, Yuichiro Tsuji, Andrey Albershteyn,
Willem de Bruijn, Jason Xing, Paul Chaignon, Mykyta Yatsenko,
Kumar Kartikeya Dwivedi, Anton Protopopov, Amery Hung, Rong Tao,
linux-kernel, linux-api, linux-kselftest, kernel-patches-bot
In-Reply-To: <20260106165907.53631-1-leon.hwang@linux.dev>
The next commit will make it possible to report logs via the extended
common attributes, and 'log_true_size' will be reported via the extended
common attributes as well.
Therefore, refactor how 'log_true_size' is reported so that it can easily
be reported via the extended common attributes, too.
Signed-off-by: Leon Hwang <leon.hwang@linux.dev>
---
include/linux/bpf.h | 2 +-
kernel/bpf/syscall.c | 21 +++++++++++++++++----
kernel/bpf/verifier.c | 12 ++----------
3 files changed, 20 insertions(+), 15 deletions(-)
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index a63e47d2109c..26fbc550e5aa 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -2868,7 +2868,7 @@ int bpf_check_uarg_tail_zero(bpfptr_t uaddr, size_t expected_size,
size_t actual_size);
/* verify correctness of eBPF program */
-int bpf_check(struct bpf_prog **fp, union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size);
+int bpf_check(struct bpf_prog **fp, union bpf_attr *attr, bpfptr_t uattr);
#ifndef CONFIG_BPF_JIT_ALWAYS_ON
void bpf_patch_call_args(struct bpf_insn *insn, u32 stack_depth);
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 8f464b847405..1739601fb7bd 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -2862,7 +2862,7 @@ static int bpf_prog_mark_insn_arrays_ready(struct bpf_prog *prog)
/* last field in 'union bpf_attr' used by this command */
#define BPF_PROG_LOAD_LAST_FIELD keyring_id
-static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size)
+static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr)
{
enum bpf_prog_type type = attr->prog_type;
struct bpf_prog *prog, *dst_prog = NULL;
@@ -3080,7 +3080,7 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size)
goto free_prog_sec;
/* run eBPF verifier */
- err = bpf_check(&prog, attr, uattr, uattr_size);
+ err = bpf_check(&prog, attr, uattr);
if (err < 0)
goto free_used_maps;
@@ -6160,12 +6160,22 @@ static int prog_assoc_struct_ops(union bpf_attr *attr)
return ret;
}
+static int copy_prog_load_log_true_size(union bpf_attr *attr, bpfptr_t uattr, unsigned int size)
+{
+ if (size >= offsetofend(union bpf_attr, log_true_size) &&
+ copy_to_bpfptr_offset(uattr, offsetof(union bpf_attr, log_true_size),
+ &attr->log_true_size, sizeof(attr->log_true_size)))
+ return -EFAULT;
+
+ return 0;
+}
+
static int __sys_bpf(enum bpf_cmd cmd, bpfptr_t uattr, unsigned int size,
bpfptr_t uattr_common, unsigned int size_common)
{
struct bpf_common_attr common_attrs;
union bpf_attr attr;
- int err;
+ int err, ret;
err = bpf_check_uarg_tail_zero(uattr, sizeof(attr), size);
if (err)
@@ -6215,7 +6225,10 @@ static int __sys_bpf(enum bpf_cmd cmd, bpfptr_t uattr, unsigned int size,
err = map_freeze(&attr);
break;
case BPF_PROG_LOAD:
- err = bpf_prog_load(&attr, uattr, size);
+ attr.log_true_size = 0;
+ err = bpf_prog_load(&attr, uattr);
+ ret = copy_prog_load_log_true_size(&attr, uattr, size);
+ err = ret ? ret : err;
break;
case BPF_OBJ_PIN:
err = bpf_obj_pin(&attr);
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 9394b0de2ef0..ab5eacdde92c 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -25096,12 +25096,11 @@ static int compute_scc(struct bpf_verifier_env *env)
return err;
}
-int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u32 uattr_size)
+int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr)
{
u64 start_time = ktime_get_ns();
struct bpf_verifier_env *env;
int i, len, ret = -EINVAL, err;
- u32 log_true_size;
bool is_priv;
BTF_TYPE_EMIT(enum bpf_features);
@@ -25300,17 +25299,10 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3
env->prog->aux->verified_insns = env->insn_processed;
/* preserve original error even if log finalization is successful */
- err = bpf_vlog_finalize(&env->log, &log_true_size);
+ err = bpf_vlog_finalize(&env->log, &attr->log_true_size);
if (err)
ret = err;
- if (uattr_size >= offsetofend(union bpf_attr, log_true_size) &&
- copy_to_bpfptr_offset(uattr, offsetof(union bpf_attr, log_true_size),
- &log_true_size, sizeof(log_true_size))) {
- ret = -EFAULT;
- goto err_release_maps;
- }
-
if (ret)
goto err_release_maps;
--
2.52.0
^ permalink raw reply related
* [PATCH bpf-next v4 2/9] libbpf: Add support for extended bpf syscall
From: Leon Hwang @ 2026-01-06 16:59 UTC (permalink / raw)
To: bpf
Cc: Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko,
Martin KaFai Lau, Eduard Zingerman, Song Liu, Yonghong Song,
John Fastabend, KP Singh, Stanislav Fomichev, Hao Luo, Jiri Olsa,
Shuah Khan, Christian Brauner, Oleg Nesterov, Leon Hwang,
Seth Forshee, Yuichiro Tsuji, Andrey Albershteyn,
Willem de Bruijn, Jason Xing, Paul Chaignon, Mykyta Yatsenko,
Kumar Kartikeya Dwivedi, Anton Protopopov, Amery Hung, Rong Tao,
linux-kernel, linux-api, linux-kselftest, kernel-patches-bot
In-Reply-To: <20260106165907.53631-1-leon.hwang@linux.dev>
To support the extended BPF syscall introduced in the previous commit,
introduce the following internal APIs:
* 'sys_bpf_ext()'
* 'sys_bpf_ext_fd()'
They wrap the raw 'syscall()' interface to support passing extended
attributes.
* 'probe_sys_bpf_ext()'
Check whether the current kernel supports the extended attributes.
Signed-off-by: Leon Hwang <leon.hwang@linux.dev>
---
tools/lib/bpf/bpf.c | 34 +++++++++++++++++++++++++++++++++
tools/lib/bpf/features.c | 8 ++++++++
tools/lib/bpf/libbpf_internal.h | 3 +++
3 files changed, 45 insertions(+)
diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index 21b57a629916..689ade4a822b 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -69,6 +69,40 @@ static inline __u64 ptr_to_u64(const void *ptr)
return (__u64) (unsigned long) ptr;
}
+static inline int sys_bpf_ext(enum bpf_cmd cmd, union bpf_attr *attr,
+ unsigned int size,
+ struct bpf_common_attr *common_attrs,
+ unsigned int size_common)
+{
+ cmd = common_attrs ? (cmd | BPF_COMMON_ATTRS) : (cmd & ~BPF_COMMON_ATTRS);
+ return syscall(__NR_bpf, cmd, attr, size, common_attrs, size_common);
+}
+
+static inline int sys_bpf_ext_fd(enum bpf_cmd cmd, union bpf_attr *attr,
+ unsigned int size,
+ struct bpf_common_attr *common_attrs,
+ unsigned int size_common)
+{
+ int fd;
+
+ fd = sys_bpf_ext(cmd, attr, size, common_attrs, size_common);
+ return ensure_good_fd(fd);
+}
+
+int probe_sys_bpf_ext(void)
+{
+ const size_t attr_sz = offsetofend(union bpf_attr, prog_token_fd);
+ union bpf_attr attr;
+ int fd;
+
+ memset(&attr, 0, attr_sz);
+ fd = syscall(__NR_bpf, BPF_PROG_LOAD | BPF_COMMON_ATTRS, &attr, attr_sz, NULL,
+ sizeof(struct bpf_common_attr));
+ if (fd >= 0)
+ close(fd);
+ return errno == EFAULT;
+}
+
static inline int sys_bpf(enum bpf_cmd cmd, union bpf_attr *attr,
unsigned int size)
{
diff --git a/tools/lib/bpf/features.c b/tools/lib/bpf/features.c
index b842b83e2480..d786a815f1ae 100644
--- a/tools/lib/bpf/features.c
+++ b/tools/lib/bpf/features.c
@@ -506,6 +506,11 @@ static int probe_kern_arg_ctx_tag(int token_fd)
return probe_fd(prog_fd);
}
+static int probe_kern_extended_syscall(int token_fd)
+{
+ return probe_sys_bpf_ext();
+}
+
typedef int (*feature_probe_fn)(int /* token_fd */);
static struct kern_feature_cache feature_cache;
@@ -581,6 +586,9 @@ static struct kern_feature_desc {
[FEAT_BTF_QMARK_DATASEC] = {
"BTF DATASEC names starting from '?'", probe_kern_btf_qmark_datasec,
},
+ [FEAT_EXTENDED_SYSCALL] = {
+ "Kernel supports extended syscall", probe_kern_extended_syscall,
+ },
};
bool feat_supported(struct kern_feature_cache *cache, enum kern_feature_id feat_id)
diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h
index fc59b21b51b5..e2a6ef4b45ae 100644
--- a/tools/lib/bpf/libbpf_internal.h
+++ b/tools/lib/bpf/libbpf_internal.h
@@ -392,6 +392,8 @@ enum kern_feature_id {
FEAT_ARG_CTX_TAG,
/* Kernel supports '?' at the front of datasec names */
FEAT_BTF_QMARK_DATASEC,
+ /* Kernel supports extended syscall */
+ FEAT_EXTENDED_SYSCALL,
__FEAT_CNT,
};
@@ -757,4 +759,5 @@ int probe_fd(int fd);
#define SHA256_DWORD_SIZE SHA256_DIGEST_LENGTH / sizeof(__u64)
void libbpf_sha256(const void *data, size_t len, __u8 out[SHA256_DIGEST_LENGTH]);
+int probe_sys_bpf_ext(void);
#endif /* __LIBBPF_LIBBPF_INTERNAL_H */
--
2.52.0
^ permalink raw reply related
* [PATCH bpf-next v4 1/9] bpf: Extend bpf syscall with common attributes support
From: Leon Hwang @ 2026-01-06 16:58 UTC (permalink / raw)
To: bpf
Cc: Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko,
Martin KaFai Lau, Eduard Zingerman, Song Liu, Yonghong Song,
John Fastabend, KP Singh, Stanislav Fomichev, Hao Luo, Jiri Olsa,
Shuah Khan, Christian Brauner, Oleg Nesterov, Leon Hwang,
Seth Forshee, Yuichiro Tsuji, Andrey Albershteyn,
Willem de Bruijn, Jason Xing, Paul Chaignon, Mykyta Yatsenko,
Kumar Kartikeya Dwivedi, Anton Protopopov, Amery Hung, Rong Tao,
linux-kernel, linux-api, linux-kselftest, kernel-patches-bot
In-Reply-To: <20260106165907.53631-1-leon.hwang@linux.dev>
Extend the BPF syscall to support a set of common attributes shared
across all BPF commands:
1. 'log_buf': User-provided buffer for storing logs.
2. 'log_size': Size of the log buffer.
3. 'log_level': Log verbosity level.
4. 'log_true_size': The size of log reported by kernel.
These common attributes are passed as the 4th argument to the BPF
syscall, with the 5th argument specifying the size of this structure.
To indicate the use of these common attributes from userspace, a new flag
'BPF_COMMON_ATTRS' ('1 << 16') is introduced. This flag is OR-ed into the
'cmd' field of the syscall.
When 'cmd & BPF_COMMON_ATTRS' is set, the kernel will copy the common
attributes from userspace into kernel space for use.
Signed-off-by: Leon Hwang <leon.hwang@linux.dev>
---
include/linux/syscalls.h | 3 ++-
include/uapi/linux/bpf.h | 8 ++++++++
kernel/bpf/syscall.c | 25 +++++++++++++++++++++----
tools/include/uapi/linux/bpf.h | 8 ++++++++
4 files changed, 39 insertions(+), 5 deletions(-)
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index cf84d98964b2..729659202d77 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -937,7 +937,8 @@ asmlinkage long sys_seccomp(unsigned int op, unsigned int flags,
asmlinkage long sys_getrandom(char __user *buf, size_t count,
unsigned int flags);
asmlinkage long sys_memfd_create(const char __user *uname_ptr, unsigned int flags);
-asmlinkage long sys_bpf(int cmd, union bpf_attr __user *attr, unsigned int size);
+asmlinkage long sys_bpf(int cmd, union bpf_attr __user *attr, unsigned int size,
+ struct bpf_common_attr __user *attr_common, unsigned int size_common);
asmlinkage long sys_execveat(int dfd, const char __user *filename,
const char __user *const __user *argv,
const char __user *const __user *envp, int flags);
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 84ced3ed2d21..dcae1f3e50b7 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -986,6 +986,7 @@ enum bpf_cmd {
BPF_PROG_STREAM_READ_BY_FD,
BPF_PROG_ASSOC_STRUCT_OPS,
__MAX_BPF_CMD,
+ BPF_COMMON_ATTRS = 1 << 16, /* Indicate carrying bpf_common_attr. */
};
enum bpf_map_type {
@@ -1489,6 +1490,13 @@ struct bpf_stack_build_id {
};
};
+struct bpf_common_attr {
+ __u64 log_buf;
+ __u32 log_size;
+ __u32 log_level;
+ __u32 log_true_size;
+};
+
#define BPF_OBJ_NAME_LEN 16U
enum {
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 6dd2ad2f9e81..8f464b847405 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -6160,8 +6160,10 @@ static int prog_assoc_struct_ops(union bpf_attr *attr)
return ret;
}
-static int __sys_bpf(enum bpf_cmd cmd, bpfptr_t uattr, unsigned int size)
+static int __sys_bpf(enum bpf_cmd cmd, bpfptr_t uattr, unsigned int size,
+ bpfptr_t uattr_common, unsigned int size_common)
{
+ struct bpf_common_attr common_attrs;
union bpf_attr attr;
int err;
@@ -6175,6 +6177,20 @@ static int __sys_bpf(enum bpf_cmd cmd, bpfptr_t uattr, unsigned int size)
if (copy_from_bpfptr(&attr, uattr, size) != 0)
return -EFAULT;
+ memset(&common_attrs, 0, sizeof(common_attrs));
+ if (cmd & BPF_COMMON_ATTRS) {
+ err = bpf_check_uarg_tail_zero(uattr_common, sizeof(common_attrs), size_common);
+ if (err)
+ return err;
+
+ cmd &= ~BPF_COMMON_ATTRS;
+ size_common = min_t(u32, size_common, sizeof(common_attrs));
+ if (copy_from_bpfptr(&common_attrs, uattr_common, size_common) != 0)
+ return -EFAULT;
+ } else {
+ size_common = 0;
+ }
+
err = security_bpf(cmd, &attr, size, uattr.is_kernel);
if (err < 0)
return err;
@@ -6310,9 +6326,10 @@ static int __sys_bpf(enum bpf_cmd cmd, bpfptr_t uattr, unsigned int size)
return err;
}
-SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
+SYSCALL_DEFINE5(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size,
+ struct bpf_common_attr __user *, uattr_common, unsigned int, size_common)
{
- return __sys_bpf(cmd, USER_BPFPTR(uattr), size);
+ return __sys_bpf(cmd, USER_BPFPTR(uattr), size, USER_BPFPTR(uattr_common), size_common);
}
static bool syscall_prog_is_valid_access(int off, int size,
@@ -6343,7 +6360,7 @@ BPF_CALL_3(bpf_sys_bpf, int, cmd, union bpf_attr *, attr, u32, attr_size)
default:
return -EINVAL;
}
- return __sys_bpf(cmd, KERNEL_BPFPTR(attr), attr_size);
+ return __sys_bpf(cmd, KERNEL_BPFPTR(attr), attr_size, KERNEL_BPFPTR(NULL), 0);
}
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 6b92b0847ec2..2cb847b38f20 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -986,6 +986,7 @@ enum bpf_cmd {
BPF_PROG_STREAM_READ_BY_FD,
BPF_PROG_ASSOC_STRUCT_OPS,
__MAX_BPF_CMD,
+ BPF_COMMON_ATTRS = 1 << 16, /* Indicate carrying bpf_common_attr. */
};
enum bpf_map_type {
@@ -1489,6 +1490,13 @@ struct bpf_stack_build_id {
};
};
+struct bpf_common_attr {
+ __u64 log_buf;
+ __u32 log_size;
+ __u32 log_level;
+ __u32 log_true_size;
+};
+
#define BPF_OBJ_NAME_LEN 16U
enum {
--
2.52.0
^ permalink raw reply related
* [PATCH bpf-next v4 0/9] bpf: Extend bpf syscall with common attributes support
From: Leon Hwang @ 2026-01-06 16:58 UTC (permalink / raw)
To: bpf
Cc: Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko,
Martin KaFai Lau, Eduard Zingerman, Song Liu, Yonghong Song,
John Fastabend, KP Singh, Stanislav Fomichev, Hao Luo, Jiri Olsa,
Shuah Khan, Christian Brauner, Oleg Nesterov, Leon Hwang,
Seth Forshee, Yuichiro Tsuji, Andrey Albershteyn,
Willem de Bruijn, Jason Xing, Paul Chaignon, Mykyta Yatsenko,
Kumar Kartikeya Dwivedi, Anton Protopopov, Amery Hung, Rong Tao,
linux-kernel, linux-api, linux-kselftest, kernel-patches-bot
This patch series builds upon the discussion in
"[PATCH bpf-next v4 0/4] bpf: Improve error reporting for freplace attachment failure" [1].
This patch series introduces support for *common attributes* in the BPF
syscall, providing a unified mechanism for passing shared metadata across
all BPF commands.
The initial set of common attributes includes:
1. 'log_buf': User-provided buffer for storing log output.
2. 'log_size': Size of the provided log buffer.
3. 'log_level': Verbosity level for logging.
4. 'log_true_size': The size of log reported by kernel.
With this extension, the BPF syscall will be able to return meaningful
error messages (e.g., for map creation failures), improving debuggability
and user experience.
Changes:
RFC v3 -> v4:
* Drop RFC.
* Address comments from Andrii:
* Add parentheses in 'sys_bpf_ext()'.
* Avoid creating new fd in 'probe_sys_bpf_ext()'.
* Add a new struct to wrap log fields in libbpf.
* Address comments from Alexei:
* Do not skip writing to user space when log_true_size is zero.
* Do not use 'bool' arguments.
* Drop the added WARN_ON_ONCE()s.
RFC v2 -> RFC v3:
* Rename probe_sys_bpf_extended to probe_sys_bpf_ext.
* Refactor reporting 'log_true_size' for prog_load.
* Refactor reporting 'btf_log_true_size' for btf_load.
* Add warnings for internal bugs in map_create.
* Check log_true_size in test cases.
* Address comment from Alexei:
* Change kvzalloc/kvfree to kzalloc/kfree.
* Address comments from Andrii:
* Move BPF_COMMON_ATTRS to 'enum bpf_cmd' alongside brief comment.
* Add bpf_check_uarg_tail_zero() for extra checks.
* Rename sys_bpf_extended to sys_bpf_ext.
* Rename sys_bpf_fd_extended to sys_bpf_ext_fd.
* Probe the new feature using NULL and -EFAULT.
* Move probe_sys_bpf_ext to libbpf_internal.h and drop LIBBPF_API.
* Return -EUSERS when log attrs conflict between bpf_attr and
bpf_common_attr.
* Avoid touching bpf_vlog_init().
* Update the reason messages in map_create.
* Finalize the log using __cleanup().
* Report log size to users.
* Change type of log_buf from '__u64' to 'const char *' and cast type
using ptr_to_u64() in bpf_map_create().
* Do not return -EOPNOTSUPP when kernel doesn't support this feature
in bpf_map_create().
* Add log_level support for map creation for consistency.
* Address comment from Eduard:
* Use common_attrs->log_level instead of BPF_LOG_FIXED.
RFC v1 -> RFC v2:
* Fix build error reported by test bot.
* Address comments from Alexei:
* Drop new uapi for freplace.
* Add common attributes support for prog_load and btf_load.
* Add common attributes support for map_create.
Links:
[1] https://lore.kernel.org/bpf/20250224153352.64689-1-leon.hwang@linux.dev/
Leon Hwang (9):
bpf: Extend bpf syscall with common attributes support
libbpf: Add support for extended bpf syscall
bpf: Refactor reporting log_true_size for prog_load
bpf: Add common attr support for prog_load
bpf: Refactor reporting btf_log_true_size for btf_load
bpf: Add common attr support for btf_load
bpf: Add common attr support for map_create
libbpf: Add common attr support for map_create
selftests/bpf: Add tests to verify map create failure log
include/linux/bpf.h | 2 +-
include/linux/btf.h | 2 +-
include/linux/syscalls.h | 3 +-
include/uapi/linux/bpf.h | 8 +
kernel/bpf/btf.c | 25 +-
kernel/bpf/syscall.c | 223 ++++++++++++++++--
kernel/bpf/verifier.c | 12 +-
tools/include/uapi/linux/bpf.h | 8 +
tools/lib/bpf/bpf.c | 49 +++-
tools/lib/bpf/bpf.h | 17 +-
tools/lib/bpf/features.c | 8 +
tools/lib/bpf/libbpf_internal.h | 3 +
.../selftests/bpf/prog_tests/map_init.c | 143 +++++++++++
13 files changed, 448 insertions(+), 55 deletions(-)
--
2.52.0
^ permalink raw reply
* Re: [PATCH net-next] net: uapi: Provide an UAPI definition of 'struct sockaddr'
From: Thomas Weißschuh @ 2026-01-06 10:32 UTC (permalink / raw)
To: Jakub Kicinski
Cc: Eric Dumazet, Kuniyuki Iwashima, Paolo Abeni, Willem de Bruijn,
netdev, linux-kernel, linux-api, Arnd Bergmann
In-Reply-To: <20260105095713.0b312b26@kernel.org>
Hi Jakub,
On Mon, Jan 05, 2026 at 09:57:13AM -0800, Jakub Kicinski wrote:
> On Mon, 05 Jan 2026 09:25:55 +0100 Thomas Weißschuh wrote:
> > Various UAPI headers reference 'struct sockaddr'. Currently the
> > definition of this struct is pulled in from the libc header
> > sys/socket.h. This is problematic as it introduces a dependency
> > on a full userspace toolchain.
> >
> > Instead expose a custom but compatible definition of 'struct sockaddr'
> > in the UAPI headers. It is guarded by the libc compatibility
> > infrastructure to avoid potential conflicts.
> >
> > The compatibility symbol won't be supported by glibc right away,
> > but right now __UAPI_DEF_IF_IFNAMSIZ is not supported either,
> > so including the libc headers before the UAPI headers is broken anyways.
>
> I did not look too closely but this seems to break build of selftests
> in netdev and BPF CI (netdev on AWS Linux, not sure what base BPF uses)
Thanks for the report.
I found the reported CI failures in BPF CI and will work on those.
As for the failure in netdev CI, however, I am not so sure.
Looking at net-next-2026-01-05--12-00, the only failures triggered by my
change are also the ones from the bpf-ci. Are these the ones you meant,
or am I missing some others?
Thomas
^ permalink raw reply
* Re: [PATCH 8/9] arm64: vdso32: Provide clock_getres_time64()
From: Will Deacon @ 2026-01-05 20:55 UTC (permalink / raw)
To: Thomas Weißschuh
Cc: Andy Lutomirski, Thomas Gleixner, Vincenzo Frascino, Shuah Khan,
Ingo Molnar, Borislav Petkov, Dave Hansen, x86, H. Peter Anvin,
Russell King, Catalin Marinas, Thomas Bogendoerfer, linux-kernel,
linux-kselftest, Russell King, linux-arm-kernel, linux-mips,
Arnd Bergmann, linux-api
In-Reply-To: <20251223-vdso-compat-time32-v1-8-97ea7a06a543@linutronix.de>
On Tue, Dec 23, 2025 at 07:59:19AM +0100, Thomas Weißschuh wrote:
> For consistency with __vdso_clock_gettime64() there should also be a
> 64-bit variant of clock_getres(). This will allow the extension of
> CONFIG_COMPAT_32BIT_TIME to the vDSO and finally the removal of 32-bit
> time types from the kernel and UAPI.
>
> Signed-off-by: Thomas Weißschuh <thomas.weissschuh@linutronix.de>
> ---
> arch/arm64/kernel/vdso32/vdso.lds.S | 1 +
> arch/arm64/kernel/vdso32/vgettimeofday.c | 6 ++++++
> 2 files changed, 7 insertions(+)
>
> diff --git a/arch/arm64/kernel/vdso32/vdso.lds.S b/arch/arm64/kernel/vdso32/vdso.lds.S
> index e02b27487ce8..c374fb0146f3 100644
> --- a/arch/arm64/kernel/vdso32/vdso.lds.S
> +++ b/arch/arm64/kernel/vdso32/vdso.lds.S
> @@ -86,6 +86,7 @@ VERSION
> __vdso_gettimeofday;
> __vdso_clock_getres;
> __vdso_clock_gettime64;
> + __vdso_clock_getres_time64;
> local: *;
> };
> }
> diff --git a/arch/arm64/kernel/vdso32/vgettimeofday.c b/arch/arm64/kernel/vdso32/vgettimeofday.c
> index 29b4d8f61e39..d7b39b0a9668 100644
> --- a/arch/arm64/kernel/vdso32/vgettimeofday.c
> +++ b/arch/arm64/kernel/vdso32/vgettimeofday.c
> @@ -32,6 +32,12 @@ int __vdso_clock_getres(clockid_t clock_id,
> return __cvdso_clock_getres_time32(clock_id, res);
> }
>
> +int __vdso_clock_getres_time64(clockid_t clock_id,
> + struct __kernel_timespec *res)
> +{
> + return __cvdso_clock_getres(clock_id, res);
> +}
> +
> /* Avoid unresolved references emitted by GCC */
>
> void __aeabi_unwind_cpp_pr0(void)
Acked-by: Will Deacon <will@kernel.org>
Please merge this along with the 32-bit Arm change, as it doesn't make
sense otherwise.
Will
^ permalink raw reply
* Re: [PATCH net-next] net: uapi: Provide an UAPI definition of 'struct sockaddr'
From: Jakub Kicinski @ 2026-01-05 17:57 UTC (permalink / raw)
To: Thomas Weißschuh
Cc: Eric Dumazet, Kuniyuki Iwashima, Paolo Abeni, Willem de Bruijn,
netdev, linux-kernel, linux-api, Arnd Bergmann
In-Reply-To: <20260105-uapi-sockaddr-v1-1-b7653aba12a5@linutronix.de>
On Mon, 05 Jan 2026 09:25:55 +0100 Thomas Weißschuh wrote:
> Various UAPI headers reference 'struct sockaddr'. Currently the
> definition of this struct is pulled in from the libc header
> sys/socket.h. This is problematic as it introduces a dependency
> on a full userspace toolchain.
>
> Instead expose a custom but compatible definition of 'struct sockaddr'
> in the UAPI headers. It is guarded by the libc compatibility
> infrastructure to avoid potential conflicts.
>
> The compatibility symbol won't be supported by glibc right away,
> but right now __UAPI_DEF_IF_IFNAMSIZ is not supported either,
> so including the libc headers before the UAPI headers is broken anyways.
I did not look too closely but this seems to break build of selftests
in netdev and BPF CI (netdev on AWS Linux, not sure what base BPF uses)
^ permalink raw reply
* Re: [PATCH net-next] net: uapi: Provide an UAPI definition of 'struct sockaddr'
From: Arnd Bergmann @ 2026-01-05 13:50 UTC (permalink / raw)
To: Thomas Weißschuh, Eric Dumazet, Kuniyuki Iwashima,
Paolo Abeni, Willem de Bruijn, libc-alpha, Carlos O'Donell,
Adhemerval Zanella, Rich Felker
Cc: Netdev, linux-kernel, linux-api, klibc
In-Reply-To: <20260105-uapi-sockaddr-v1-1-b7653aba12a5@linutronix.de>
On Mon, Jan 5, 2026, at 09:25, Thomas Weißschuh wrote:
> Various UAPI headers reference 'struct sockaddr'. Currently the
> definition of this struct is pulled in from the libc header
> sys/socket.h. This is problematic as it introduces a dependency
> on a full userspace toolchain.
>
> Instead expose a custom but compatible definition of 'struct sockaddr'
> in the UAPI headers. It is guarded by the libc compatibility
> infrastructure to avoid potential conflicts.
>
> The compatibility symbol won't be supported by glibc right away,
> but right now __UAPI_DEF_IF_IFNAMSIZ is not supported either,
> so including the libc headers before the UAPI headers is broken anyways.
>
> Signed-off-by: Thomas Weißschuh <thomas.weissschuh@linutronix.de>
This looks like the right approach to me. I have previously
tried to introduce a 'struct __kernel_sockaddr' structure and
use that in uapi headers in place of the libc sockaddr, but
that seemed worse in the end, and introduced the same problems
as using the existing __kernel_sockaddr_storage.
The version that worked for my own testing used a nolibc specific
definition, which was enough for me to build-test the kernel headers
across all architectures, but it does not resolve the dependency.
What I'm not sure about is whether the added definition will
cause problems for users that include linux/socket.h (or one
of the headers using it) before including sys/socket.h.
I would expect that this causes build failures on some application
source code, but hopefully in a way that is easily fixable by
changing the include order.
I've added the libc-alpha list to Cc, along with a few
developers that care about this.
I also found a few older commits in which we've tried to work
this out in the past, but each time created a new (or old) problem:
57a87bb0720a ("[PATCH] scrub non-__GLIBC__ checks in linux/socket.h and linux/stat.h")
304c209c9b02 ("[NET]: Revert socket.h/stat.h ifdef hacks.")
9c501935a3cd ("net: Support inclusion of <linux/socket.h> before <sys/socket.h>")
2618be7dccf8 ("uapi: fix linux/if.h userspace compilation errors")
22bbc1dcd0d6 ("vsock/uapi: fix linux/vm_sockets.h userspace compilation errors")
06e445f740c1 ("mptcp: fix conflict with <netinet/in.h>")
c11c5906bc0a ("mptcp: add MPTCP_SUBFLOW_ADDRS getsockopt support")
Arnd
> ---
> include/linux/socket.h | 10 ----------
> include/uapi/linux/if.h | 4 ----
> include/uapi/linux/libc-compat.h | 12 ++++++++++++
> include/uapi/linux/socket.h | 14 ++++++++++++++
> 4 files changed, 26 insertions(+), 14 deletions(-)
>
> diff --git a/include/linux/socket.h b/include/linux/socket.h
> index ec715ad4bf25..8363d4e0a044 100644
> --- a/include/linux/socket.h
> +++ b/include/linux/socket.h
> @@ -28,16 +28,6 @@ extern void socket_seq_show(struct seq_file *seq);
>
> typedef __kernel_sa_family_t sa_family_t;
>
> -/*
> - * 1003.1g requires sa_family_t and that sa_data is char.
> - */
> -
> -/* Deprecated for in-kernel use. Use struct sockaddr_unsized instead. */
> -struct sockaddr {
> - sa_family_t sa_family; /* address family, AF_xxx */
> - char sa_data[14]; /* 14 bytes of protocol address */
> -};
> -
> /**
> * struct sockaddr_unsized - Unspecified size sockaddr for callbacks
> * @sa_family: Address family (AF_UNIX, AF_INET, AF_INET6, etc.)
> diff --git a/include/uapi/linux/if.h b/include/uapi/linux/if.h
> index 797ba2c1562a..a4bc54196a07 100644
> --- a/include/uapi/linux/if.h
> +++ b/include/uapi/linux/if.h
> @@ -25,10 +25,6 @@
> #include <linux/socket.h> /* for "struct sockaddr" et al */
> #include <linux/compiler.h> /* for "__user" et al */
>
> -#ifndef __KERNEL__
> -#include <sys/socket.h> /* for struct sockaddr. */
> -#endif
> -
> #if __UAPI_DEF_IF_IFNAMSIZ
> #define IFNAMSIZ 16
> #endif /* __UAPI_DEF_IF_IFNAMSIZ */
> diff --git a/include/uapi/linux/libc-compat.h b/include/uapi/linux/libc-compat.h
> index 0eca95ccb41e..13a06ce4e825 100644
> --- a/include/uapi/linux/libc-compat.h
> +++ b/include/uapi/linux/libc-compat.h
> @@ -140,6 +140,13 @@
>
> #endif /* _NETINET_IN_H */
>
> +/* Definitions for socket.h */
> +#if defined(_SYS_SOCKET_H)
> +#define __UAPI_DEF_SOCKADDR 0
> +#else
> +#define __UAPI_DEF_SOCKADDR 1
> +#endif
> +
> /* Definitions for xattr.h */
> #if defined(_SYS_XATTR_H)
> #define __UAPI_DEF_XATTR 0
> @@ -221,6 +228,11 @@
> #define __UAPI_DEF_IP6_MTUINFO 1
> #endif
>
> +/* Definitions for socket.h */
> +#ifndef __UAPI_DEF_SOCKADDR
> +#define __UAPI_DEF_SOCKADDR 1
> +#endif
> +
> /* Definitions for xattr.h */
> #ifndef __UAPI_DEF_XATTR
> #define __UAPI_DEF_XATTR 1
> diff --git a/include/uapi/linux/socket.h b/include/uapi/linux/socket.h
> index d3fcd3b5ec53..35d7d5f4b1a8 100644
> --- a/include/uapi/linux/socket.h
> +++ b/include/uapi/linux/socket.h
> @@ -2,6 +2,8 @@
> #ifndef _UAPI_LINUX_SOCKET_H
> #define _UAPI_LINUX_SOCKET_H
>
> +#include <linux/libc-compat.h> /* for compatibility with glibc */
> +
> /*
> * Desired design of maximum size and alignment (see RFC2553)
> */
> @@ -26,6 +28,18 @@ struct __kernel_sockaddr_storage {
> };
> };
>
> +/*
> + * 1003.1g requires sa_family_t and that sa_data is char.
> + */
> +
> +/* Deprecated for in-kernel use. Use struct sockaddr_unsized instead. */
> +#if __UAPI_DEF_SOCKADDR
> +struct sockaddr {
> + __kernel_sa_family_t sa_family; /* address family, AF_xxx */
> + char sa_data[14]; /* 14 bytes of protocol address */
> +};
> +#endif /* __UAPI_DEF_SOCKADDR */
> +
> #define SOCK_SNDBUF_LOCK 1
> #define SOCK_RCVBUF_LOCK 2
>
>
> ---
> base-commit: dbf8fe85a16a33d6b6bd01f2bc606fc017771465
> change-id: 20251222-uapi-sockaddr-cf10e7624729
>
> Best regards,
> --
> Thomas Weißschuh <thomas.weissschuh@linutronix.de>
^ permalink raw reply
* [PATCH net-next] net: uapi: Provide an UAPI definition of 'struct sockaddr'
From: Thomas Weißschuh @ 2026-01-05 8:25 UTC (permalink / raw)
To: Eric Dumazet, Kuniyuki Iwashima, Paolo Abeni, Willem de Bruijn
Cc: netdev, linux-kernel, linux-api, Arnd Bergmann,
Thomas Weißschuh
Various UAPI headers reference 'struct sockaddr'. Currently the
definition of this struct is pulled in from the libc header
sys/socket.h. This is problematic as it introduces a dependency
on a full userspace toolchain.
Instead expose a custom but compatible definition of 'struct sockaddr'
in the UAPI headers. It is guarded by the libc compatibility
infrastructure to avoid potential conflicts.
The compatibility symbol won't be supported by glibc right away,
but right now __UAPI_DEF_IF_IFNAMSIZ is not supported either,
so including the libc headers before the UAPI headers is broken anyways.
Signed-off-by: Thomas Weißschuh <thomas.weissschuh@linutronix.de>
---
include/linux/socket.h | 10 ----------
include/uapi/linux/if.h | 4 ----
include/uapi/linux/libc-compat.h | 12 ++++++++++++
include/uapi/linux/socket.h | 14 ++++++++++++++
4 files changed, 26 insertions(+), 14 deletions(-)
diff --git a/include/linux/socket.h b/include/linux/socket.h
index ec715ad4bf25..8363d4e0a044 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -28,16 +28,6 @@ extern void socket_seq_show(struct seq_file *seq);
typedef __kernel_sa_family_t sa_family_t;
-/*
- * 1003.1g requires sa_family_t and that sa_data is char.
- */
-
-/* Deprecated for in-kernel use. Use struct sockaddr_unsized instead. */
-struct sockaddr {
- sa_family_t sa_family; /* address family, AF_xxx */
- char sa_data[14]; /* 14 bytes of protocol address */
-};
-
/**
* struct sockaddr_unsized - Unspecified size sockaddr for callbacks
* @sa_family: Address family (AF_UNIX, AF_INET, AF_INET6, etc.)
diff --git a/include/uapi/linux/if.h b/include/uapi/linux/if.h
index 797ba2c1562a..a4bc54196a07 100644
--- a/include/uapi/linux/if.h
+++ b/include/uapi/linux/if.h
@@ -25,10 +25,6 @@
#include <linux/socket.h> /* for "struct sockaddr" et al */
#include <linux/compiler.h> /* for "__user" et al */
-#ifndef __KERNEL__
-#include <sys/socket.h> /* for struct sockaddr. */
-#endif
-
#if __UAPI_DEF_IF_IFNAMSIZ
#define IFNAMSIZ 16
#endif /* __UAPI_DEF_IF_IFNAMSIZ */
diff --git a/include/uapi/linux/libc-compat.h b/include/uapi/linux/libc-compat.h
index 0eca95ccb41e..13a06ce4e825 100644
--- a/include/uapi/linux/libc-compat.h
+++ b/include/uapi/linux/libc-compat.h
@@ -140,6 +140,13 @@
#endif /* _NETINET_IN_H */
+/* Definitions for socket.h */
+#if defined(_SYS_SOCKET_H)
+#define __UAPI_DEF_SOCKADDR 0
+#else
+#define __UAPI_DEF_SOCKADDR 1
+#endif
+
/* Definitions for xattr.h */
#if defined(_SYS_XATTR_H)
#define __UAPI_DEF_XATTR 0
@@ -221,6 +228,11 @@
#define __UAPI_DEF_IP6_MTUINFO 1
#endif
+/* Definitions for socket.h */
+#ifndef __UAPI_DEF_SOCKADDR
+#define __UAPI_DEF_SOCKADDR 1
+#endif
+
/* Definitions for xattr.h */
#ifndef __UAPI_DEF_XATTR
#define __UAPI_DEF_XATTR 1
diff --git a/include/uapi/linux/socket.h b/include/uapi/linux/socket.h
index d3fcd3b5ec53..35d7d5f4b1a8 100644
--- a/include/uapi/linux/socket.h
+++ b/include/uapi/linux/socket.h
@@ -2,6 +2,8 @@
#ifndef _UAPI_LINUX_SOCKET_H
#define _UAPI_LINUX_SOCKET_H
+#include <linux/libc-compat.h> /* for compatibility with glibc */
+
/*
* Desired design of maximum size and alignment (see RFC2553)
*/
@@ -26,6 +28,18 @@ struct __kernel_sockaddr_storage {
};
};
+/*
+ * 1003.1g requires sa_family_t and that sa_data is char.
+ */
+
+/* Deprecated for in-kernel use. Use struct sockaddr_unsized instead. */
+#if __UAPI_DEF_SOCKADDR
+struct sockaddr {
+ __kernel_sa_family_t sa_family; /* address family, AF_xxx */
+ char sa_data[14]; /* 14 bytes of protocol address */
+};
+#endif /* __UAPI_DEF_SOCKADDR */
+
#define SOCK_SNDBUF_LOCK 1
#define SOCK_RCVBUF_LOCK 2
---
base-commit: dbf8fe85a16a33d6b6bd01f2bc606fc017771465
change-id: 20251222-uapi-sockaddr-cf10e7624729
Best regards,
--
Thomas Weißschuh <thomas.weissschuh@linutronix.de>
^ permalink raw reply related
* Re: [PATCH v3] vdso: Remove struct getcpu_cache
From: Heiko Carstens @ 2026-01-02 12:20 UTC (permalink / raw)
To: Thomas Weißschuh
Cc: Huacai Chen, WANG Xuerui, Vasily Gorbik, Alexander Gordeev,
Christian Borntraeger, Sven Schnelle, Andy Lutomirski,
Thomas Gleixner, Ingo Molnar, Borislav Petkov, Dave Hansen, x86,
H. Peter Anvin, Vincenzo Frascino, Shuah Khan, Arnd Bergmann,
loongarch, linux-kernel, linux-s390, linux-api, linux-kselftest
In-Reply-To: <20251230-getcpu_cache-v3-1-fb9c5f880ebe@linutronix.de>
On Tue, Dec 30, 2025 at 08:08:44AM +0100, Thomas Weißschuh wrote:
> The cache parameter of getcpu() is useless nowadays for various reasons.
> * It is never passed by userspace for either the vDSO or syscalls.
> * It is never used by the kernel.
> * It could not be made to work on the current vDSO architecture.
> * The structure definition is not part of the UAPI headers.
> * vdso_getcpu() is superseded by restartable sequences in any case.
>
> Remove the struct and its header.
>
> As a side-effect we get rid of an unwanted inclusion of the linux/
> header namespace from vDSO code.
>
> Signed-off-by: Thomas Weißschuh <thomas.weissschuh@linutronix.de>
> ---
> Changes in v3:
> - Rebase on v6.19-rc1
> - Fix conflict with UML vdso_getcpu() removal
> - Flesh out commit message
> - Link to v2: https://lore.kernel.org/r/20251013-getcpu_cache-v2-1-880fbfa3b7cc@linutronix.de
>
> Changes in v2:
> - Rebase on v6.18-rc1
> - Link to v1: https://lore.kernel.org/r/20250826-getcpu_cache-v1-1-8748318f6141@linutronix.de
> ---
> We could also completely remove the parameter, but I am not sure if
> that is a good idea for syscalls and vDSO entrypoints.
> ---
> arch/loongarch/vdso/vgetcpu.c | 5 ++---
> arch/s390/kernel/vdso/getcpu.c | 3 +--
> arch/s390/kernel/vdso/vdso.h | 4 +---
> arch/x86/entry/vdso/vgetcpu.c | 5 ++---
> arch/x86/include/asm/vdso/processor.h | 4 +---
> include/linux/getcpu.h | 19 -------------------
> include/linux/syscalls.h | 3 +--
> kernel/sys.c | 4 +---
> tools/testing/selftests/vDSO/vdso_test_getcpu.c | 4 +---
> 9 files changed, 10 insertions(+), 41 deletions(-)
Acked-by: Heiko Carstens <hca@linux.ibm.com> # s390
^ permalink raw reply
* Re: [PATCH v3] vdso: Remove struct getcpu_cache
From: Arnd Bergmann @ 2025-12-30 21:23 UTC (permalink / raw)
To: Thomas Weißschuh, Huacai Chen, WANG Xuerui, Heiko Carstens,
Vasily Gorbik, Alexander Gordeev, Christian Borntraeger,
Sven Schnelle, Andy Lutomirski, Thomas Gleixner, Ingo Molnar,
Borislav Petkov, Dave Hansen, x86, H. Peter Anvin,
Vincenzo Frascino, shuah
Cc: loongarch, linux-kernel, linux-s390, linux-api, linux-kselftest
In-Reply-To: <20251230-getcpu_cache-v3-1-fb9c5f880ebe@linutronix.de>
On Tue, Dec 30, 2025, at 08:08, Thomas Weißschuh wrote:
> The cache parameter of getcpu() is useless nowadays for various reasons.
> * It is never passed by userspace for either the vDSO or syscalls.
> * It is never used by the kernel.
> * It could not be made to work on the current vDSO architecture.
> * The structure definition is not part of the UAPI headers.
> * vdso_getcpu() is superseded by restartable sequences in any case.
>
> Remove the struct and its header.
>
> As a side-effect we get rid of an unwanted inclusion of the linux/
> header namespace from vDSO code.
>
> Signed-off-by: Thomas Weißschuh <thomas.weissschuh@linutronix.de>
Acked-by: Arnd Bergmann <arnd@arndb.de>
^ permalink raw reply
* [PATCH v3] vdso: Remove struct getcpu_cache
From: Thomas Weißschuh @ 2025-12-30 7:08 UTC (permalink / raw)
To: Huacai Chen, WANG Xuerui, Heiko Carstens, Vasily Gorbik,
Alexander Gordeev, Christian Borntraeger, Sven Schnelle,
Andy Lutomirski, Thomas Gleixner, Ingo Molnar, Borislav Petkov,
Dave Hansen, x86, H. Peter Anvin, Vincenzo Frascino, Shuah Khan
Cc: Arnd Bergmann, loongarch, linux-kernel, linux-s390, linux-api,
linux-kselftest, Thomas Weißschuh
The cache parameter of getcpu() is useless nowadays for various reasons.
* It is never passed by userspace for either the vDSO or syscalls.
* It is never used by the kernel.
* It could not be made to work on the current vDSO architecture.
* The structure definition is not part of the UAPI headers.
* vdso_getcpu() is superseded by restartable sequences in any case.
Remove the struct and its header.
As a side-effect we get rid of an unwanted inclusion of the linux/
header namespace from vDSO code.
Signed-off-by: Thomas Weißschuh <thomas.weissschuh@linutronix.de>
---
Changes in v3:
- Rebase on v6.19-rc1
- Fix conflict with UML vdso_getcpu() removal
- Flesh out commit message
- Link to v2: https://lore.kernel.org/r/20251013-getcpu_cache-v2-1-880fbfa3b7cc@linutronix.de
Changes in v2:
- Rebase on v6.18-rc1
- Link to v1: https://lore.kernel.org/r/20250826-getcpu_cache-v1-1-8748318f6141@linutronix.de
---
We could also completely remove the parameter, but I am not sure if
that is a good idea for syscalls and vDSO entrypoints.
---
arch/loongarch/vdso/vgetcpu.c | 5 ++---
arch/s390/kernel/vdso/getcpu.c | 3 +--
arch/s390/kernel/vdso/vdso.h | 4 +---
arch/x86/entry/vdso/vgetcpu.c | 5 ++---
arch/x86/include/asm/vdso/processor.h | 4 +---
include/linux/getcpu.h | 19 -------------------
include/linux/syscalls.h | 3 +--
kernel/sys.c | 4 +---
tools/testing/selftests/vDSO/vdso_test_getcpu.c | 4 +---
9 files changed, 10 insertions(+), 41 deletions(-)
diff --git a/arch/loongarch/vdso/vgetcpu.c b/arch/loongarch/vdso/vgetcpu.c
index 73af49242ecd..6f054ec898c7 100644
--- a/arch/loongarch/vdso/vgetcpu.c
+++ b/arch/loongarch/vdso/vgetcpu.c
@@ -4,7 +4,6 @@
*/
#include <asm/vdso.h>
-#include <linux/getcpu.h>
static __always_inline int read_cpu_id(void)
{
@@ -28,8 +27,8 @@ static __always_inline int read_cpu_id(void)
}
extern
-int __vdso_getcpu(unsigned int *cpu, unsigned int *node, struct getcpu_cache *unused);
-int __vdso_getcpu(unsigned int *cpu, unsigned int *node, struct getcpu_cache *unused)
+int __vdso_getcpu(unsigned int *cpu, unsigned int *node, void *unused);
+int __vdso_getcpu(unsigned int *cpu, unsigned int *node, void *unused)
{
int cpu_id;
diff --git a/arch/s390/kernel/vdso/getcpu.c b/arch/s390/kernel/vdso/getcpu.c
index 5c5d4a848b76..1e17665616c5 100644
--- a/arch/s390/kernel/vdso/getcpu.c
+++ b/arch/s390/kernel/vdso/getcpu.c
@@ -2,11 +2,10 @@
/* Copyright IBM Corp. 2020 */
#include <linux/compiler.h>
-#include <linux/getcpu.h>
#include <asm/timex.h>
#include "vdso.h"
-int __s390_vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *unused)
+int __s390_vdso_getcpu(unsigned *cpu, unsigned *node, void *unused)
{
union tod_clock clk;
diff --git a/arch/s390/kernel/vdso/vdso.h b/arch/s390/kernel/vdso/vdso.h
index 8cff033dd854..1fe52a6f5a56 100644
--- a/arch/s390/kernel/vdso/vdso.h
+++ b/arch/s390/kernel/vdso/vdso.h
@@ -4,9 +4,7 @@
#include <vdso/datapage.h>
-struct getcpu_cache;
-
-int __s390_vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *unused);
+int __s390_vdso_getcpu(unsigned *cpu, unsigned *node, void *unused);
int __s390_vdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz);
int __s390_vdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts);
int __s390_vdso_clock_getres(clockid_t clock, struct __kernel_timespec *ts);
diff --git a/arch/x86/entry/vdso/vgetcpu.c b/arch/x86/entry/vdso/vgetcpu.c
index e4640306b2e3..6381b472b7c5 100644
--- a/arch/x86/entry/vdso/vgetcpu.c
+++ b/arch/x86/entry/vdso/vgetcpu.c
@@ -6,17 +6,16 @@
*/
#include <linux/kernel.h>
-#include <linux/getcpu.h>
#include <asm/segment.h>
#include <vdso/processor.h>
notrace long
-__vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *unused)
+__vdso_getcpu(unsigned *cpu, unsigned *node, void *unused)
{
vdso_read_cpunode(cpu, node);
return 0;
}
-long getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *tcache)
+long getcpu(unsigned *cpu, unsigned *node, void *tcache)
__attribute__((weak, alias("__vdso_getcpu")));
diff --git a/arch/x86/include/asm/vdso/processor.h b/arch/x86/include/asm/vdso/processor.h
index 7000aeb59aa2..93e0e24e5cb4 100644
--- a/arch/x86/include/asm/vdso/processor.h
+++ b/arch/x86/include/asm/vdso/processor.h
@@ -18,9 +18,7 @@ static __always_inline void cpu_relax(void)
native_pause();
}
-struct getcpu_cache;
-
-notrace long __vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *unused);
+notrace long __vdso_getcpu(unsigned *cpu, unsigned *node, void *unused);
#endif /* __ASSEMBLER__ */
diff --git a/include/linux/getcpu.h b/include/linux/getcpu.h
deleted file mode 100644
index c304dcdb4eac..000000000000
--- a/include/linux/getcpu.h
+++ /dev/null
@@ -1,19 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _LINUX_GETCPU_H
-#define _LINUX_GETCPU_H 1
-
-/* Cache for getcpu() to speed it up. Results might be a short time
- out of date, but will be faster.
-
- User programs should not refer to the contents of this structure.
- I repeat they should not refer to it. If they do they will break
- in future kernels.
-
- It is only a private cache for vgetcpu(). It will change in future kernels.
- The user program must store this information per thread (__thread)
- If you want 100% accurate information pass NULL instead. */
-struct getcpu_cache {
- unsigned long blob[128 / sizeof(long)];
-};
-
-#endif
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index cf84d98964b2..23704e006afd 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -59,7 +59,6 @@ struct compat_stat;
struct old_timeval32;
struct robust_list_head;
struct futex_waitv;
-struct getcpu_cache;
struct old_linux_dirent;
struct perf_event_attr;
struct file_handle;
@@ -718,7 +717,7 @@ asmlinkage long sys_getrusage(int who, struct rusage __user *ru);
asmlinkage long sys_umask(int mask);
asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3,
unsigned long arg4, unsigned long arg5);
-asmlinkage long sys_getcpu(unsigned __user *cpu, unsigned __user *node, struct getcpu_cache __user *cache);
+asmlinkage long sys_getcpu(unsigned __user *cpu, unsigned __user *node, void __user *cache);
asmlinkage long sys_gettimeofday(struct __kernel_old_timeval __user *tv,
struct timezone __user *tz);
asmlinkage long sys_settimeofday(struct __kernel_old_timeval __user *tv,
diff --git a/kernel/sys.c b/kernel/sys.c
index 8b58eece4e58..f1780ab132a3 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -31,7 +31,6 @@
#include <linux/tty.h>
#include <linux/signal.h>
#include <linux/cn_proc.h>
-#include <linux/getcpu.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/seccomp.h>
#include <linux/cpu.h>
@@ -2876,8 +2875,7 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
return error;
}
-SYSCALL_DEFINE3(getcpu, unsigned __user *, cpup, unsigned __user *, nodep,
- struct getcpu_cache __user *, unused)
+SYSCALL_DEFINE3(getcpu, unsigned __user *, cpup, unsigned __user *, nodep, void __user *, unused)
{
int err = 0;
int cpu = raw_smp_processor_id();
diff --git a/tools/testing/selftests/vDSO/vdso_test_getcpu.c b/tools/testing/selftests/vDSO/vdso_test_getcpu.c
index bea8ad54da11..3fe49cbdae98 100644
--- a/tools/testing/selftests/vDSO/vdso_test_getcpu.c
+++ b/tools/testing/selftests/vDSO/vdso_test_getcpu.c
@@ -16,9 +16,7 @@
#include "vdso_config.h"
#include "vdso_call.h"
-struct getcpu_cache;
-typedef long (*getcpu_t)(unsigned int *, unsigned int *,
- struct getcpu_cache *);
+typedef long (*getcpu_t)(unsigned int *, unsigned int *, void *);
int main(int argc, char **argv)
{
---
base-commit: 8f0b4cce4481fb22653697cced8d0d04027cb1e8
change-id: 20250825-getcpu_cache-3abcd2e65437
Best regards,
--
Thomas Weißschuh <thomas.weissschuh@linutronix.de>
^ permalink raw reply related
* [PATCH 9/9] MIPS: vdso: Provide clock_getres_time64() for 32-bit ABIs
From: Thomas Weißschuh @ 2025-12-23 6:59 UTC (permalink / raw)
To: Andy Lutomirski, Thomas Gleixner, Vincenzo Frascino, Shuah Khan,
Ingo Molnar, Borislav Petkov, Dave Hansen, x86, H. Peter Anvin,
Russell King, Catalin Marinas, Will Deacon, Thomas Bogendoerfer
Cc: linux-kernel, linux-kselftest, Russell King, linux-arm-kernel,
linux-mips, Arnd Bergmann, linux-api, Thomas Weißschuh
In-Reply-To: <20251223-vdso-compat-time32-v1-0-97ea7a06a543@linutronix.de>
For consistency with __vdso_clock_gettime64() there should also be a
64-bit variant of clock_getres(). This will allow the extension of
CONFIG_COMPAT_32BIT_TIME to the vDSO and finally the removal of 32-bit
time types from the kernel and UAPI.
Signed-off-by: Thomas Weißschuh <thomas.weissschuh@linutronix.de>
---
arch/mips/vdso/vdso.lds.S | 1 +
arch/mips/vdso/vgettimeofday.c | 6 ++++++
2 files changed, 7 insertions(+)
diff --git a/arch/mips/vdso/vdso.lds.S b/arch/mips/vdso/vdso.lds.S
index c8bbe56d89cb..5d08be3a6b85 100644
--- a/arch/mips/vdso/vdso.lds.S
+++ b/arch/mips/vdso/vdso.lds.S
@@ -103,6 +103,7 @@ VERSION
__vdso_clock_getres;
#if _MIPS_SIM != _MIPS_SIM_ABI64
__vdso_clock_gettime64;
+ __vdso_clock_getres_time64;
#endif
#endif
local: *;
diff --git a/arch/mips/vdso/vgettimeofday.c b/arch/mips/vdso/vgettimeofday.c
index 604afea3f336..59627f2f51b7 100644
--- a/arch/mips/vdso/vgettimeofday.c
+++ b/arch/mips/vdso/vgettimeofday.c
@@ -46,6 +46,12 @@ int __vdso_clock_gettime64(clockid_t clock,
return __cvdso_clock_gettime(clock, ts);
}
+int __vdso_clock_getres_time64(clockid_t clock,
+ struct __kernel_timespec *ts)
+{
+ return __cvdso_clock_getres(clock, ts);
+}
+
#else
int __vdso_clock_gettime(clockid_t clock,
--
2.52.0
^ permalink raw reply related
* [PATCH 8/9] arm64: vdso32: Provide clock_getres_time64()
From: Thomas Weißschuh @ 2025-12-23 6:59 UTC (permalink / raw)
To: Andy Lutomirski, Thomas Gleixner, Vincenzo Frascino, Shuah Khan,
Ingo Molnar, Borislav Petkov, Dave Hansen, x86, H. Peter Anvin,
Russell King, Catalin Marinas, Will Deacon, Thomas Bogendoerfer
Cc: linux-kernel, linux-kselftest, Russell King, linux-arm-kernel,
linux-mips, Arnd Bergmann, linux-api, Thomas Weißschuh
In-Reply-To: <20251223-vdso-compat-time32-v1-0-97ea7a06a543@linutronix.de>
For consistency with __vdso_clock_gettime64() there should also be a
64-bit variant of clock_getres(). This will allow the extension of
CONFIG_COMPAT_32BIT_TIME to the vDSO and finally the removal of 32-bit
time types from the kernel and UAPI.
Signed-off-by: Thomas Weißschuh <thomas.weissschuh@linutronix.de>
---
arch/arm64/kernel/vdso32/vdso.lds.S | 1 +
arch/arm64/kernel/vdso32/vgettimeofday.c | 6 ++++++
2 files changed, 7 insertions(+)
diff --git a/arch/arm64/kernel/vdso32/vdso.lds.S b/arch/arm64/kernel/vdso32/vdso.lds.S
index e02b27487ce8..c374fb0146f3 100644
--- a/arch/arm64/kernel/vdso32/vdso.lds.S
+++ b/arch/arm64/kernel/vdso32/vdso.lds.S
@@ -86,6 +86,7 @@ VERSION
__vdso_gettimeofday;
__vdso_clock_getres;
__vdso_clock_gettime64;
+ __vdso_clock_getres_time64;
local: *;
};
}
diff --git a/arch/arm64/kernel/vdso32/vgettimeofday.c b/arch/arm64/kernel/vdso32/vgettimeofday.c
index 29b4d8f61e39..d7b39b0a9668 100644
--- a/arch/arm64/kernel/vdso32/vgettimeofday.c
+++ b/arch/arm64/kernel/vdso32/vgettimeofday.c
@@ -32,6 +32,12 @@ int __vdso_clock_getres(clockid_t clock_id,
return __cvdso_clock_getres_time32(clock_id, res);
}
+int __vdso_clock_getres_time64(clockid_t clock_id,
+ struct __kernel_timespec *res)
+{
+ return __cvdso_clock_getres(clock_id, res);
+}
+
/* Avoid unresolved references emitted by GCC */
void __aeabi_unwind_cpp_pr0(void)
--
2.52.0
^ permalink raw reply related
* [PATCH 7/9] ARM: VDSO: provide clock_getres_time64()
From: Thomas Weißschuh @ 2025-12-23 6:59 UTC (permalink / raw)
To: Andy Lutomirski, Thomas Gleixner, Vincenzo Frascino, Shuah Khan,
Ingo Molnar, Borislav Petkov, Dave Hansen, x86, H. Peter Anvin,
Russell King, Catalin Marinas, Will Deacon, Thomas Bogendoerfer
Cc: linux-kernel, linux-kselftest, Russell King, linux-arm-kernel,
linux-mips, Arnd Bergmann, linux-api, Thomas Weißschuh
In-Reply-To: <20251223-vdso-compat-time32-v1-0-97ea7a06a543@linutronix.de>
For consistency with __vdso_clock_gettime64() there should also be a
64-bit variant of clock_getres(). This will allow the extension of
CONFIG_COMPAT_32BIT_TIME to the vDSO and finally the removal of 32-bit
time types from the kernel and UAPI.
Signed-off-by: Thomas Weißschuh <thomas.weissschuh@linutronix.de>
---
arch/arm/kernel/vdso.c | 1 +
arch/arm/vdso/vdso.lds.S | 1 +
arch/arm/vdso/vgettimeofday.c | 6 ++++++
3 files changed, 8 insertions(+)
diff --git a/arch/arm/kernel/vdso.c b/arch/arm/kernel/vdso.c
index 566c40f0f7c7..0108f33d6bed 100644
--- a/arch/arm/kernel/vdso.c
+++ b/arch/arm/kernel/vdso.c
@@ -162,6 +162,7 @@ static void __init patch_vdso(void *ehdr)
vdso_nullpatch_one(&einfo, "__vdso_clock_gettime");
vdso_nullpatch_one(&einfo, "__vdso_clock_gettime64");
vdso_nullpatch_one(&einfo, "__vdso_clock_getres");
+ vdso_nullpatch_one(&einfo, "__vdso_clock_getres_time64");
}
}
diff --git a/arch/arm/vdso/vdso.lds.S b/arch/arm/vdso/vdso.lds.S
index 7c08371f4400..74d8d8bc8a40 100644
--- a/arch/arm/vdso/vdso.lds.S
+++ b/arch/arm/vdso/vdso.lds.S
@@ -74,6 +74,7 @@ VERSION
__vdso_gettimeofday;
__vdso_clock_getres;
__vdso_clock_gettime64;
+ __vdso_clock_getres_time64;
local: *;
};
}
diff --git a/arch/arm/vdso/vgettimeofday.c b/arch/arm/vdso/vgettimeofday.c
index 3554aa35f1ba..2874dde7e6cf 100644
--- a/arch/arm/vdso/vgettimeofday.c
+++ b/arch/arm/vdso/vgettimeofday.c
@@ -34,6 +34,12 @@ int __vdso_clock_getres(clockid_t clock_id,
return __cvdso_clock_getres_time32(clock_id, res);
}
+int __vdso_clock_getres_time64(clockid_t clock_id,
+ struct __kernel_timespec *res)
+{
+ return __cvdso_clock_getres(clock_id, res);
+}
+
/* Avoid unresolved references emitted by GCC */
void __aeabi_unwind_cpp_pr0(void)
--
2.52.0
^ permalink raw reply related
* [PATCH 6/9] ARM: VDSO: also patch out __vdso_clock_getres() if unavailable
From: Thomas Weißschuh @ 2025-12-23 6:59 UTC (permalink / raw)
To: Andy Lutomirski, Thomas Gleixner, Vincenzo Frascino, Shuah Khan,
Ingo Molnar, Borislav Petkov, Dave Hansen, x86, H. Peter Anvin,
Russell King, Catalin Marinas, Will Deacon, Thomas Bogendoerfer
Cc: linux-kernel, linux-kselftest, Russell King, linux-arm-kernel,
linux-mips, Arnd Bergmann, linux-api, Thomas Weißschuh
In-Reply-To: <20251223-vdso-compat-time32-v1-0-97ea7a06a543@linutronix.de>
The vDSO code hides symbols which are non-functional.
__vdso_clock_getres() was not added to this list when it got introduced.
Fixes: 052e76a31b4a ("ARM: 8931/1: Add clock_getres entry point")
Signed-off-by: Thomas Weißschuh <thomas.weissschuh@linutronix.de>
---
arch/arm/kernel/vdso.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/arch/arm/kernel/vdso.c b/arch/arm/kernel/vdso.c
index e38a30477f3d..566c40f0f7c7 100644
--- a/arch/arm/kernel/vdso.c
+++ b/arch/arm/kernel/vdso.c
@@ -161,6 +161,7 @@ static void __init patch_vdso(void *ehdr)
vdso_nullpatch_one(&einfo, "__vdso_gettimeofday");
vdso_nullpatch_one(&einfo, "__vdso_clock_gettime");
vdso_nullpatch_one(&einfo, "__vdso_clock_gettime64");
+ vdso_nullpatch_one(&einfo, "__vdso_clock_getres");
}
}
--
2.52.0
^ permalink raw reply related
* [PATCH 4/9] selftests: vDSO: vdso_test_abi: Add test for clock_getres_time64()
From: Thomas Weißschuh @ 2025-12-23 6:59 UTC (permalink / raw)
To: Andy Lutomirski, Thomas Gleixner, Vincenzo Frascino, Shuah Khan,
Ingo Molnar, Borislav Petkov, Dave Hansen, x86, H. Peter Anvin,
Russell King, Catalin Marinas, Will Deacon, Thomas Bogendoerfer
Cc: linux-kernel, linux-kselftest, Russell King, linux-arm-kernel,
linux-mips, Arnd Bergmann, linux-api, Thomas Weißschuh
In-Reply-To: <20251223-vdso-compat-time32-v1-0-97ea7a06a543@linutronix.de>
Some architectures will start to implement this function.
Make sure it works correctly.
Signed-off-by: Thomas Weißschuh <thomas.weissschuh@linutronix.de>
---
tools/testing/selftests/vDSO/vdso_test_abi.c | 53 +++++++++++++++++++++++++++-
1 file changed, 52 insertions(+), 1 deletion(-)
diff --git a/tools/testing/selftests/vDSO/vdso_test_abi.c b/tools/testing/selftests/vDSO/vdso_test_abi.c
index a75c12dcb0f1..b162a4ba9c4f 100644
--- a/tools/testing/selftests/vDSO/vdso_test_abi.c
+++ b/tools/testing/selftests/vDSO/vdso_test_abi.c
@@ -36,6 +36,7 @@ typedef long (*vdso_gettimeofday_t)(struct timeval *tv, struct timezone *tz);
typedef long (*vdso_clock_gettime_t)(clockid_t clk_id, struct timespec *ts);
typedef long (*vdso_clock_gettime64_t)(clockid_t clk_id, struct vdso_timespec64 *ts);
typedef long (*vdso_clock_getres_t)(clockid_t clk_id, struct timespec *ts);
+typedef long (*vdso_clock_getres_time64_t)(clockid_t clk_id, struct vdso_timespec64 *ts);
typedef time_t (*vdso_time_t)(time_t *t);
static const char * const vdso_clock_name[] = {
@@ -196,6 +197,55 @@ static void vdso_test_clock_getres(clockid_t clk_id)
}
}
+#ifdef __NR_clock_getres_time64
+static void vdso_test_clock_getres_time64(clockid_t clk_id)
+{
+ int clock_getres_fail = 0;
+
+ /* Find clock_getres_time64. */
+ vdso_clock_getres_time64_t vdso_clock_getres_time64 =
+ (vdso_clock_getres_time64_t)vdso_sym(version, name[7]);
+
+ if (!vdso_clock_getres_time64) {
+ ksft_print_msg("Couldn't find %s\n", name[7]);
+ ksft_test_result_skip("%s %s\n", name[7],
+ vdso_clock_name[clk_id]);
+ return;
+ }
+
+ struct vdso_timespec64 ts, sys_ts;
+ long ret = VDSO_CALL(vdso_clock_getres_time64, 2, clk_id, &ts);
+
+ if (ret == 0) {
+ ksft_print_msg("The vdso resolution is %lld %lld\n",
+ (long long)ts.tv_sec, (long long)ts.tv_nsec);
+ } else {
+ clock_getres_fail++;
+ }
+
+ ret = syscall(__NR_clock_getres_time64, clk_id, &sys_ts);
+
+ ksft_print_msg("The syscall resolution is %lld %lld\n",
+ (long long)sys_ts.tv_sec, (long long)sys_ts.tv_nsec);
+
+ if ((sys_ts.tv_sec != ts.tv_sec) || (sys_ts.tv_nsec != ts.tv_nsec))
+ clock_getres_fail++;
+
+ if (clock_getres_fail > 0) {
+ ksft_test_result_fail("%s %s\n", name[7],
+ vdso_clock_name[clk_id]);
+ } else {
+ ksft_test_result_pass("%s %s\n", name[7],
+ vdso_clock_name[clk_id]);
+ }
+}
+#else /* !__NR_clock_getres_time64 */
+static void vdso_test_clock_getres_time64(clockid_t clk_id)
+{
+ ksft_test_result_skip("%s %s\n", name[7], vdso_clock_name[clk_id]);
+}
+#endif /* __NR_clock_getres_time64 */
+
/*
* This function calls vdso_test_clock_gettime and vdso_test_clock_getres
* with different values for clock_id.
@@ -208,9 +258,10 @@ static inline void vdso_test_clock(clockid_t clock_id)
vdso_test_clock_gettime64(clock_id);
vdso_test_clock_getres(clock_id);
+ vdso_test_clock_getres_time64(clock_id);
}
-#define VDSO_TEST_PLAN 29
+#define VDSO_TEST_PLAN 38
int main(int argc, char **argv)
{
--
2.52.0
^ permalink raw reply related
* [PATCH 5/9] x86/vdso: Provide clock_getres_time64() for x86-32
From: Thomas Weißschuh @ 2025-12-23 6:59 UTC (permalink / raw)
To: Andy Lutomirski, Thomas Gleixner, Vincenzo Frascino, Shuah Khan,
Ingo Molnar, Borislav Petkov, Dave Hansen, x86, H. Peter Anvin,
Russell King, Catalin Marinas, Will Deacon, Thomas Bogendoerfer
Cc: linux-kernel, linux-kselftest, Russell King, linux-arm-kernel,
linux-mips, Arnd Bergmann, linux-api, Thomas Weißschuh
In-Reply-To: <20251223-vdso-compat-time32-v1-0-97ea7a06a543@linutronix.de>
For consistency with __vdso_clock_gettime64() there should also be a
64-bit variant of clock_getres(). This will allow the extension of
CONFIG_COMPAT_32BIT_TIME to the vDSO and finally the removal of 32-bit
time types from the kernel and UAPI.
Signed-off-by: Thomas Weißschuh <thomas.weissschuh@linutronix.de>
---
arch/x86/entry/vdso/vclock_gettime.c | 8 ++++++++
arch/x86/entry/vdso/vdso32/vdso32.lds.S | 1 +
2 files changed, 9 insertions(+)
diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c
index 0debc194bd78..027b7e88d753 100644
--- a/arch/x86/entry/vdso/vclock_gettime.c
+++ b/arch/x86/entry/vdso/vclock_gettime.c
@@ -74,4 +74,12 @@ int __vdso_clock_getres(clockid_t clock, struct old_timespec32 *res)
int clock_getres(clockid_t, struct old_timespec32 *)
__attribute__((weak, alias("__vdso_clock_getres")));
+
+int __vdso_clock_getres_time64(clockid_t clock, struct __kernel_timespec *ts)
+{
+ return __cvdso_clock_getres(clock, ts);
+}
+
+int clock_getres_time64(clockid_t, struct __kernel_timespec *)
+ __attribute__((weak, alias("__vdso_clock_getres_time64")));
#endif
diff --git a/arch/x86/entry/vdso/vdso32/vdso32.lds.S b/arch/x86/entry/vdso/vdso32/vdso32.lds.S
index 8a3be07006bb..6f977c103584 100644
--- a/arch/x86/entry/vdso/vdso32/vdso32.lds.S
+++ b/arch/x86/entry/vdso/vdso32/vdso32.lds.S
@@ -28,6 +28,7 @@ VERSION
__vdso_time;
__vdso_clock_getres;
__vdso_clock_gettime64;
+ __vdso_clock_getres_time64;
__vdso_getcpu;
};
--
2.52.0
^ permalink raw reply related
* [PATCH 3/9] selftests: vDSO: vdso_test_abi: Use UAPI system call numbers
From: Thomas Weißschuh @ 2025-12-23 6:59 UTC (permalink / raw)
To: Andy Lutomirski, Thomas Gleixner, Vincenzo Frascino, Shuah Khan,
Ingo Molnar, Borislav Petkov, Dave Hansen, x86, H. Peter Anvin,
Russell King, Catalin Marinas, Will Deacon, Thomas Bogendoerfer
Cc: linux-kernel, linux-kselftest, Russell King, linux-arm-kernel,
linux-mips, Arnd Bergmann, linux-api, Thomas Weißschuh
In-Reply-To: <20251223-vdso-compat-time32-v1-0-97ea7a06a543@linutronix.de>
SYS_clock_getres might have been redirected by libc to some other system
call than the actual clock_getres. In the test we want to make sure to
use exactly this system call.
Use the system call number exported by the UAPI headers which is always
correct.
Signed-off-by: Thomas Weißschuh <thomas.weissschuh@linutronix.de>
---
tools/testing/selftests/vDSO/vdso_test_abi.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tools/testing/selftests/vDSO/vdso_test_abi.c b/tools/testing/selftests/vDSO/vdso_test_abi.c
index c620317eaeea..a75c12dcb0f1 100644
--- a/tools/testing/selftests/vDSO/vdso_test_abi.c
+++ b/tools/testing/selftests/vDSO/vdso_test_abi.c
@@ -179,7 +179,7 @@ static void vdso_test_clock_getres(clockid_t clk_id)
clock_getres_fail++;
}
- ret = syscall(SYS_clock_getres, clk_id, &sys_ts);
+ ret = syscall(__NR_clock_getres, clk_id, &sys_ts);
ksft_print_msg("The syscall resolution is %lld %lld\n",
(long long)sys_ts.tv_sec, (long long)sys_ts.tv_nsec);
--
2.52.0
^ permalink raw reply related
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox