Linux Trace Kernel
 help / color / mirror / Atom feed
* [PATCHv3 bpf-next 02/24] bpf: Use mutex lock pool for bpf trampolines
From: Jiri Olsa @ 2026-03-16  7:51 UTC (permalink / raw)
  To: Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko
  Cc: bpf, linux-trace-kernel, Martin KaFai Lau, Eduard Zingerman,
	Song Liu, Yonghong Song, Menglong Dong, Steven Rostedt
In-Reply-To: <20260316075138.465430-1-jolsa@kernel.org>

Adding mutex lock pool that replaces bpf trampolines mutex.

For tracing_multi link coming in following changes we need to lock all
the involved trampolines during the attachment. This could mean thousands
of mutex locks, which is not convenient.

As suggested by Andrii we can replace bpf trampolines mutex with mutex
pool, where each trampoline is hash-ed to one of the locks from the pool.

It's better to lock all the pool mutexes (32 at the moment) than
thousands of them.

There is 48 (MAX_LOCK_DEPTH) lock limit allowed to be simultaneously
held by task, so we need to keep 32 mutexes (5 bits) in the pool, so
when we lock them all in following changes the lockdep won't scream.

Removing the mutex_is_locked in bpf_trampoline_put, because we removed
the mutex from bpf_trampoline.

Suggested-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
---
 include/linux/bpf.h     |  2 --
 kernel/bpf/trampoline.c | 76 ++++++++++++++++++++++++++++-------------
 2 files changed, 52 insertions(+), 26 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 05b34a6355b0..1d900f49aff5 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1335,8 +1335,6 @@ struct bpf_trampoline {
 	/* hlist for trampoline_ip_table */
 	struct hlist_node hlist_ip;
 	struct ftrace_ops *fops;
-	/* serializes access to fields of this trampoline */
-	struct mutex mutex;
 	refcount_t refcnt;
 	u32 flags;
 	u64 key;
diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
index f02254a21585..9923703a1544 100644
--- a/kernel/bpf/trampoline.c
+++ b/kernel/bpf/trampoline.c
@@ -30,6 +30,34 @@ static struct hlist_head trampoline_ip_table[TRAMPOLINE_TABLE_SIZE];
 /* serializes access to trampoline tables */
 static DEFINE_MUTEX(trampoline_mutex);
 
+/*
+ * We keep 32 trampoline locks (5 bits) in the pool, because there
+ * is 48 (MAX_LOCK_DEPTH) locks limit allowed to be simultaneously
+ * held by task.
+ */
+#define TRAMPOLINE_LOCKS_BITS 5
+#define TRAMPOLINE_LOCKS_TABLE_SIZE (1 << TRAMPOLINE_LOCKS_BITS)
+
+static struct {
+	struct mutex mutex;
+	struct lock_class_key key;
+} trampoline_locks[TRAMPOLINE_LOCKS_TABLE_SIZE];
+
+static struct mutex *select_trampoline_lock(struct bpf_trampoline *tr)
+{
+	return &trampoline_locks[hash_64((u64)(uintptr_t) tr, TRAMPOLINE_LOCKS_BITS)].mutex;
+}
+
+static void trampoline_lock(struct bpf_trampoline *tr)
+{
+	mutex_lock(select_trampoline_lock(tr));
+}
+
+static void trampoline_unlock(struct bpf_trampoline *tr)
+{
+	mutex_unlock(select_trampoline_lock(tr));
+}
+
 #ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
 static int bpf_trampoline_update(struct bpf_trampoline *tr, bool lock_direct_mutex);
 
@@ -69,9 +97,9 @@ static int bpf_tramp_ftrace_ops_func(struct ftrace_ops *ops, unsigned long ip,
 
 	if (cmd == FTRACE_OPS_CMD_ENABLE_SHARE_IPMODIFY_SELF) {
 		/* This is called inside register_ftrace_direct_multi(), so
-		 * tr->mutex is already locked.
+		 * trampoline's mutex is already locked.
 		 */
-		lockdep_assert_held_once(&tr->mutex);
+		lockdep_assert_held_once(select_trampoline_lock(tr));
 
 		/* Instead of updating the trampoline here, we propagate
 		 * -EAGAIN to register_ftrace_direct(). Then we can
@@ -91,7 +119,7 @@ static int bpf_tramp_ftrace_ops_func(struct ftrace_ops *ops, unsigned long ip,
 	}
 
 	/* The normal locking order is
-	 *    tr->mutex => direct_mutex (ftrace.c) => ftrace_lock (ftrace.c)
+	 *    select_trampoline_lock(tr) => direct_mutex (ftrace.c) => ftrace_lock (ftrace.c)
 	 *
 	 * The following two commands are called from
 	 *
@@ -99,12 +127,12 @@ static int bpf_tramp_ftrace_ops_func(struct ftrace_ops *ops, unsigned long ip,
 	 *   cleanup_direct_functions_after_ipmodify
 	 *
 	 * In both cases, direct_mutex is already locked. Use
-	 * mutex_trylock(&tr->mutex) to avoid deadlock in race condition
-	 * (something else is making changes to this same trampoline).
+	 * mutex_trylock(select_trampoline_lock(tr)) to avoid deadlock in race condition
+	 * (something else holds the same pool lock).
 	 */
-	if (!mutex_trylock(&tr->mutex)) {
-		/* sleep 1 ms to make sure whatever holding tr->mutex makes
-		 * some progress.
+	if (!mutex_trylock(select_trampoline_lock(tr))) {
+		/* sleep 1 ms to make sure whatever holding select_trampoline_lock(tr)
+		 * makes some progress.
 		 */
 		msleep(1);
 		return -EAGAIN;
@@ -129,7 +157,7 @@ static int bpf_tramp_ftrace_ops_func(struct ftrace_ops *ops, unsigned long ip,
 		break;
 	}
 
-	mutex_unlock(&tr->mutex);
+	trampoline_unlock(tr);
 	return ret;
 }
 #endif
@@ -359,7 +387,6 @@ static struct bpf_trampoline *bpf_trampoline_lookup(u64 key, unsigned long ip)
 	head = &trampoline_ip_table[hash_64(tr->ip, TRAMPOLINE_HASH_BITS)];
 	hlist_add_head(&tr->hlist_ip, head);
 	refcount_set(&tr->refcnt, 1);
-	mutex_init(&tr->mutex);
 	for (i = 0; i < BPF_TRAMP_MAX; i++)
 		INIT_HLIST_HEAD(&tr->progs_hlist[i]);
 out:
@@ -844,9 +871,9 @@ int bpf_trampoline_link_prog(struct bpf_tramp_link *link,
 {
 	int err;
 
-	mutex_lock(&tr->mutex);
+	trampoline_lock(tr);
 	err = __bpf_trampoline_link_prog(link, tr, tgt_prog);
-	mutex_unlock(&tr->mutex);
+	trampoline_unlock(tr);
 	return err;
 }
 
@@ -887,9 +914,9 @@ int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link,
 {
 	int err;
 
-	mutex_lock(&tr->mutex);
+	trampoline_lock(tr);
 	err = __bpf_trampoline_unlink_prog(link, tr, tgt_prog);
-	mutex_unlock(&tr->mutex);
+	trampoline_unlock(tr);
 	return err;
 }
 
@@ -999,12 +1026,12 @@ int bpf_trampoline_link_cgroup_shim(struct bpf_prog *prog,
 	if (!tr)
 		return  -ENOMEM;
 
-	mutex_lock(&tr->mutex);
+	trampoline_lock(tr);
 
 	shim_link = cgroup_shim_find(tr, bpf_func);
 	if (shim_link && !IS_ERR(bpf_link_inc_not_zero(&shim_link->link.link))) {
 		/* Reusing existing shim attached by the other program. */
-		mutex_unlock(&tr->mutex);
+		trampoline_unlock(tr);
 		bpf_trampoline_put(tr); /* bpf_trampoline_get above */
 		return 0;
 	}
@@ -1024,16 +1051,16 @@ int bpf_trampoline_link_cgroup_shim(struct bpf_prog *prog,
 	shim_link->trampoline = tr;
 	/* note, we're still holding tr refcnt from above */
 
-	mutex_unlock(&tr->mutex);
+	trampoline_unlock(tr);
 
 	return 0;
 err:
-	mutex_unlock(&tr->mutex);
+	trampoline_unlock(tr);
 
 	if (shim_link)
 		bpf_link_put(&shim_link->link.link);
 
-	/* have to release tr while _not_ holding its mutex */
+	/* have to release tr while _not_ holding pool mutex for trampoline */
 	bpf_trampoline_put(tr); /* bpf_trampoline_get above */
 
 	return err;
@@ -1054,9 +1081,9 @@ void bpf_trampoline_unlink_cgroup_shim(struct bpf_prog *prog)
 	if (WARN_ON_ONCE(!tr))
 		return;
 
-	mutex_lock(&tr->mutex);
+	trampoline_lock(tr);
 	shim_link = cgroup_shim_find(tr, bpf_func);
-	mutex_unlock(&tr->mutex);
+	trampoline_unlock(tr);
 
 	if (shim_link)
 		bpf_link_put(&shim_link->link.link);
@@ -1074,14 +1101,14 @@ struct bpf_trampoline *bpf_trampoline_get(u64 key,
 	if (!tr)
 		return NULL;
 
-	mutex_lock(&tr->mutex);
+	trampoline_lock(tr);
 	if (tr->func.addr)
 		goto out;
 
 	memcpy(&tr->func.model, &tgt_info->fmodel, sizeof(tgt_info->fmodel));
 	tr->func.addr = (void *)tgt_info->tgt_addr;
 out:
-	mutex_unlock(&tr->mutex);
+	trampoline_unlock(tr);
 	return tr;
 }
 
@@ -1094,7 +1121,6 @@ void bpf_trampoline_put(struct bpf_trampoline *tr)
 	mutex_lock(&trampoline_mutex);
 	if (!refcount_dec_and_test(&tr->refcnt))
 		goto out;
-	WARN_ON_ONCE(mutex_is_locked(&tr->mutex));
 
 	for (i = 0; i < BPF_TRAMP_MAX; i++)
 		if (WARN_ON_ONCE(!hlist_empty(&tr->progs_hlist[i])))
@@ -1380,6 +1406,8 @@ static int __init init_trampolines(void)
 		INIT_HLIST_HEAD(&trampoline_key_table[i]);
 	for (i = 0; i < TRAMPOLINE_TABLE_SIZE; i++)
 		INIT_HLIST_HEAD(&trampoline_ip_table[i]);
+	for (i = 0; i < TRAMPOLINE_LOCKS_TABLE_SIZE; i++)
+		__mutex_init(&trampoline_locks[i].mutex, "trampoline_lock", &trampoline_locks[i].key);
 	return 0;
 }
 late_initcall(init_trampolines);
-- 
2.53.0


^ permalink raw reply related

* [PATCHv3 bpf-next 01/24] ftrace: Add ftrace_hash_count function
From: Jiri Olsa @ 2026-03-16  7:51 UTC (permalink / raw)
  To: Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko
  Cc: bpf, linux-trace-kernel, Martin KaFai Lau, Eduard Zingerman,
	Song Liu, Yonghong Song, Menglong Dong, Steven Rostedt
In-Reply-To: <20260316075138.465430-1-jolsa@kernel.org>

Adding external ftrace_hash_count function so we could get hash
count outside of ftrace object.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
---
 include/linux/ftrace.h | 1 +
 kernel/trace/ftrace.c  | 7 ++++++-
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index c242fe49af4c..401f8dfd05d3 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -415,6 +415,7 @@ struct ftrace_hash *alloc_ftrace_hash(int size_bits);
 void free_ftrace_hash(struct ftrace_hash *hash);
 struct ftrace_func_entry *add_ftrace_hash_entry_direct(struct ftrace_hash *hash,
 						       unsigned long ip, unsigned long direct);
+unsigned long ftrace_hash_count(struct ftrace_hash *hash);
 
 /* The hash used to know what functions callbacks trace */
 struct ftrace_ops_hash {
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 71dcbfeac86c..2240c38e7216 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -6288,11 +6288,16 @@ int modify_ftrace_direct(struct ftrace_ops *ops, unsigned long addr)
 }
 EXPORT_SYMBOL_GPL(modify_ftrace_direct);
 
-static unsigned long hash_count(struct ftrace_hash *hash)
+static inline unsigned long hash_count(struct ftrace_hash *hash)
 {
 	return hash ? hash->count : 0;
 }
 
+unsigned long ftrace_hash_count(struct ftrace_hash *hash)
+{
+	return hash_count(hash);
+}
+
 /**
  * hash_add - adds two struct ftrace_hash and returns the result
  * @a: struct ftrace_hash object
-- 
2.53.0


^ permalink raw reply related

* [PATCHv3 bpf-next 00/24] bpf: tracing_multi link
From: Jiri Olsa @ 2026-03-16  7:51 UTC (permalink / raw)
  To: Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko
  Cc: Hengqi Chen, bpf, linux-trace-kernel, Martin KaFai Lau,
	Eduard Zingerman, Song Liu, Yonghong Song, Menglong Dong,
	Steven Rostedt

hi,
adding tracing_multi link support that allows fast attachment
of tracing program to many functions.

RFC: https://lore.kernel.org/bpf/20260203093819.2105105-1-jolsa@kernel.org/
v1: https://lore.kernel.org/bpf/20260220100649.628307-1-jolsa@kernel.org/
v2: https://lore.kernel.org/bpf/20260304222141.497203-1-jolsa@kernel.org/

v3 changes:
- fix module parsing [Leon Hwang]
- use function traceable check from libbpf [Leon Hwang]
- use ptr_to_u64 and fix/updated few comments [ci]
- display cookies as decimal numbers [ci]
- added link_create.flags check [ci]
- fix error path in bpf_trampoline_multi_detach [ci]
- make fentry/fexit.multi not extendable [ci]
- add missing OPTS_VALID to bpf_program__attach_tracing_multi [ci]

v2 changes:
- allocate data.unreg in bpf_trampoline_multi_attach for rollback path [ci]
  and fixed link count setup in rollback path [ci]
- several small assorted fixes [ci]
- added loongarch and powerpc changes for struct bpf_tramp_node change
- added support to attach functions from modules
- added tests for sleepable programs
- added rollback tests

v1 changes:
- added ftrace_hash_count as wrapper for hash_count [Steven]
- added trampoline mutex pool [Andrii]
- reworked 'struct bpf_tramp_node' separatoin [Andrii]
  - the 'struct bpf_tramp_node' now holds pointer to bpf_link,
    which is similar to what we do for uprobe_multi;
    I understand it's not a fundamental change compared to previous
    version which used bpf_prog pointer instead, but I don't see better
    way of doing this.. I'm happy to discuss this further if there's
    better idea
- reworked 'struct bpf_fsession_link' based on bpf_tramp_node
- made btf__find_by_glob_kind function internal helper [Andrii]
- many small assorted fixes [Andrii,CI]
- added session support [Leon Hwang]
- added cookies support
- added more tests


Note I plan to send linkinfo support separately, the patchset is big enough.

thanks,
jirka


Cc: Hengqi Chen <hengqi.chen@gmail.com>
---
Jiri Olsa (24):
      ftrace: Add ftrace_hash_count function
      bpf: Use mutex lock pool for bpf trampolines
      bpf: Add struct bpf_trampoline_ops object
      bpf: Add struct bpf_tramp_node object
      bpf: Factor fsession link to use struct bpf_tramp_node
      bpf: Add multi tracing attach types
      bpf: Move sleepable verification code to btf_id_allow_sleepable
      bpf: Add bpf_trampoline_multi_attach/detach functions
      bpf: Add support for tracing multi link
      bpf: Add support for tracing_multi link cookies
      bpf: Add support for tracing_multi link session
      bpf: Add support for tracing_multi link fdinfo
      libbpf: Add bpf_object_cleanup_btf function
      libbpf: Add bpf_link_create support for tracing_multi link
      libbpf: Add btf_type_is_traceable_func function
      libbpf: Add support to create tracing multi link
      selftests/bpf: Add tracing multi skel/pattern/ids attach tests
      selftests/bpf: Add tracing multi skel/pattern/ids module attach tests
      selftests/bpf: Add tracing multi intersect tests
      selftests/bpf: Add tracing multi cookies test
      selftests/bpf: Add tracing multi session test
      selftests/bpf: Add tracing multi attach fails test
      selftests/bpf: Add tracing multi attach benchmark test
      selftests/bpf: Add tracing multi attach rollback tests

 arch/arm64/net/bpf_jit_comp.c                                      |  58 +++---
 arch/loongarch/net/bpf_jit.c                                       |  44 ++---
 arch/powerpc/net/bpf_jit_comp.c                                    |  46 ++---
 arch/riscv/net/bpf_jit_comp64.c                                    |  52 +++---
 arch/s390/net/bpf_jit_comp.c                                       |  44 ++---
 arch/x86/net/bpf_jit_comp.c                                        |  54 +++---
 include/linux/bpf.h                                                |  91 ++++++---
 include/linux/bpf_types.h                                          |   1 +
 include/linux/bpf_verifier.h                                       |   3 +
 include/linux/btf_ids.h                                            |   1 +
 include/linux/ftrace.h                                             |   1 +
 include/linux/trace_events.h                                       |   6 +
 include/uapi/linux/bpf.h                                           |   9 +
 kernel/bpf/bpf_struct_ops.c                                        |  27 +--
 kernel/bpf/btf.c                                                   |   4 +
 kernel/bpf/syscall.c                                               |  88 +++++----
 kernel/bpf/trampoline.c                                            | 512 ++++++++++++++++++++++++++++++++++++++++----------
 kernel/bpf/verifier.c                                              | 124 +++++++++---
 kernel/trace/bpf_trace.c                                           | 149 ++++++++++++++-
 kernel/trace/ftrace.c                                              |   7 +-
 net/bpf/bpf_dummy_struct_ops.c                                     |  14 +-
 net/bpf/test_run.c                                                 |   3 +
 tools/include/uapi/linux/bpf.h                                     |  10 +
 tools/lib/bpf/bpf.c                                                |   9 +
 tools/lib/bpf/bpf.h                                                |   5 +
 tools/lib/bpf/libbpf.c                                             | 337 ++++++++++++++++++++++++++++++++-
 tools/lib/bpf/libbpf.h                                             |  15 ++
 tools/lib/bpf/libbpf.map                                           |   1 +
 tools/lib/bpf/libbpf_internal.h                                    |   1 +
 tools/testing/selftests/bpf/Makefile                               |   9 +-
 tools/testing/selftests/bpf/prog_tests/tracing_multi.c             | 860 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 tools/testing/selftests/bpf/progs/tracing_multi_attach.c           |  40 ++++
 tools/testing/selftests/bpf/progs/tracing_multi_attach_module.c    |  26 +++
 tools/testing/selftests/bpf/progs/tracing_multi_bench.c            |  13 ++
 tools/testing/selftests/bpf/progs/tracing_multi_check.c            | 213 +++++++++++++++++++++
 tools/testing/selftests/bpf/progs/tracing_multi_fail.c             |  19 ++
 tools/testing/selftests/bpf/progs/tracing_multi_intersect_attach.c |  42 +++++
 tools/testing/selftests/bpf/progs/tracing_multi_rollback.c         |  38 ++++
 tools/testing/selftests/bpf/progs/tracing_multi_session_attach.c   |  43 +++++
 tools/testing/selftests/bpf/trace_helpers.c                        |   6 +-
 tools/testing/selftests/bpf/trace_helpers.h                        |   1 +
 41 files changed, 2661 insertions(+), 365 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/tracing_multi.c
 create mode 100644 tools/testing/selftests/bpf/progs/tracing_multi_attach.c
 create mode 100644 tools/testing/selftests/bpf/progs/tracing_multi_attach_module.c
 create mode 100644 tools/testing/selftests/bpf/progs/tracing_multi_bench.c
 create mode 100644 tools/testing/selftests/bpf/progs/tracing_multi_check.c
 create mode 100644 tools/testing/selftests/bpf/progs/tracing_multi_fail.c
 create mode 100644 tools/testing/selftests/bpf/progs/tracing_multi_intersect_attach.c
 create mode 100644 tools/testing/selftests/bpf/progs/tracing_multi_rollback.c
 create mode 100644 tools/testing/selftests/bpf/progs/tracing_multi_session_attach.c

^ permalink raw reply

* Re: [PATCH V2] tracing: Revert "tracing: Remove pid in task_rename tracing output"
From: Xuewen Yan @ 2026-03-16  2:00 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: Xuewen Yan, mhiramat, mathieu.desnoyers, elver, kees,
	lorenzo.stoakes, brauner, schuster.simon, david, linux-kernel,
	linux-trace-kernel, guohua.yan, ke.wang, jing.xia
In-Reply-To: <20260306100625.2211675a@gandalf.local.home>

Hi Steven,

Unless there are any further comments, could you please help to take
this through the tracing tree?

Thanks!

On Fri, Mar 6, 2026 at 11:06 PM Steven Rostedt <rostedt@goodmis.org> wrote:
>
> On Fri, 6 Mar 2026 15:59:54 +0800
> Xuewen Yan <xuewen.yan@unisoc.com> wrote:
>
> > This reverts commit e3f6a42272e028c46695acc83fc7d7c42f2750ad.
> >
> > The commit says that the tracepoint only deals with the current task,
> > however the following case is not current task:
> >
> > comm_write() {
> >     p = get_proc_task(inode);
> >     if (!p)
> >         return -ESRCH;
> >
> >     if (same_thread_group(current, p))
> >         set_task_comm(p, buffer);
> > }
> > where set_task_comm() calls __set_task_comm() which records
> > the update of p and not current.
> >
> > So revert the patch to show pid.
> >
> > Fixes: e3f6a42272e0 ("tracing: Remove pid in task_rename tracing output")
> > Reported-by: Guohua Yan <guohua.yan@unisoc.com>
> > Signed-off-by: Xuewen Yan <xuewen.yan@unisoc.com>
>
> Reviewed-by: Steven Rostedt (Google) <rostedt@goodmis.org>
>
> -- Steve
>
> > ---
> > v2:
> > - update commit message (Steven)
> > ---
> >  include/trace/events/task.h | 7 +++++--
> >  1 file changed, 5 insertions(+), 2 deletions(-)
> >
> > diff --git a/include/trace/events/task.h b/include/trace/events/task.h
> > index 4f0759634306..b9a129eb54d9 100644
> > --- a/include/trace/events/task.h
> > +++ b/include/trace/events/task.h
> > @@ -38,19 +38,22 @@ TRACE_EVENT(task_rename,
> >       TP_ARGS(task, comm),
> >
> >       TP_STRUCT__entry(
> > +             __field(        pid_t,  pid)
> >               __array(        char, oldcomm,  TASK_COMM_LEN)
> >               __array(        char, newcomm,  TASK_COMM_LEN)
> >               __field(        short,  oom_score_adj)
> >       ),
> >
> >       TP_fast_assign(
> > +             __entry->pid = task->pid;
> >               memcpy(entry->oldcomm, task->comm, TASK_COMM_LEN);
> >               strscpy(entry->newcomm, comm, TASK_COMM_LEN);
> >               __entry->oom_score_adj = task->signal->oom_score_adj;
> >       ),
> >
> > -     TP_printk("oldcomm=%s newcomm=%s oom_score_adj=%hd",
> > -               __entry->oldcomm, __entry->newcomm, __entry->oom_score_adj)
> > +     TP_printk("pid=%d oldcomm=%s newcomm=%s oom_score_adj=%hd",
> > +             __entry->pid, __entry->oldcomm,
> > +             __entry->newcomm, __entry->oom_score_adj)
> >  );
> >
> >  /**
>

^ permalink raw reply

* Re: [RFC PATCH 0/4] Enable Clang's Source-based Code Coverage and MC/DC for x86-64
From: Sasha Levin @ 2026-03-15 14:15 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Chuck Wolber, nathan, Matt.Kelly2, akpm, andrew.j.oppelt,
	anton.ivanov, ardb, arnd, bhelgaas, bp, chuck.wolber, dave.hansen,
	dvyukov, hpa, jinghao7, johannes, jpoimboe, justinstitt, kees,
	kent.overstreet, linux-arch, linux-efi, linux-kbuild,
	linux-kernel, linux-trace-kernel, linux-um, llvm, luto, marinov,
	masahiroy, maskray, mathieu.desnoyers, matthew.l.weber3, mhiramat,
	mingo, morbo, ndesaulniers, oberpar, paulmck, richard, rostedt,
	samitolvanen, samuel.sarkisian, steven.h.vanderleest, tglx,
	tingxur, tyxu, wentaoz5, x86
In-Reply-To: <20251015092145.GB3419281@noisy.programming.kicks-ass.net>

On Wed, Oct 15, 2025 at 11:21:45AM +0200, Peter Zijlstra wrote:
>On Wed, Oct 15, 2025 at 08:26:50AM +0000, Chuck Wolber wrote:
>> Optimization makes it nearly impossible to correlate GCov results back to
>> actual lines of source. llvm-cov instruments at the AST level which enables
>> precise mapping back to source code regardless of optimization level.
>>
>>
>> A detailed rundown on this issue can be found here[1], with the most relevant
>> excerpt reproduced here:
>
>Yes read and understand this, but that doesn't mean you have to have 3
>different kernel interfaces for all of this, right?

To clarify, are you suggesting that we'll have something like a single
/sys/kernel/debug/coverage interface that is producing the same structured
output whether we use gcov or llvm?

-- 
Thanks,
Sasha

^ permalink raw reply

* Re: [PATCH 16/53] ovl: drop dir lock for lookups in impure readdir
From: Amir Goldstein @ 2026-03-15 13:51 UTC (permalink / raw)
  To: NeilBrown
  Cc: Linus Torvalds, Alexander Viro, Christian Brauner, Jan Kara,
	Jeff Layton, Trond Myklebust, Anna Schumaker, Carlos Maiolino,
	Miklos Szeredi, Jan Harkes, Hugh Dickins, Baolin Wang,
	David Howells, Marc Dionne, Steve French, Namjae Jeon,
	Sungjong Seo, Yuezhang Mo, Andreas Hindborg, Breno Leitao,
	Theodore Ts'o, Andreas Dilger, Steven Rostedt,
	Masami Hiramatsu, Ilya Dryomov, Alex Markuze, Viacheslav Dubeyko,
	Tyler Hicks, Andreas Gruenbacher, Richard Weinberger,
	Anton Ivanov, Johannes Berg, Jeremy Kerr, Ard Biesheuvel,
	linux-fsdevel, linux-nfs, linux-xfs, linux-unionfs, coda,
	linux-mm, linux-afs, linux-cifs, linux-ext4, linux-kernel,
	linux-trace-kernel, ceph-devel, ecryptfs, gfs2, linux-um,
	linux-efi
In-Reply-To: <20260312214330.3885211-17-neilb@ownmail.net>

On Thu, Mar 12, 2026 at 10:49 PM NeilBrown <neilb@ownmail.net> wrote:
>
> From: NeilBrown <neil@brown.name>
>
> When performing an "impure" readdir, ovl needs to perform a lookup on some
> of the names that it found.
> With proposed locking changes it will not be possible to perform this
> lookup (in particular, not safe to wait for d_alloc_parallel()) while
> holding a lock on the directory.
>
> ovl doesn't really need the lock at this point.

Not exactly. see below.

> It has already iterated
> the directory and has cached a list of the contents.  It now needs to
> gather extra information about some contents.  It can do this without
> the lock.
>
> After gathering that info it needs to retake the lock for API
> correctness.  After doing this it must check IS_DEADDIR() again to
> ensure readdir always returns -ENOENT on a removed directory.
>
> Note that while ->iterate_shared is called with a shared lock, ovl uses
> WRAP_DIR_ITER() so an exclusive lock is held and so we drop and retake
> that exclusive lock.
>
> As the directory is no longer locked in ovl_cache_update() we need
> dget_parent() to get a reference to the parent.
>
> Signed-off-by: NeilBrown <neil@brown.name>
> ---
>  fs/overlayfs/readdir.c | 19 ++++++++++++-------
>  1 file changed, 12 insertions(+), 7 deletions(-)
>
> diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c
> index 1dcc75b3a90f..d5123b37921c 100644
> --- a/fs/overlayfs/readdir.c
> +++ b/fs/overlayfs/readdir.c
> @@ -568,13 +568,12 @@ static int ovl_cache_update(const struct path *path, struct ovl_cache_entry *p,
>                         goto get;
>                 }
>                 if (p->len == 2) {
> -                       /* we shall not be moved */
> -                       this = dget(dir->d_parent);
> +                       this = dget_parent(dir);
>                         goto get;
>                 }
>         }
>         /* This checks also for xwhiteouts */
> -       this = lookup_one(mnt_idmap(path->mnt), &QSTR_LEN(p->name, p->len), dir);
> +       this = lookup_one_unlocked(mnt_idmap(path->mnt), &QSTR_LEN(p->name, p->len), dir);

ovl_cache_update() is also called from ovl_iterate_merged() where inode
is locked.

>         if (IS_ERR_OR_NULL(this) || !this->d_inode) {
>                 /* Mark a stale entry */
>                 p->is_whiteout = true;
> @@ -666,11 +665,12 @@ static int ovl_dir_read_impure(const struct path *path,  struct list_head *list,
>         if (err)
>                 return err;
>
> +       inode_unlock(path->dentry->d_inode);
>         list_for_each_entry_safe(p, n, list, l_node) {
>                 if (!name_is_dot_dotdot(p->name, p->len)) {
>                         err = ovl_cache_update(path, p, true);
>                         if (err)
> -                               return err;
> +                               break;
>                 }
>                 if (p->ino == p->real_ino) {
>                         list_del(&p->l_node);
> @@ -680,14 +680,19 @@ static int ovl_dir_read_impure(const struct path *path,  struct list_head *list,
>                         struct rb_node *parent = NULL;
>
>                         if (WARN_ON(ovl_cache_entry_find_link(p->name, p->len,
> -                                                             &newp, &parent)))
> -                               return -EIO;
> +                                                             &newp, &parent))) {
> +                               err = -EIO;
> +                               break;
> +                       }
>
>                         rb_link_node(&p->node, parent, newp);
>                         rb_insert_color(&p->node, root);
>                 }
>         }
> -       return 0;
> +       inode_lock(path->dentry->d_inode);
> +       if (IS_DEADDIR(path->dentry->d_inode))
> +               err = -ENOENT;
> +       return err;
>  }
>
>  static struct ovl_dir_cache *ovl_cache_get_impure(const struct path *path)
> --

You missed the fact that overlayfs uses the dir inode lock
to protect the readdir inode cache, so your patch introduces
a risk for storing a stale readdir cache when dir modify operations
invalidate the readdir cache version while lock is dropped
and also introduces memory leak when cache is stomped
without freeing cache created by a competing thread.
I think something like the untested patch below should fix this.

I did not look into ovl_iterate_merged() to see if it has a simple
fix and I am not 100% sure that this fix for impure dir is enough.

Thanks,
Amir.

diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c
index d5123b37921c8..9e90064b252ce 100644
--- a/fs/overlayfs/readdir.c
+++ b/fs/overlayfs/readdir.c
@@ -702,15 +702,13 @@ static struct ovl_dir_cache
*ovl_cache_get_impure(const struct path *path)
        struct inode *inode = d_inode(dentry);
        struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
        struct ovl_dir_cache *cache;
+       /* Snapshot version before ovl_dir_read_impure() drops i_rwsem */
+       u64 version = ovl_inode_version_get(inode);

        cache = ovl_dir_cache(inode);
-       if (cache && ovl_inode_version_get(inode) == cache->version)
+       if (cache && version == cache->version)
                return cache;

-       /* Impure cache is not refcounted, free it here */
-       ovl_dir_cache_free(inode);
-       ovl_set_dir_cache(inode, NULL);
-
        cache = kzalloc_obj(struct ovl_dir_cache);
        if (!cache)
                return ERR_PTR(-ENOMEM);
@@ -721,6 +719,14 @@ static struct ovl_dir_cache
*ovl_cache_get_impure(const struct path *path)
                kfree(cache);
                return ERR_PTR(res);
        }
+
+       /*
+        * Impure cache is not refcounted, free it here.
+        * Also frees cache stored by concurrent readdir during i_rwsem drop.
+        */
+       ovl_dir_cache_free(inode);
+       ovl_set_dir_cache(inode, NULL);
+
        if (list_empty(&cache->entries)) {
                /*
                 * A good opportunity to get rid of an unneeded "impure" flag.
@@ -736,7 +742,7 @@ static struct ovl_dir_cache
*ovl_cache_get_impure(const struct path *path)
                return NULL;
        }

-       cache->version = ovl_inode_version_get(inode);
+       cache->version = version;
        ovl_set_dir_cache(inode, cache);

        return cache;

^ permalink raw reply related

* [PATCH v6 09/17] lib/bootconfig: validate child node index in xbc_verify_tree()
From: Josh Law @ 2026-03-15 12:20 UTC (permalink / raw)
  To: Masami Hiramatsu, Andrew Morton
  Cc: Steven Rostedt, linux-kernel, linux-trace-kernel, Josh Law
In-Reply-To: <20260315122015.55965-1-objecting@objecting.org>

xbc_verify_tree() validates that each node's next index is within
bounds, but does not check the child index.  Add the same bounds
check for the child field.

Without this check, a corrupt bootconfig that passes next-index
validation could still trigger an out-of-bounds memory access via an
invalid child index when xbc_node_get_child() is called during tree
traversal at boot time.

Signed-off-by: Josh Law <objecting@objecting.org>
---
 lib/bootconfig.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/lib/bootconfig.c b/lib/bootconfig.c
index 0823491221f4..038f56689a48 100644
--- a/lib/bootconfig.c
+++ b/lib/bootconfig.c
@@ -823,6 +823,10 @@ static int __init xbc_verify_tree(void)
 			return xbc_parse_error("No closing brace",
 				xbc_node_get_data(xbc_nodes + i));
 		}
+		if (xbc_nodes[i].child >= xbc_node_num) {
+			return xbc_parse_error("Broken child node",
+				xbc_node_get_data(xbc_nodes + i));
+		}
 	}
 
 	/* Key tree limitation check */
-- 
2.34.1


^ permalink raw reply related

* [PATCH v6 17/17] lib/bootconfig: change xbc_node_index() return type to uint16_t
From: Josh Law @ 2026-03-15 12:20 UTC (permalink / raw)
  To: Masami Hiramatsu, Andrew Morton
  Cc: Steven Rostedt, linux-kernel, linux-trace-kernel, Josh Law
In-Reply-To: <20260315122015.55965-1-objecting@objecting.org>

  lib/bootconfig.c:136:21: warning: conversion from 'long int' to
  'int' may change value [-Wconversion]
  lib/bootconfig.c:308:33: warning: conversion from 'int' to 'uint16_t'
  may change value [-Wconversion]
  lib/bootconfig.c:467:37: warning: conversion from 'int' to 'uint16_t'
  may change value [-Wconversion]
  lib/bootconfig.c:469:40: warning: conversion from 'int' to 'uint16_t'
  may change value [-Wconversion]
  lib/bootconfig.c:472:54: warning: conversion from 'int' to 'uint16_t'
  may change value [-Wconversion]
  lib/bootconfig.c:476:45: warning: conversion from 'int' to 'uint16_t'
  may change value [-Wconversion]

xbc_node_index() returns the position of a node in the xbc_nodes array,
which has at most XBC_NODE_MAX (8192) entries, well within uint16_t
range.  Every caller stores the result in a uint16_t field (node->parent,
node->child, node->next, or the keys[] array in compose_key_after), so
the int return type causes narrowing warnings at all six call sites.

Change the return type to uint16_t and add an explicit cast on the
pointer subtraction to match the storage width and eliminate the
warnings.

Signed-off-by: Josh Law <objecting@objecting.org>
---
 include/linux/bootconfig.h | 2 +-
 lib/bootconfig.c           | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/include/linux/bootconfig.h b/include/linux/bootconfig.h
index 23a96c5edcf3..692a5acc2ffc 100644
--- a/include/linux/bootconfig.h
+++ b/include/linux/bootconfig.h
@@ -66,7 +66,7 @@ struct xbc_node {
 
 /* Node tree access raw APIs */
 struct xbc_node * __init xbc_root_node(void);
-int __init xbc_node_index(struct xbc_node *node);
+uint16_t __init xbc_node_index(struct xbc_node *node);
 struct xbc_node * __init xbc_node_get_parent(struct xbc_node *node);
 struct xbc_node * __init xbc_node_get_child(struct xbc_node *node);
 struct xbc_node * __init xbc_node_get_next(struct xbc_node *node);
diff --git a/lib/bootconfig.c b/lib/bootconfig.c
index 68a72dbc38fa..148084abae12 100644
--- a/lib/bootconfig.c
+++ b/lib/bootconfig.c
@@ -131,9 +131,9 @@ struct xbc_node * __init xbc_root_node(void)
  *
  * Return the index number of @node in XBC node list.
  */
-int __init xbc_node_index(struct xbc_node *node)
+uint16_t __init xbc_node_index(struct xbc_node *node)
 {
-	return node - &xbc_nodes[0];
+	return (uint16_t)(node - &xbc_nodes[0]);
 }
 
 /**
-- 
2.34.1


^ permalink raw reply related

* [PATCH v6 16/17] lib/bootconfig: fix sign-compare in xbc_node_compose_key_after()
From: Josh Law @ 2026-03-15 12:20 UTC (permalink / raw)
  To: Masami Hiramatsu, Andrew Morton
  Cc: Steven Rostedt, linux-kernel, linux-trace-kernel, Josh Law
In-Reply-To: <20260315122015.55965-1-objecting@objecting.org>

  lib/bootconfig.c:322:25: warning: comparison of integer expressions
  of different signedness: 'int' and 'size_t' [-Wsign-compare]
  lib/bootconfig.c:325:30: warning: conversion to 'size_t' from 'int'
  may change the sign of the result [-Wsign-conversion]

snprintf() returns int but size is size_t, so comparing ret >= size
and subtracting size -= ret involve mixed-sign operations.  Cast ret
at the comparison and subtraction sites; ret is known non-negative at
this point because the ret < 0 early return has already been taken.

Signed-off-by: Josh Law <objecting@objecting.org>
---
 lib/bootconfig.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lib/bootconfig.c b/lib/bootconfig.c
index e318b236e728..68a72dbc38fa 100644
--- a/lib/bootconfig.c
+++ b/lib/bootconfig.c
@@ -319,10 +319,10 @@ int __init xbc_node_compose_key_after(struct xbc_node *root,
 			       depth ? "." : "");
 		if (ret < 0)
 			return ret;
-		if (ret >= size) {
+		if (ret >= (int)size) {
 			size = 0;
 		} else {
-			size -= ret;
+			size -= (size_t)ret;
 			buf += ret;
 		}
 		total += ret;
-- 
2.34.1


^ permalink raw reply related

* [PATCH v6 15/17] lib/bootconfig: use size_t for key length tracking in xbc_verify_tree()
From: Josh Law @ 2026-03-15 12:20 UTC (permalink / raw)
  To: Masami Hiramatsu, Andrew Morton
  Cc: Steven Rostedt, linux-kernel, linux-trace-kernel, Josh Law
In-Reply-To: <20260315122015.55965-1-objecting@objecting.org>

  lib/bootconfig.c:839:24: warning: conversion from 'size_t' to 'int'
  may change value [-Wconversion]
  lib/bootconfig.c:860:32: warning: conversion from 'size_t' to 'int'
  may change value [-Wconversion]
  lib/bootconfig.c:860:29: warning: conversion to 'size_t' from 'int'
  may change the sign of the result [-Wsign-conversion]

The key length variables len and wlen accumulate strlen() results but
were declared as int, causing truncation and sign-conversion warnings.
Change both to size_t to match the strlen() return type and avoid
mixed-sign arithmetic.

Signed-off-by: Josh Law <objecting@objecting.org>
---
 lib/bootconfig.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/lib/bootconfig.c b/lib/bootconfig.c
index 7296df003459..e318b236e728 100644
--- a/lib/bootconfig.c
+++ b/lib/bootconfig.c
@@ -803,7 +803,8 @@ static int __init xbc_close_brace(char **k, char *n)
 
 static int __init xbc_verify_tree(void)
 {
-	int i, depth, len, wlen;
+	int i, depth;
+	size_t len, wlen;
 	struct xbc_node *n, *m;
 
 	/* Brace closing */
-- 
2.34.1


^ permalink raw reply related

* [PATCH v6 14/17] lib/bootconfig: narrow offset type in xbc_init_node()
From: Josh Law @ 2026-03-15 12:20 UTC (permalink / raw)
  To: Masami Hiramatsu, Andrew Morton
  Cc: Steven Rostedt, linux-kernel, linux-trace-kernel, Josh Law
In-Reply-To: <20260315122015.55965-1-objecting@objecting.org>

  lib/bootconfig.c:415:32: warning: conversion to 'long unsigned int'
  from 'long int' may change the sign of the result [-Wsign-conversion]

Pointer subtraction yields ptrdiff_t (signed long), which was stored in
unsigned long.  The offset is immediately checked against XBC_DATA_MAX
(32767) and then truncated to uint16_t, so unsigned int is sufficient.
Add an explicit cast on the subtraction to suppress the sign-conversion
warning.

Signed-off-by: Josh Law <objecting@objecting.org>
---
 lib/bootconfig.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/bootconfig.c b/lib/bootconfig.c
index 995c2ec94cbe..7296df003459 100644
--- a/lib/bootconfig.c
+++ b/lib/bootconfig.c
@@ -412,7 +412,7 @@ const char * __init xbc_node_find_next_key_value(struct xbc_node *root,
 
 static int __init xbc_init_node(struct xbc_node *node, char *data, uint16_t flag)
 {
-	unsigned long offset = data - xbc_data;
+	unsigned int offset = (unsigned int)(data - xbc_data);
 
 	if (WARN_ON(offset >= XBC_DATA_MAX))
 		return -EINVAL;
-- 
2.34.1


^ permalink raw reply related

* [PATCH v6 13/17] lib/bootconfig: use size_t for strlen result in xbc_node_match_prefix()
From: Josh Law @ 2026-03-15 12:20 UTC (permalink / raw)
  To: Masami Hiramatsu, Andrew Morton
  Cc: Steven Rostedt, linux-kernel, linux-trace-kernel, Josh Law
In-Reply-To: <20260315122015.55965-1-objecting@objecting.org>

  lib/bootconfig.c:198:19: warning: conversion from 'size_t' to 'int'
  may change value [-Wconversion]
  lib/bootconfig.c:200:33: warning: conversion to '__kernel_size_t'
  from 'int' may change the sign of the result [-Wsign-conversion]

strlen() returns size_t but the result was stored in an int.  The value
is then passed back to strncmp() which expects size_t, causing a second
sign-conversion warning on the round-trip.  Use size_t throughout to
match the API types.

Signed-off-by: Josh Law <objecting@objecting.org>
---
 lib/bootconfig.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/bootconfig.c b/lib/bootconfig.c
index 806a8f038d24..995c2ec94cbe 100644
--- a/lib/bootconfig.c
+++ b/lib/bootconfig.c
@@ -195,7 +195,7 @@ static bool __init
 xbc_node_match_prefix(struct xbc_node *node, const char **prefix)
 {
 	const char *p = xbc_node_get_data(node);
-	int len = strlen(p);
+	size_t len = strlen(p);
 
 	if (strncmp(*prefix, p, len))
 		return false;
-- 
2.34.1


^ permalink raw reply related

* [PATCH v6 10/17] lib/bootconfig: check xbc_init_node() return in override path
From: Josh Law @ 2026-03-15 12:20 UTC (permalink / raw)
  To: Masami Hiramatsu, Andrew Morton
  Cc: Steven Rostedt, linux-kernel, linux-trace-kernel, Josh Law
In-Reply-To: <20260315122015.55965-1-objecting@objecting.org>

The ':=' override path in xbc_parse_kv() calls xbc_init_node() to
re-initialize an existing value node but does not check the return
value.  If xbc_init_node() fails (data offset out of range), parsing
silently continues with stale node data.

Add the missing error check to match the xbc_add_node() call path
which already checks for failure.

In practice, a bootconfig using ':=' to override a value near the
32KB data limit could silently retain the old value, meaning a
security-relevant boot parameter override (e.g., a trace filter or
debug setting) would not take effect as intended.

Fixes: e5efaeb8a8f5 ("bootconfig: Support mixing a value and subkeys under a key")
Signed-off-by: Josh Law <objecting@objecting.org>
---
 lib/bootconfig.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/lib/bootconfig.c b/lib/bootconfig.c
index 038f56689a48..182d9d9bc5a6 100644
--- a/lib/bootconfig.c
+++ b/lib/bootconfig.c
@@ -728,7 +728,8 @@ static int __init xbc_parse_kv(char **k, char *v, int op)
 		if (op == ':') {
 			unsigned short nidx = child->next;
 
-			xbc_init_node(child, v, XBC_VALUE);
+			if (xbc_init_node(child, v, XBC_VALUE) < 0)
+				return xbc_parse_error("Failed to override value", v);
 			child->next = nidx;	/* keep subkeys */
 			goto array;
 		}
-- 
2.34.1


^ permalink raw reply related

* [PATCH v6 12/17] lib/bootconfig: fix signed comparison in xbc_node_get_data()
From: Josh Law @ 2026-03-15 12:20 UTC (permalink / raw)
  To: Masami Hiramatsu, Andrew Morton
  Cc: Steven Rostedt, linux-kernel, linux-trace-kernel, Josh Law
In-Reply-To: <20260315122015.55965-1-objecting@objecting.org>

  lib/bootconfig.c:188:28: warning: comparison of integer expressions
  of different signedness: 'int' and 'size_t' [-Wsign-compare]

The local variable 'offset' is declared as int, but xbc_data_size is
size_t.  Using ~XBC_VALUE as the mask also involves integer promotion
rules that obscure intent.

Change the type to unsigned int and mask with XBC_DATA_MAX (which is
the 15-bit data mask) instead of ~XBC_VALUE, making the expression
self-documenting and eliminating the signed/unsigned comparison.

Signed-off-by: Josh Law <objecting@objecting.org>
---
 lib/bootconfig.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/bootconfig.c b/lib/bootconfig.c
index 182d9d9bc5a6..806a8f038d24 100644
--- a/lib/bootconfig.c
+++ b/lib/bootconfig.c
@@ -183,7 +183,7 @@ struct xbc_node * __init xbc_node_get_next(struct xbc_node *node)
  */
 const char * __init xbc_node_get_data(struct xbc_node *node)
 {
-	int offset = node->data & ~XBC_VALUE;
+	unsigned int offset = node->data & XBC_DATA_MAX;
 
 	if (WARN_ON(offset >= xbc_data_size))
 		return NULL;
-- 
2.34.1


^ permalink raw reply related

* [PATCH v6 06/17] lib/bootconfig: drop redundant memset of xbc_nodes
From: Josh Law @ 2026-03-15 12:20 UTC (permalink / raw)
  To: Masami Hiramatsu, Andrew Morton
  Cc: Steven Rostedt, linux-kernel, linux-trace-kernel, Josh Law
In-Reply-To: <20260315122015.55965-1-objecting@objecting.org>

memblock_alloc() already returns zeroed memory, so the explicit memset
in xbc_init() is redundant. Switch the userspace xbc_alloc_mem() from
malloc() to calloc() so both paths return zeroed memory, and remove
the separate memset call.

Signed-off-by: Josh Law <objecting@objecting.org>
---
 lib/bootconfig.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/lib/bootconfig.c b/lib/bootconfig.c
index 06e8a79ab472..fe1053043752 100644
--- a/lib/bootconfig.c
+++ b/lib/bootconfig.c
@@ -71,7 +71,7 @@ static inline void __init xbc_free_mem(void *addr, size_t size, bool early)
 
 static inline void *xbc_alloc_mem(size_t size)
 {
-	return malloc(size);
+	return calloc(1, size);
 }
 
 static inline void xbc_free_mem(void *addr, size_t size, bool early)
@@ -982,7 +982,6 @@ int __init xbc_init(const char *data, size_t size, const char **emsg, int *epos)
 		_xbc_exit(true);
 		return -ENOMEM;
 	}
-	memset(xbc_nodes, 0, sizeof(struct xbc_node) * XBC_NODE_MAX);
 
 	ret = xbc_parse_tree();
 	if (!ret)
-- 
2.34.1


^ permalink raw reply related

* [PATCH v6 08/17] lib/bootconfig: replace linux/kernel.h with specific includes
From: Josh Law @ 2026-03-15 12:20 UTC (permalink / raw)
  To: Masami Hiramatsu, Andrew Morton
  Cc: Steven Rostedt, linux-kernel, linux-trace-kernel, Josh Law
In-Reply-To: <20260315122015.55965-1-objecting@objecting.org>

linux/kernel.h is a legacy catch-all header. Replace it with the
specific headers actually needed: linux/cache.h for SMP_CACHE_BYTES,
linux/compiler.h for unlikely(), and linux/sprintf.h for snprintf().

Signed-off-by: Josh Law <objecting@objecting.org>
---
 lib/bootconfig.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/lib/bootconfig.c b/lib/bootconfig.c
index fe1053043752..0823491221f4 100644
--- a/lib/bootconfig.c
+++ b/lib/bootconfig.c
@@ -17,7 +17,9 @@
 #include <linux/bug.h>
 #include <linux/ctype.h>
 #include <linux/errno.h>
-#include <linux/kernel.h>
+#include <linux/cache.h>
+#include <linux/compiler.h>
+#include <linux/sprintf.h>
 #include <linux/memblock.h>
 #include <linux/string.h>
 
-- 
2.34.1


^ permalink raw reply related

* [PATCH v6 11/17] tools/bootconfig: fix fd leak in load_xbc_file() on fstat failure
From: Josh Law @ 2026-03-15 12:20 UTC (permalink / raw)
  To: Masami Hiramatsu, Andrew Morton
  Cc: Steven Rostedt, linux-kernel, linux-trace-kernel, Josh Law
In-Reply-To: <20260315122015.55965-1-objecting@objecting.org>

If fstat() fails after open() succeeds, load_xbc_file() returns
-errno without closing the file descriptor.  Add the missing close()
call on the error path.

Fixes: 950313ebf79c ("tools: bootconfig: Add bootconfig command")
Signed-off-by: Josh Law <objecting@objecting.org>
---
 tools/bootconfig/main.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tools/bootconfig/main.c b/tools/bootconfig/main.c
index 55d59ed507d5..8078fee0b75b 100644
--- a/tools/bootconfig/main.c
+++ b/tools/bootconfig/main.c
@@ -162,8 +162,10 @@ static int load_xbc_file(const char *path, char **buf)
 	if (fd < 0)
 		return -errno;
 	ret = fstat(fd, &stat);
-	if (ret < 0)
+	if (ret < 0) {
+		close(fd);
 		return -errno;
+	}
 
 	ret = load_xbc_fd(fd, buf, stat.st_size);
 
-- 
2.34.1


^ permalink raw reply related

* [PATCH v6 05/17] lib/bootconfig: increment xbc_node_num after node init succeeds
From: Josh Law @ 2026-03-15 12:20 UTC (permalink / raw)
  To: Masami Hiramatsu, Andrew Morton
  Cc: Steven Rostedt, linux-kernel, linux-trace-kernel, Josh Law
In-Reply-To: <20260315122015.55965-1-objecting@objecting.org>

Move the xbc_node_num increment to after xbc_init_node() so a failed
init does not leave a partially initialized node counted in the array.

If xbc_init_node() fails on a data offset at the boundary of a
maximum-size bootconfig, the pre-incremented count causes subsequent
tree verification and traversal to consider the uninitialized node as
valid, potentially leading to an out-of-bounds read or unpredictable
boot behavior.

Signed-off-by: Josh Law <objecting@objecting.org>
---
 lib/bootconfig.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/lib/bootconfig.c b/lib/bootconfig.c
index 56fbedc9e725..06e8a79ab472 100644
--- a/lib/bootconfig.c
+++ b/lib/bootconfig.c
@@ -429,9 +429,10 @@ static struct xbc_node * __init xbc_add_node(char *data, uint16_t flag)
 	if (xbc_node_num == XBC_NODE_MAX)
 		return NULL;
 
-	node = &xbc_nodes[xbc_node_num++];
+	node = &xbc_nodes[xbc_node_num];
 	if (xbc_init_node(node, data, flag) < 0)
 		return NULL;
+	xbc_node_num++;
 
 	return node;
 }
-- 
2.34.1


^ permalink raw reply related

* [PATCH v6 07/17] bootconfig: constify xbc_calc_checksum() data parameter
From: Josh Law @ 2026-03-15 12:20 UTC (permalink / raw)
  To: Masami Hiramatsu, Andrew Morton
  Cc: Steven Rostedt, linux-kernel, linux-trace-kernel, Josh Law
In-Reply-To: <20260315122015.55965-1-objecting@objecting.org>

xbc_calc_checksum() only reads the data buffer, so mark the parameter
as const void * and the internal pointer as const unsigned char *.

Signed-off-by: Josh Law <objecting@objecting.org>
---
 include/linux/bootconfig.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/linux/bootconfig.h b/include/linux/bootconfig.h
index 25df9260d206..23a96c5edcf3 100644
--- a/include/linux/bootconfig.h
+++ b/include/linux/bootconfig.h
@@ -36,9 +36,9 @@ bool __init cmdline_has_extra_options(void);
  * The checksum will be used with the BOOTCONFIG_MAGIC and the size for
  * embedding the bootconfig in the initrd image.
  */
-static inline __init uint32_t xbc_calc_checksum(void *data, uint32_t size)
+static inline __init uint32_t xbc_calc_checksum(const void *data, uint32_t size)
 {
-	unsigned char *p = data;
+	const unsigned char *p = data;
 	uint32_t ret = 0;
 
 	while (size--)
-- 
2.34.1


^ permalink raw reply related

* [PATCH v6 02/17] lib/bootconfig: fix typos, kerneldoc, and inconsistent if/else bracing
From: Josh Law @ 2026-03-15 12:20 UTC (permalink / raw)
  To: Masami Hiramatsu, Andrew Morton
  Cc: Steven Rostedt, linux-kernel, linux-trace-kernel, Josh Law
In-Reply-To: <20260315122015.55965-1-objecting@objecting.org>

Fix comment typos ("initiized" -> "initialized" in xbc_root_node(),
"uder" -> "under" in xbc_node_find_next_leaf()), add a missing blank
line before the xbc_get_info() kerneldoc block, and add braces to
if/else blocks where one branch uses braces but the other does not,
per coding-style section 3.1.

Signed-off-by: Josh Law <objecting@objecting.org>
---
 lib/bootconfig.c | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/lib/bootconfig.c b/lib/bootconfig.c
index 51fd2299ec0f..80de9540245d 100644
--- a/lib/bootconfig.c
+++ b/lib/bootconfig.c
@@ -79,6 +79,7 @@ static inline void xbc_free_mem(void *addr, size_t size, bool early)
 	free(addr);
 }
 #endif
+
 /**
  * xbc_get_info() - Get the information of loaded boot config
  * @node_size: A pointer to store the number of nodes.
@@ -112,7 +113,7 @@ static int __init xbc_parse_error(const char *msg, const char *p)
  * xbc_root_node() - Get the root node of extended boot config
  *
  * Return the address of root node of extended boot config. If the
- * extended boot config is not initiized, return NULL.
+ * extended boot config is not initialized, return NULL.
  */
 struct xbc_node * __init xbc_root_node(void)
 {
@@ -364,7 +365,7 @@ struct xbc_node * __init xbc_node_find_next_leaf(struct xbc_node *root,
 			node = xbc_node_get_parent(node);
 			if (node == root)
 				return NULL;
-			/* User passed a node which is not uder parent */
+			/* User passed a node which is not under parent */
 			if (WARN_ON(!node))
 				return NULL;
 		}
@@ -472,8 +473,9 @@ static struct xbc_node * __init __xbc_add_sibling(char *data, uint32_t flag, boo
 				sib->next = xbc_node_index(node);
 			}
 		}
-	} else
+	} else {
 		xbc_parse_error("Too many nodes", data);
+	}
 
 	return node;
 }
@@ -655,9 +657,9 @@ static int __init __xbc_add_key(char *k)
 	if (unlikely(xbc_node_num == 0))
 		goto add_node;
 
-	if (!last_parent)	/* the first level */
+	if (!last_parent) {	/* the first level */
 		node = find_match_node(xbc_nodes, k);
-	else {
+	} else {
 		child = xbc_node_get_child(last_parent);
 		/* Since the value node is the first child, skip it. */
 		if (child && xbc_node_is_value(child))
@@ -665,9 +667,9 @@ static int __init __xbc_add_key(char *k)
 		node = find_match_node(child, k);
 	}
 
-	if (node)
+	if (node) {
 		last_parent = node;
-	else {
+	} else {
 add_node:
 		node = xbc_add_child(k, XBC_KEY);
 		if (!node)
@@ -991,8 +993,9 @@ int __init xbc_init(const char *data, size_t size, const char **emsg, int *epos)
 		if (emsg)
 			*emsg = xbc_err_msg;
 		_xbc_exit(true);
-	} else
+	} else {
 		ret = xbc_node_num;
+	}
 
 	return ret;
 }
-- 
2.34.1


^ permalink raw reply related

* [PATCH v6 04/17] lib/bootconfig: fix off-by-one in xbc_verify_tree() next node check
From: Josh Law @ 2026-03-15 12:20 UTC (permalink / raw)
  To: Masami Hiramatsu, Andrew Morton
  Cc: Steven Rostedt, linux-kernel, linux-trace-kernel, Josh Law
In-Reply-To: <20260315122015.55965-1-objecting@objecting.org>

Valid node indices are 0 to xbc_node_num-1, so a next value equal to
xbc_node_num is out of bounds.  Use >= instead of > to catch this.

A malformed or corrupt bootconfig could pass tree verification with
an out-of-bounds next index.  On subsequent tree traversal at boot
time, xbc_node_get_next() would return a pointer past the allocated
xbc_nodes array, causing an out-of-bounds read of kernel memory.

Signed-off-by: Josh Law <objecting@objecting.org>
---
 lib/bootconfig.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/bootconfig.c b/lib/bootconfig.c
index 58d6ae297280..56fbedc9e725 100644
--- a/lib/bootconfig.c
+++ b/lib/bootconfig.c
@@ -816,7 +816,7 @@ static int __init xbc_verify_tree(void)
 	}
 
 	for (i = 0; i < xbc_node_num; i++) {
-		if (xbc_nodes[i].next > xbc_node_num) {
+		if (xbc_nodes[i].next >= xbc_node_num) {
 			return xbc_parse_error("No closing brace",
 				xbc_node_get_data(xbc_nodes + i));
 		}
-- 
2.34.1


^ permalink raw reply related

* [PATCH v6 03/17] lib/bootconfig: narrow flag parameter type from uint32_t to uint16_t
From: Josh Law @ 2026-03-15 12:20 UTC (permalink / raw)
  To: Masami Hiramatsu, Andrew Morton
  Cc: Steven Rostedt, linux-kernel, linux-trace-kernel, Josh Law
In-Reply-To: <20260315122015.55965-1-objecting@objecting.org>

The flag parameter in the node creation helpers only ever carries
XBC_KEY (0) or XBC_VALUE (0x8000), both of which fit in uint16_t.
Using uint16_t matches the width of xbc_node.data where the flag is
ultimately stored.

Signed-off-by: Josh Law <objecting@objecting.org>
---
 lib/bootconfig.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/lib/bootconfig.c b/lib/bootconfig.c
index 80de9540245d..58d6ae297280 100644
--- a/lib/bootconfig.c
+++ b/lib/bootconfig.c
@@ -408,7 +408,7 @@ const char * __init xbc_node_find_next_key_value(struct xbc_node *root,
 
 /* XBC parse and tree build */
 
-static int __init xbc_init_node(struct xbc_node *node, char *data, uint32_t flag)
+static int __init xbc_init_node(struct xbc_node *node, char *data, uint16_t flag)
 {
 	unsigned long offset = data - xbc_data;
 
@@ -422,7 +422,7 @@ static int __init xbc_init_node(struct xbc_node *node, char *data, uint32_t flag
 	return 0;
 }
 
-static struct xbc_node * __init xbc_add_node(char *data, uint32_t flag)
+static struct xbc_node * __init xbc_add_node(char *data, uint16_t flag)
 {
 	struct xbc_node *node;
 
@@ -452,7 +452,7 @@ static inline __init struct xbc_node *xbc_last_child(struct xbc_node *node)
 	return node;
 }
 
-static struct xbc_node * __init __xbc_add_sibling(char *data, uint32_t flag, bool head)
+static struct xbc_node * __init __xbc_add_sibling(char *data, uint16_t flag, bool head)
 {
 	struct xbc_node *sib, *node = xbc_add_node(data, flag);
 
@@ -480,17 +480,17 @@ static struct xbc_node * __init __xbc_add_sibling(char *data, uint32_t flag, boo
 	return node;
 }
 
-static inline struct xbc_node * __init xbc_add_sibling(char *data, uint32_t flag)
+static inline struct xbc_node * __init xbc_add_sibling(char *data, uint16_t flag)
 {
 	return __xbc_add_sibling(data, flag, false);
 }
 
-static inline struct xbc_node * __init xbc_add_head_sibling(char *data, uint32_t flag)
+static inline struct xbc_node * __init xbc_add_head_sibling(char *data, uint16_t flag)
 {
 	return __xbc_add_sibling(data, flag, true);
 }
 
-static inline __init struct xbc_node *xbc_add_child(char *data, uint32_t flag)
+static inline __init struct xbc_node *xbc_add_child(char *data, uint16_t flag)
 {
 	struct xbc_node *node = xbc_add_sibling(data, flag);
 
-- 
2.34.1


^ permalink raw reply related

* [PATCH v6 01/17] lib/bootconfig: add missing __init annotations to static helpers
From: Josh Law @ 2026-03-15 12:19 UTC (permalink / raw)
  To: Masami Hiramatsu, Andrew Morton
  Cc: Steven Rostedt, linux-kernel, linux-trace-kernel, Josh Law
In-Reply-To: <20260315122015.55965-1-objecting@objecting.org>

skip_comment() and skip_spaces_until_newline() are static functions
called exclusively from __init code paths but lack the __init
annotation themselves. Add it so their memory can be reclaimed after
init.

Signed-off-by: Josh Law <objecting@objecting.org>
---
 lib/bootconfig.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lib/bootconfig.c b/lib/bootconfig.c
index b0ef1e74e98a..51fd2299ec0f 100644
--- a/lib/bootconfig.c
+++ b/lib/bootconfig.c
@@ -509,7 +509,7 @@ static inline __init bool xbc_valid_keyword(char *key)
 	return *key == '\0';
 }
 
-static char *skip_comment(char *p)
+static char __init *skip_comment(char *p)
 {
 	char *ret;
 
@@ -522,7 +522,7 @@ static char *skip_comment(char *p)
 	return ret;
 }
 
-static char *skip_spaces_until_newline(char *p)
+static char __init *skip_spaces_until_newline(char *p)
 {
 	while (isspace(*p) && *p != '\n')
 		p++;
-- 
2.34.1


^ permalink raw reply related

* [PATCH v6 00/17] bootconfig: fixes, cleanups, and modernization
From: Josh Law @ 2026-03-15 12:19 UTC (permalink / raw)
  To: Masami Hiramatsu, Andrew Morton
  Cc: Steven Rostedt, linux-kernel, linux-trace-kernel, Josh Law

This series addresses a collection of issues found during a review of
lib/bootconfig.c, include/linux/bootconfig.h, and tools/bootconfig,
ranging from off-by-one errors and unchecked return values to coding
style, signedness/type cleanup, and API modernization.

Changes since v5:
  - Folded typo fixes, kerneldoc blank line, and inconsistent bracing
    patches (v5 02-05, 07) into a single patch (patch 2).
  - Dropped "use __packed macro for struct xbc_node" (v5 11) and
    "add __packed definition to tools/bootconfig shim header" (v5 14)
    per review feedback.
  - Added Fixes: tag to "check xbc_init_node() return in override
    path" (patch 10).
  - Added Fixes: tag to "fix fd leak in load_xbc_file() on fstat
    failure" (patch 11).

Changes since v4:
  - Added six follow-up patches found via static analysis with strict
    GCC warnings (patches 12-17).
  - Added "fix signed comparison in xbc_node_get_data()" -- switch the
    masked offset variable to unsigned int and compare against
    XBC_DATA_MAX to avoid a signed comparison and make the mask
    self-documenting (patch 12).
  - Added "use size_t for strlen result in xbc_node_match_prefix()"
    and "use size_t for key length tracking in xbc_verify_tree()" to
    match strlen() return types (patches 13, 15).
  - Added "narrow offset type in xbc_init_node()" -- use a validated
    unsigned int temporary for the stored 15-bit data offset
    (patch 14).
  - Added "fix sign-compare in xbc_node_compose_key_after()" -- cast
    the checked snprintf() return when comparing and subtracting
    against a size_t buffer length (patch 16).
  - Added "change xbc_node_index() return type to uint16_t" -- match
    the 16-bit storage fields and XBC_NODE_MAX bounds (patch 17).

Changes since v3:
  - Added commit descriptions to all patches that were missing them.
  - Added real-world impact statements to all bug-fix patches.

Changes since v2:
  - Added "validate child node index in xbc_verify_tree()" (patch 9).
  - Added "check xbc_init_node() return in override path" (patch 10).
  - Added "fix fd leak in load_xbc_file() on fstat failure" (patch 11).

Changes since v1:
  - Dropped "return empty string instead of NULL from
    xbc_node_get_data()" -- returning "" causes false matches in
    xbc_node_match_prefix() because strncmp(..., "", 0) always
    returns 0.

Bug fixes:
  - Fix off-by-one in xbc_verify_tree() where a next-node index equal
    to xbc_node_num passes the bounds check despite being out of range;
    a malformed bootconfig could cause an out-of-bounds read of kernel
    memory during tree traversal at boot time (patch 4).
  - Move xbc_node_num increment to after xbc_init_node() validation
    so a failed init does not leave a partially initialized node
    counted in the array; on a maximum-size bootconfig, the
    uninitialized node could be traversed leading to unpredictable
    boot behavior (patch 5).
  - Validate child node indices in xbc_verify_tree() alongside the
    existing next-node check; without this, a corrupt bootconfig could
    trigger an out-of-bounds memory access via an invalid child index
    during tree traversal (patch 9).
  - Check xbc_init_node() return value in the ':=' override path; a
    bootconfig using ':=' near the 32KB data limit could silently
    retain the old value, meaning a security-relevant boot parameter
    override would not take effect (patch 10).
  - Fix file descriptor leak in tools/bootconfig load_xbc_file()
    when fstat() fails (patch 11).

Correctness:
  - Add missing __init annotations to skip_comment() and
    skip_spaces_until_newline() so their memory can be reclaimed
    after init (patch 1).
  - Narrow the flag parameter in node creation helpers from uint32_t
    to uint16_t to match the xbc_node.data field width (patch 3).
  - Constify the xbc_calc_checksum() data parameter since it only
    reads the buffer (patch 7).
  - Fix strict-GCC signedness and narrowing warnings by aligning local
    types with strlen()/snprintf() APIs and the 16-bit node index/data
    storage in xbc_node_get_data(), xbc_node_match_prefix(),
    xbc_init_node(), xbc_verify_tree(), xbc_node_compose_key_after(),
    and xbc_node_index() (patches 12-17).

Cleanups:
  - Fix comment typos, missing blank line before kerneldoc,
    inconsistent if/else bracing (patch 2).
  - Drop redundant memset after memblock_alloc which already returns
    zeroed memory; switch the userspace path from malloc to calloc
    to match (patch 6).

Modernization:
  - Replace the catch-all linux/kernel.h include with the specific
    headers needed: linux/cache.h, linux/compiler.h, and
    linux/sprintf.h (patch 8).

Build-tested with both the in-kernel build (lib/bootconfig.o,
init/main.o) and the userspace tools/bootconfig build. All 70
tools/bootconfig test cases pass.

Josh Law (17):
  lib/bootconfig: add missing __init annotations to static helpers
  lib/bootconfig: fix typos, kerneldoc, and inconsistent if/else bracing
  lib/bootconfig: narrow flag parameter type from uint32_t to uint16_t
  lib/bootconfig: fix off-by-one in xbc_verify_tree() next node check
  lib/bootconfig: increment xbc_node_num after node init succeeds
  lib/bootconfig: drop redundant memset of xbc_nodes
  bootconfig: constify xbc_calc_checksum() data parameter
  lib/bootconfig: replace linux/kernel.h with specific includes
  lib/bootconfig: validate child node index in xbc_verify_tree()
  lib/bootconfig: check xbc_init_node() return in override path
  tools/bootconfig: fix fd leak in load_xbc_file() on fstat failure
  lib/bootconfig: fix signed comparison in xbc_node_get_data()
  lib/bootconfig: use size_t for strlen result in
    xbc_node_match_prefix()
  lib/bootconfig: narrow offset type in xbc_init_node()
  lib/bootconfig: use size_t for key length tracking in
    xbc_verify_tree()
  lib/bootconfig: fix sign-compare in xbc_node_compose_key_after()
  lib/bootconfig: change xbc_node_index() return type to uint16_t

 include/linux/bootconfig.h |  6 ++--
 lib/bootconfig.c           | 71 ++++++++++++++++++++++----------------
 tools/bootconfig/main.c    |  4 ++-
 3 files changed, 47 insertions(+), 34 deletions(-)

-- 
2.34.1


^ permalink raw reply

* Re: [PATCH v4 00/17] bootconfig: fixes, cleanups, and modernization
From: Masami Hiramatsu @ 2026-03-15  8:30 UTC (permalink / raw)
  To: Josh Law; +Cc: Andrew Morton, linux-trace-kernel, linux-kernel
In-Reply-To: <20260314230155.155777-1-objecting@objecting.org>

Hi Josh,

Thanks for cleaning up. I had some comments. Please check my reply.
Basically, I don't see any urgent bugfixes in this series. In summary;

OK for-next: [01][06][08][09][10][12][13][15]
Need Fixed taa: [16][17]
Request to fold:[02][03][04][05][07]
NACK: [11][14]

Thank you,

On Sat, 14 Mar 2026 23:01:38 +0000
Josh Law <objecting@objecting.org> wrote:

> This series addresses a collection of issues found during a review of
> lib/bootconfig.c, include/linux/bootconfig.h, and tools/bootconfig,
> ranging from off-by-one errors and unchecked return values to coding
> style and API modernization.
> 
> Changes since v3:
>   - Added commit descriptions to all patches that were missing them
>     (patches 2, 3, 4, 7).
>   - Added real-world impact statements to all bug-fix patches
>     (patches 8, 9, 15, 16).
> 
> Changes since v2:
>   - Added "validate child node index in xbc_verify_tree()" —
>     xbc_verify_tree() validated next-node indices but not child indices;
>     an out-of-bounds child would cause xbc_node_get_child() to access
>     memory beyond the xbc_nodes array (patch 15).
>   - Added "check xbc_init_node() return in override path" — the ':='
>     override path in xbc_parse_kv() ignored xbc_init_node()'s return
>     value, silently continuing with stale node data on failure
>     (patch 16).
>   - Added "fix fd leak in load_xbc_file() on fstat failure" — if
>     fstat() failed after open() succeeded, the file descriptor was
>     leaked (patch 17).
> 
> Changes since v1:
>   - Dropped "return empty string instead of NULL from
>     xbc_node_get_data()" — returning "" causes false matches in
>     xbc_node_match_prefix() because strncmp(..., "", 0) always
>     returns 0.
> 
> Bug fixes:
>   - Fix off-by-one in xbc_verify_tree() where a next-node index equal
>     to xbc_node_num passes the bounds check despite being out of range;
>     a malformed bootconfig could cause an out-of-bounds read of kernel
>     memory during tree traversal at boot time (patch 8).
>   - Move xbc_node_num increment to after xbc_init_node() validation
>     so a failed init does not leave a partially initialized node
>     counted in the array; on a maximum-size bootconfig, the
>     uninitialized node could be traversed leading to unpredictable
>     boot behavior (patch 9).
>   - Validate child node indices in xbc_verify_tree() alongside the
>     existing next-node check; without this, a corrupt bootconfig could
>     trigger an out-of-bounds memory access via an invalid child index
>     during tree traversal (patch 15).
>   - Check xbc_init_node() return value in the ':=' override path; a
>     bootconfig using ':=' near the 32KB data limit could silently
>     retain the old value, meaning a security-relevant boot parameter
>     override would not take effect (patch 16).
>   - Fix file descriptor leak in tools/bootconfig load_xbc_file()
>     when fstat() fails (patch 17).
> 
> Correctness:
>   - Add missing __init annotations to skip_comment() and
>     skip_spaces_until_newline() so their memory can be reclaimed
>     after init (patch 1).
>   - Narrow the flag parameter in node creation helpers from uint32_t
>     to uint16_t to match the xbc_node.data field width (patch 6).
>   - Constify the xbc_calc_checksum() data parameter since it only
>     reads the buffer (patch 12).
> 
> Cleanups:
>   - Fix comment typos (patches 2-3), missing blank line before
>     kerneldoc (patch 4), inconsistent if/else bracing (patches 5, 7).
>   - Drop redundant memset after memblock_alloc which already returns
>     zeroed memory; switch the userspace path from malloc to calloc
>     to match (patch 10).
> 
> Modernization:
>   - Replace open-coded __attribute__((__packed__)) with the __packed
>     macro, adding the definition to the tools/bootconfig shim header
>     (patches 11, 14).
>   - Replace the catch-all linux/kernel.h include with the specific
>     headers needed: linux/cache.h, linux/compiler.h, and
>     linux/sprintf.h (patch 13).
> 
> Build-tested with both the in-kernel build (lib/bootconfig.o,
> init/main.o) and the userspace tools/bootconfig build. All 70
> tools/bootconfig test cases pass.
> 
> Josh Law (17):
>   lib/bootconfig: add missing __init annotations to static helpers
>   lib/bootconfig: fix typo "initiized" in xbc_root_node() kerneldoc
>   lib/bootconfig: fix typo "uder" in xbc_node_find_next_leaf()
>   lib/bootconfig: add blank line before xbc_get_info() kerneldoc
>   lib/bootconfig: fix inconsistent if/else bracing
>   lib/bootconfig: narrow flag parameter type from uint32_t to uint16_t
>   lib/bootconfig: fix inconsistent if/else bracing in __xbc_add_key()
>   lib/bootconfig: fix off-by-one in xbc_verify_tree() next node check
>   lib/bootconfig: increment xbc_node_num after node init succeeds
>   lib/bootconfig: drop redundant memset of xbc_nodes
>   bootconfig: use __packed macro for struct xbc_node
>   bootconfig: constify xbc_calc_checksum() data parameter
>   lib/bootconfig: replace linux/kernel.h with specific includes
>   bootconfig: add __packed definition to tools/bootconfig shim header
>   lib/bootconfig: validate child node index in xbc_verify_tree()
>   lib/bootconfig: check xbc_init_node() return in override path
>   tools/bootconfig: fix fd leak in load_xbc_file() on fstat failure
> 
>  include/linux/bootconfig.h                  |  6 +--
>  lib/bootconfig.c                            | 54 ++++++++++++---------
>  tools/bootconfig/include/linux/bootconfig.h |  1 +
>  tools/bootconfig/main.c                     |  4 +-
>  4 files changed, 39 insertions(+), 26 deletions(-)
> 
> -- 
> 2.34.1
> 


-- 
Masami Hiramatsu (Google) <mhiramat@kernel.org>

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox