* [PATCH v3 1/4] tracing/user_events: Split up mm alloc and attach
2023-05-19 23:07 [PATCH v3 0/4] tracing/user_events: Use non-RCU context for enabler writes Beau Belgrave
@ 2023-05-19 23:07 ` Beau Belgrave
2023-05-19 23:07 ` [PATCH v3 2/4] tracing/user_events: Remove RCU lock while pinning pages Beau Belgrave
` (3 subsequent siblings)
4 siblings, 0 replies; 6+ messages in thread
From: Beau Belgrave @ 2023-05-19 23:07 UTC (permalink / raw)
To: rostedt, mhiramat; +Cc: linux-kernel, linux-trace-kernel, torvalds, ast
From: Linus Torvalds <torvalds@linux-foundation.org>
When a new mm is being created in a fork() path, it is currently
allocated and then attached in one go. This leaves the mm exposed to
the tracing register callbacks while any parent enabler locations are
copied in. This should not happen.
Split up mm alloc and attach into separate operations. When duplicating
enablers, first alloc, then duplicate, and only upon success, attach.
This prevents any timing window outside of the event_reg mutex for
enablement walking. This allows for dropping the RCU requirement for
enablement walking in later patches.
Link: https://lore.kernel.org/linux-trace-kernel/CAHk-=whTBvXJuoi_kACo3qi5WZUmRrhyA-_=rRFsycTytmB6qw@mail.gmail.com/
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
[ change log written by Beau Belgrave ]
Signed-off-by: Beau Belgrave <beaub@linux.microsoft.com>
---
kernel/trace/trace_events_user.c | 29 ++++++++++++++++++-----------
1 file changed, 18 insertions(+), 11 deletions(-)
diff --git a/kernel/trace/trace_events_user.c b/kernel/trace/trace_events_user.c
index b1ecd7677642..b2aecbfbbd24 100644
--- a/kernel/trace/trace_events_user.c
+++ b/kernel/trace/trace_events_user.c
@@ -538,10 +538,9 @@ static struct user_event_mm *user_event_mm_get_all(struct user_event *user)
return found;
}
-static struct user_event_mm *user_event_mm_create(struct task_struct *t)
+static struct user_event_mm *user_event_mm_alloc(struct task_struct *t)
{
struct user_event_mm *user_mm;
- unsigned long flags;
user_mm = kzalloc(sizeof(*user_mm), GFP_KERNEL_ACCOUNT);
@@ -553,12 +552,6 @@ static struct user_event_mm *user_event_mm_create(struct task_struct *t)
refcount_set(&user_mm->refcnt, 1);
refcount_set(&user_mm->tasks, 1);
- spin_lock_irqsave(&user_event_mms_lock, flags);
- list_add_rcu(&user_mm->link, &user_event_mms);
- spin_unlock_irqrestore(&user_event_mms_lock, flags);
-
- t->user_event_mm = user_mm;
-
/*
* The lifetime of the memory descriptor can slightly outlast
* the task lifetime if a ref to the user_event_mm is taken
@@ -572,6 +565,17 @@ static struct user_event_mm *user_event_mm_create(struct task_struct *t)
return user_mm;
}
+static void user_event_mm_attach(struct user_event_mm *user_mm, struct task_struct *t)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&user_event_mms_lock, flags);
+ list_add_rcu(&user_mm->link, &user_event_mms);
+ spin_unlock_irqrestore(&user_event_mms_lock, flags);
+
+ t->user_event_mm = user_mm;
+}
+
static struct user_event_mm *current_user_event_mm(void)
{
struct user_event_mm *user_mm = current->user_event_mm;
@@ -579,10 +583,12 @@ static struct user_event_mm *current_user_event_mm(void)
if (user_mm)
goto inc;
- user_mm = user_event_mm_create(current);
+ user_mm = user_event_mm_alloc(current);
if (!user_mm)
goto error;
+
+ user_event_mm_attach(user_mm, current);
inc:
refcount_inc(&user_mm->refcnt);
error:
@@ -670,7 +676,7 @@ void user_event_mm_remove(struct task_struct *t)
void user_event_mm_dup(struct task_struct *t, struct user_event_mm *old_mm)
{
- struct user_event_mm *mm = user_event_mm_create(t);
+ struct user_event_mm *mm = user_event_mm_alloc(t);
struct user_event_enabler *enabler;
if (!mm)
@@ -684,10 +690,11 @@ void user_event_mm_dup(struct task_struct *t, struct user_event_mm *old_mm)
rcu_read_unlock();
+ user_event_mm_attach(mm, t);
return;
error:
rcu_read_unlock();
- user_event_mm_remove(t);
+ user_event_mm_destroy(mm);
}
static bool current_user_event_enabler_exists(unsigned long uaddr,
--
2.25.1
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [PATCH v3 2/4] tracing/user_events: Remove RCU lock while pinning pages
2023-05-19 23:07 [PATCH v3 0/4] tracing/user_events: Use non-RCU context for enabler writes Beau Belgrave
2023-05-19 23:07 ` [PATCH v3 1/4] tracing/user_events: Split up mm alloc and attach Beau Belgrave
@ 2023-05-19 23:07 ` Beau Belgrave
2023-05-19 23:07 ` [PATCH v3 3/4] tracing/user_events: Rename link fields for clarity Beau Belgrave
` (2 subsequent siblings)
4 siblings, 0 replies; 6+ messages in thread
From: Beau Belgrave @ 2023-05-19 23:07 UTC (permalink / raw)
To: rostedt, mhiramat; +Cc: linux-kernel, linux-trace-kernel, torvalds, ast
From: Linus Torvalds <torvalds@linux-foundation.org>
pin_user_pages_remote() can reschedule, which means we cannot hold any
RCU lock while using it. Now that enablers are not exposed to the
tracing register callbacks during fork(), there is clearly no need to
require the RCU lock, as event_mutex is enough to protect changes.
Remove unneeded RCU usage when pinning pages and walking enablers with
event_mutex held. Clean up a misleading "safe" list walk that is not
needed. During fork() duplication, remove the unneeded RCU list add, since
the list is not exposed yet.
Link: https://lore.kernel.org/linux-trace-kernel/CAHk-=wiiBfT4zNS29jA0XEsy8EmbqTH1hAPdRJCDAJMD8Gxt5A@mail.gmail.com/
Fixes: 7235759084a4 ("tracing/user_events: Use remote writes for event enablement")
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
[ change log written by Beau Belgrave ]
Signed-off-by: Beau Belgrave <beaub@linux.microsoft.com>
---
kernel/trace/trace_events_user.c | 13 +++++++------
1 file changed, 7 insertions(+), 6 deletions(-)
diff --git a/kernel/trace/trace_events_user.c b/kernel/trace/trace_events_user.c
index b2aecbfbbd24..2f70dabb0f71 100644
--- a/kernel/trace/trace_events_user.c
+++ b/kernel/trace/trace_events_user.c
@@ -437,9 +437,8 @@ static bool user_event_enabler_exists(struct user_event_mm *mm,
unsigned long uaddr, unsigned char bit)
{
struct user_event_enabler *enabler;
- struct user_event_enabler *next;
- list_for_each_entry_safe(enabler, next, &mm->enablers, link) {
+ list_for_each_entry(enabler, &mm->enablers, link) {
if (enabler->addr == uaddr &&
(enabler->values & ENABLE_VAL_BIT_MASK) == bit)
return true;
@@ -455,19 +454,19 @@ static void user_event_enabler_update(struct user_event *user)
struct user_event_mm *next;
int attempt;
+ lockdep_assert_held(&event_mutex);
+
while (mm) {
next = mm->next;
mmap_read_lock(mm->mm);
- rcu_read_lock();
- list_for_each_entry_rcu(enabler, &mm->enablers, link) {
+ list_for_each_entry(enabler, &mm->enablers, link) {
if (enabler->event == user) {
attempt = 0;
user_event_enabler_write(mm, enabler, true, &attempt);
}
}
- rcu_read_unlock();
mmap_read_unlock(mm->mm);
user_event_mm_put(mm);
mm = next;
@@ -495,7 +494,9 @@ static bool user_event_enabler_dup(struct user_event_enabler *orig,
enabler->values = orig->values & ENABLE_VAL_DUP_MASK;
refcount_inc(&enabler->event->refcnt);
- list_add_rcu(&enabler->link, &mm->enablers);
+
+ /* Enablers not exposed yet, RCU not required */
+ list_add(&enabler->link, &mm->enablers);
return true;
}
--
2.25.1
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [PATCH v3 3/4] tracing/user_events: Rename link fields for clarity
2023-05-19 23:07 [PATCH v3 0/4] tracing/user_events: Use non-RCU context for enabler writes Beau Belgrave
2023-05-19 23:07 ` [PATCH v3 1/4] tracing/user_events: Split up mm alloc and attach Beau Belgrave
2023-05-19 23:07 ` [PATCH v3 2/4] tracing/user_events: Remove RCU lock while pinning pages Beau Belgrave
@ 2023-05-19 23:07 ` Beau Belgrave
2023-05-19 23:07 ` [PATCH v3 4/4] tracing/user_events: Document user_event_mm one-shot list usage Beau Belgrave
2023-05-20 2:15 ` [PATCH v3 0/4] tracing/user_events: Use non-RCU context for enabler writes Linus Torvalds
4 siblings, 0 replies; 6+ messages in thread
From: Beau Belgrave @ 2023-05-19 23:07 UTC (permalink / raw)
To: rostedt, mhiramat; +Cc: linux-kernel, linux-trace-kernel, torvalds, ast
Currently most list_head fields of various structs within user_events
are simply named link. This causes folks to keep additional context in
their head when working with the code, which can be confusing.
Instead of using link, describe what the actual link is, for example:
list_del_rcu(&mm->link);
Becomes:
list_del_rcu(&mm->mms_link);
The reader is now given a hint that the link is to the global mms list,
instead of having to remember or spot-check within the code.
Link: https://lore.kernel.org/linux-trace-kernel/CAHk-=wicngggxVpbnrYHjRTwGE0WYscPRM+L2HO2BF8ia1EXgQ@mail.gmail.com/
Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Beau Belgrave <beaub@linux.microsoft.com>
---
include/linux/user_events.h | 2 +-
kernel/trace/trace_events_user.c | 40 ++++++++++++++++++--------------
2 files changed, 23 insertions(+), 19 deletions(-)
diff --git a/include/linux/user_events.h b/include/linux/user_events.h
index 2847f5a18a86..17d452b389de 100644
--- a/include/linux/user_events.h
+++ b/include/linux/user_events.h
@@ -17,7 +17,7 @@
#ifdef CONFIG_USER_EVENTS
struct user_event_mm {
- struct list_head link;
+ struct list_head mms_link;
struct list_head enablers;
struct mm_struct *mm;
struct user_event_mm *next;
diff --git a/kernel/trace/trace_events_user.c b/kernel/trace/trace_events_user.c
index 2f70dabb0f71..360d0f965cb8 100644
--- a/kernel/trace/trace_events_user.c
+++ b/kernel/trace/trace_events_user.c
@@ -96,7 +96,7 @@ struct user_event {
* these to track enablement sites that are tied to an event.
*/
struct user_event_enabler {
- struct list_head link;
+ struct list_head mm_enablers_link;
struct user_event *event;
unsigned long addr;
@@ -153,7 +153,7 @@ struct user_event_file_info {
#define VALIDATOR_REL (1 << 1)
struct user_event_validator {
- struct list_head link;
+ struct list_head user_event_link;
int offset;
int flags;
};
@@ -259,7 +259,7 @@ static struct user_event_group
static void user_event_enabler_destroy(struct user_event_enabler *enabler)
{
- list_del_rcu(&enabler->link);
+ list_del_rcu(&enabler->mm_enablers_link);
/* No longer tracking the event via the enabler */
refcount_dec(&enabler->event->refcnt);
@@ -438,7 +438,7 @@ static bool user_event_enabler_exists(struct user_event_mm *mm,
{
struct user_event_enabler *enabler;
- list_for_each_entry(enabler, &mm->enablers, link) {
+ list_for_each_entry(enabler, &mm->enablers, mm_enablers_link) {
if (enabler->addr == uaddr &&
(enabler->values & ENABLE_VAL_BIT_MASK) == bit)
return true;
@@ -460,7 +460,7 @@ static void user_event_enabler_update(struct user_event *user)
next = mm->next;
mmap_read_lock(mm->mm);
- list_for_each_entry(enabler, &mm->enablers, link) {
+ list_for_each_entry(enabler, &mm->enablers, mm_enablers_link) {
if (enabler->event == user) {
attempt = 0;
user_event_enabler_write(mm, enabler, true, &attempt);
@@ -496,7 +496,7 @@ static bool user_event_enabler_dup(struct user_event_enabler *orig,
refcount_inc(&enabler->event->refcnt);
/* Enablers not exposed yet, RCU not required */
- list_add(&enabler->link, &mm->enablers);
+ list_add(&enabler->mm_enablers_link, &mm->enablers);
return true;
}
@@ -526,13 +526,15 @@ static struct user_event_mm *user_event_mm_get_all(struct user_event *user)
*/
rcu_read_lock();
- list_for_each_entry_rcu(mm, &user_event_mms, link)
- list_for_each_entry_rcu(enabler, &mm->enablers, link)
+ list_for_each_entry_rcu(mm, &user_event_mms, mms_link) {
+ list_for_each_entry_rcu(enabler, &mm->enablers, mm_enablers_link) {
if (enabler->event == user) {
mm->next = found;
found = user_event_mm_get(mm);
break;
}
+ }
+ }
rcu_read_unlock();
@@ -571,7 +573,7 @@ static void user_event_mm_attach(struct user_event_mm *user_mm, struct task_stru
unsigned long flags;
spin_lock_irqsave(&user_event_mms_lock, flags);
- list_add_rcu(&user_mm->link, &user_event_mms);
+ list_add_rcu(&user_mm->mms_link, &user_event_mms);
spin_unlock_irqrestore(&user_event_mms_lock, flags);
t->user_event_mm = user_mm;
@@ -600,7 +602,7 @@ static void user_event_mm_destroy(struct user_event_mm *mm)
{
struct user_event_enabler *enabler, *next;
- list_for_each_entry_safe(enabler, next, &mm->enablers, link)
+ list_for_each_entry_safe(enabler, next, &mm->enablers, mm_enablers_link)
user_event_enabler_destroy(enabler);
mmdrop(mm->mm);
@@ -637,7 +639,7 @@ void user_event_mm_remove(struct task_struct *t)
/* Remove the mm from the list, so it can no longer be enabled */
spin_lock_irqsave(&user_event_mms_lock, flags);
- list_del_rcu(&mm->link);
+ list_del_rcu(&mm->mms_link);
spin_unlock_irqrestore(&user_event_mms_lock, flags);
/*
@@ -685,9 +687,10 @@ void user_event_mm_dup(struct task_struct *t, struct user_event_mm *old_mm)
rcu_read_lock();
- list_for_each_entry_rcu(enabler, &old_mm->enablers, link)
+ list_for_each_entry_rcu(enabler, &old_mm->enablers, mm_enablers_link) {
if (!user_event_enabler_dup(enabler, mm))
goto error;
+ }
rcu_read_unlock();
@@ -756,7 +759,7 @@ static struct user_event_enabler
*/
if (!*write_result) {
refcount_inc(&enabler->event->refcnt);
- list_add_rcu(&enabler->link, &user_mm->enablers);
+ list_add_rcu(&enabler->mm_enablers_link, &user_mm->enablers);
}
mutex_unlock(&event_mutex);
@@ -912,8 +915,8 @@ static void user_event_destroy_validators(struct user_event *user)
struct user_event_validator *validator, *next;
struct list_head *head = &user->validators;
- list_for_each_entry_safe(validator, next, head, link) {
- list_del(&validator->link);
+ list_for_each_entry_safe(validator, next, head, user_event_link) {
+ list_del(&validator->user_event_link);
kfree(validator);
}
}
@@ -967,7 +970,7 @@ static int user_event_add_field(struct user_event *user, const char *type,
validator->offset = offset;
/* Want sequential access when validating */
- list_add_tail(&validator->link, &user->validators);
+ list_add_tail(&validator->user_event_link, &user->validators);
add_field:
field->type = type;
@@ -1357,7 +1360,7 @@ static int user_event_validate(struct user_event *user, void *data, int len)
void *pos, *end = data + len;
u32 loc, offset, size;
- list_for_each_entry(validator, head, link) {
+ list_for_each_entry(validator, head, user_event_link) {
pos = data + validator->offset;
/* Already done min_size check, no bounds check here */
@@ -2278,7 +2281,7 @@ static long user_events_ioctl_unreg(unsigned long uarg)
*/
mutex_lock(&event_mutex);
- list_for_each_entry_safe(enabler, next, &mm->enablers, link)
+ list_for_each_entry_safe(enabler, next, &mm->enablers, mm_enablers_link) {
if (enabler->addr == reg.disable_addr &&
(enabler->values & ENABLE_VAL_BIT_MASK) == reg.disable_bit) {
set_bit(ENABLE_VAL_FREEING_BIT, ENABLE_BITOPS(enabler));
@@ -2289,6 +2292,7 @@ static long user_events_ioctl_unreg(unsigned long uarg)
/* Removed at least one */
ret = 0;
}
+ }
mutex_unlock(&event_mutex);
--
2.25.1
^ permalink raw reply related [flat|nested] 6+ messages in thread