From: Steven Rostedt <rostedt@goodmis.org>
To: linux-kernel@vger.kernel.org, stable@vger.kernel.org,
<gregkh@linuxfoundation.org>
Cc: Masami Hiramatsu <mhiramat@kernel.org>,
Mark Rutland <mark.rutland@arm.com>,
Andrew Morton <akpm@linux-foundation.org>,
Ajay Kaher <akaher@vmware.com>
Subject: [v6.6][PATCH 4/5] eventfs: Delete eventfs_inode when the last dentry is freed
Date: Sun, 05 Nov 2023 10:56:34 -0500 [thread overview]
Message-ID: <20231105160139.821908367@goodmis.org> (raw)
In-Reply-To: 20231105155630.925114107@goodmis.org
From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
commit 020010fbfa202aa528a52743eba4ab0da3400a4e upstream
There exists a race between holding a reference of an eventfs_inode dentry
and the freeing of the eventfs_inode. If user space has a dentry held long
enough, it may still be able to access the dentry's eventfs_inode after it
has been freed.
To prevent this, have he eventfs_inode freed via the last dput() (or via
RCU if the eventfs_inode does not have a dentry).
This means reintroducing the eventfs_inode del_list field at a temporary
place to put the eventfs_inode. It needs to mark it as freed (via the
list) but also must invalidate the dentry immediately as the return from
eventfs_remove_dir() expects that they are. But the dentry invalidation
must not be called under the eventfs_mutex, so it must be done after the
eventfs_inode is marked as free (put on a deletion list).
Link: https://lkml.kernel.org/r/20231101172650.123479767@goodmis.org
Cc: stable@vger.kernel.org
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Ajay Kaher <akaher@vmware.com>
Fixes: 5bdcd5f5331a2 ("eventfs: Implement removal of meta data from eventfs")
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
fs/tracefs/event_inode.c | 150 +++++++++++++++++++--------------------
1 file changed, 74 insertions(+), 76 deletions(-)
diff --git a/fs/tracefs/event_inode.c b/fs/tracefs/event_inode.c
index 6a3f7502310c..7aa92b8ebc51 100644
--- a/fs/tracefs/event_inode.c
+++ b/fs/tracefs/event_inode.c
@@ -53,10 +53,12 @@ struct eventfs_file {
const struct inode_operations *iop;
/*
* Union - used for deletion
+ * @llist: for calling dput() if needed after RCU
* @del_list: list of eventfs_file to delete
* @rcu: eventfs_file to delete in RCU
*/
union {
+ struct llist_node llist;
struct list_head del_list;
struct rcu_head rcu;
};
@@ -113,8 +115,7 @@ static int eventfs_set_attr(struct mnt_idmap *idmap, struct dentry *dentry,
mutex_lock(&eventfs_mutex);
ef = dentry->d_fsdata;
- /* The LSB is set when the eventfs_inode is being freed */
- if (((unsigned long)ef & 1UL) || ef->is_freed) {
+ if (ef->is_freed) {
/* Do not allow changes if the event is about to be removed. */
mutex_unlock(&eventfs_mutex);
return -ENODEV;
@@ -258,6 +259,13 @@ static struct dentry *create_dir(struct eventfs_file *ef,
return eventfs_end_creating(dentry);
}
+static void free_ef(struct eventfs_file *ef)
+{
+ kfree(ef->name);
+ kfree(ef->ei);
+ kfree(ef);
+}
+
/**
* eventfs_set_ef_status_free - set the ef->status to free
* @ti: the tracefs_inode of the dentry
@@ -270,34 +278,20 @@ void eventfs_set_ef_status_free(struct tracefs_inode *ti, struct dentry *dentry)
{
struct tracefs_inode *ti_parent;
struct eventfs_inode *ei;
- struct eventfs_file *ef, *tmp;
+ struct eventfs_file *ef;
/* The top level events directory may be freed by this */
if (unlikely(ti->flags & TRACEFS_EVENT_TOP_INODE)) {
- LIST_HEAD(ef_del_list);
-
mutex_lock(&eventfs_mutex);
-
ei = ti->private;
- /* Record all the top level files */
- list_for_each_entry_srcu(ef, &ei->e_top_files, list,
- lockdep_is_held(&eventfs_mutex)) {
- list_add_tail(&ef->del_list, &ef_del_list);
- }
-
/* Nothing should access this, but just in case! */
ti->private = NULL;
-
mutex_unlock(&eventfs_mutex);
- /* Now safely free the top level files and their children */
- list_for_each_entry_safe(ef, tmp, &ef_del_list, del_list) {
- list_del(&ef->del_list);
- eventfs_remove(ef);
- }
-
- kfree(ei);
+ ef = dentry->d_fsdata;
+ if (ef)
+ free_ef(ef);
return;
}
@@ -311,16 +305,13 @@ void eventfs_set_ef_status_free(struct tracefs_inode *ti, struct dentry *dentry)
if (!ef)
goto out;
- /*
- * If ef was freed, then the LSB bit is set for d_fsdata.
- * But this should not happen, as it should still have a
- * ref count that prevents it. Warn in case it does.
- */
- if (WARN_ON_ONCE((unsigned long)ef & 1))
- goto out;
+ if (ef->is_freed) {
+ free_ef(ef);
+ } else {
+ ef->dentry = NULL;
+ }
dentry->d_fsdata = NULL;
- ef->dentry = NULL;
out:
mutex_unlock(&eventfs_mutex);
}
@@ -847,13 +838,53 @@ int eventfs_add_file(const char *name, umode_t mode,
return 0;
}
-static void free_ef(struct rcu_head *head)
+static LLIST_HEAD(free_list);
+
+static void eventfs_workfn(struct work_struct *work)
+{
+ struct eventfs_file *ef, *tmp;
+ struct llist_node *llnode;
+
+ llnode = llist_del_all(&free_list);
+ llist_for_each_entry_safe(ef, tmp, llnode, llist) {
+ /* This should only get here if it had a dentry */
+ if (!WARN_ON_ONCE(!ef->dentry))
+ dput(ef->dentry);
+ }
+}
+
+static DECLARE_WORK(eventfs_work, eventfs_workfn);
+
+static void free_rcu_ef(struct rcu_head *head)
{
struct eventfs_file *ef = container_of(head, struct eventfs_file, rcu);
- kfree(ef->name);
- kfree(ef->ei);
- kfree(ef);
+ if (ef->dentry) {
+ /* Do not free the ef until all references of dentry are gone */
+ if (llist_add(&ef->llist, &free_list))
+ queue_work(system_unbound_wq, &eventfs_work);
+ return;
+ }
+
+ free_ef(ef);
+}
+
+static void unhook_dentry(struct dentry *dentry)
+{
+ if (!dentry)
+ return;
+
+ /* Keep the dentry from being freed yet (see eventfs_workfn()) */
+ dget(dentry);
+
+ dentry->d_fsdata = NULL;
+ d_invalidate(dentry);
+ mutex_lock(&eventfs_mutex);
+ /* dentry should now have at least a single reference */
+ WARN_ONCE((int)d_count(dentry) < 1,
+ "dentry %px (%s) less than one reference (%d) after invalidate\n",
+ dentry, dentry->d_name.name, d_count(dentry));
+ mutex_unlock(&eventfs_mutex);
}
/**
@@ -905,58 +936,25 @@ void eventfs_remove(struct eventfs_file *ef)
{
struct eventfs_file *tmp;
LIST_HEAD(ef_del_list);
- struct dentry *dentry_list = NULL;
- struct dentry *dentry;
if (!ef)
return;
+ /*
+ * Move the deleted eventfs_inodes onto the ei_del_list
+ * which will also set the is_freed value. Note, this has to be
+ * done under the eventfs_mutex, but the deletions of
+ * the dentries must be done outside the eventfs_mutex.
+ * Hence moving them to this temporary list.
+ */
mutex_lock(&eventfs_mutex);
eventfs_remove_rec(ef, &ef_del_list, 0);
- list_for_each_entry_safe(ef, tmp, &ef_del_list, del_list) {
- if (ef->dentry) {
- unsigned long ptr = (unsigned long)dentry_list;
-
- /* Keep the dentry from being freed yet */
- dget(ef->dentry);
-
- /*
- * Paranoid: The dget() above should prevent the dentry
- * from being freed and calling eventfs_set_ef_status_free().
- * But just in case, set the link list LSB pointer to 1
- * and have eventfs_set_ef_status_free() check that to
- * make sure that if it does happen, it will not think
- * the d_fsdata is an event_file.
- *
- * For this to work, no event_file should be allocated
- * on a odd space, as the ef should always be allocated
- * to be at least word aligned. Check for that too.
- */
- WARN_ON_ONCE(ptr & 1);
-
- ef->dentry->d_fsdata = (void *)(ptr | 1);
- dentry_list = ef->dentry;
- ef->dentry = NULL;
- }
- call_srcu(&eventfs_srcu, &ef->rcu, free_ef);
- }
mutex_unlock(&eventfs_mutex);
- while (dentry_list) {
- unsigned long ptr;
-
- dentry = dentry_list;
- ptr = (unsigned long)dentry->d_fsdata & ~1UL;
- dentry_list = (struct dentry *)ptr;
- dentry->d_fsdata = NULL;
- d_invalidate(dentry);
- mutex_lock(&eventfs_mutex);
- /* dentry should now have at least a single reference */
- WARN_ONCE((int)d_count(dentry) < 1,
- "dentry %p less than one reference (%d) after invalidate\n",
- dentry, d_count(dentry));
- mutex_unlock(&eventfs_mutex);
- dput(dentry);
+ list_for_each_entry_safe(ef, tmp, &ef_del_list, del_list) {
+ unhook_dentry(ef->dentry);
+ list_del(&ef->del_list);
+ call_srcu(&eventfs_srcu, &ef->rcu, free_rcu_ef);
}
}
--
2.42.0
next prev parent reply other threads:[~2023-11-05 16:01 UTC|newest]
Thread overview: 14+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-11-05 15:56 [v6.6][PATCH 0/5] tracing: Backport of eventfs fixes for v6.6 Steven Rostedt
2023-11-05 15:56 ` [v6.6][PATCH 1/5] tracing: Have trace_event_file have ref counters Steven Rostedt
2023-11-05 15:56 ` [v6.6][PATCH 2/5] eventfs: Remove "is_freed" union with rcu head Steven Rostedt
2023-11-05 15:56 ` [v6.6][PATCH 3/5] eventfs: Save ownership and mode Steven Rostedt
2023-11-12 10:41 ` NULL pointer dereference regression when running `chmod -R root:tracing /sys/kernel/debug/tracing` Milian Wolff
2023-11-12 12:14 ` Steven Rostedt
2023-11-12 14:26 ` Steven Rostedt
2023-11-14 13:38 ` Linux regression tracking #adding (Thorsten Leemhuis)
2023-11-14 13:55 ` Steven Rostedt
2023-11-14 15:06 ` Thorsten Leemhuis
2023-11-05 15:56 ` Steven Rostedt [this message]
2023-11-05 15:56 ` [v6.6][PATCH 5/5] eventfs: Use simple_recursive_removal() to clean up dentries Steven Rostedt
2023-11-06 11:40 ` [v6.6][PATCH 0/5] tracing: Backport of eventfs fixes for v6.6 Greg KH
2023-11-06 20:12 ` Steven Rostedt
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20231105160139.821908367@goodmis.org \
--to=rostedt@goodmis.org \
--cc=akaher@vmware.com \
--cc=akpm@linux-foundation.org \
--cc=gregkh@linuxfoundation.org \
--cc=linux-kernel@vger.kernel.org \
--cc=mark.rutland@arm.com \
--cc=mhiramat@kernel.org \
--cc=stable@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.