linux-fsdevel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Suren Baghdasaryan <surenb@google.com>
To: tj@kernel.org
Cc: gregkh@linuxfoundation.org, peterz@infradead.org,
	lujialin4@huawei.com, lizefan.x@bytedance.com,
	hannes@cmpxchg.org, mingo@redhat.com, ebiggers@kernel.org,
	oleg@redhat.com, akpm@linux-foundation.org,
	viro@zeniv.linux.org.uk, brauner@kernel.org,
	juri.lelli@redhat.com, vincent.guittot@linaro.org,
	dietmar.eggemann@arm.com, rostedt@goodmis.org,
	bsegall@google.com, mgorman@suse.de, bristot@redhat.com,
	vschneid@redhat.com, linux-kernel@vger.kernel.org,
	cgroups@vger.kernel.org, linux-fsdevel@vger.kernel.org,
	kernel-team@android.com, surenb@google.com
Subject: [PATCH 2/2] sched/psi: tie psi trigger destruction with file's lifecycle
Date: Mon, 26 Jun 2023 13:17:13 -0700	[thread overview]
Message-ID: <20230626201713.1204982-2-surenb@google.com> (raw)
In-Reply-To: <20230626201713.1204982-1-surenb@google.com>

Destroying the psi trigger in cgroup_file_release causes use-after-free
(UAF) issues when a cgroup is removed from under a polling process. This
happens because cgroup removal causes a call to cgroup_file_release via
this path:

do_rmdir
  cgroup_rmdir
    kernfs_drain_open_files
      cgroup_file_release
        cgroup_pressure_release

while the actual file is still alive. Destroying the trigger at this
point also destroys its waitqueue head, and if there is still a
polling process on that file accessing the waitqueue, it will step
on a freed pointer.
Patch [1] fixed this issue for the epoll() case using wake_up_pollfree();
however, the same issue exists for the synchronous poll() case.
The root cause of this issue is that the lifecycles of the psi trigger's
waitqueue and of the file associated with the trigger are different. Fix
this by destroying the trigger from inside the kernfs_ops.free operation,
which is tied to the last fput() of the file. This also renders the fix
in [1] obsolete, so revert it.

[1] commit c2dbe32d5db5 ("sched/psi: Fix use-after-free in ep_remove_wait_queue()")

Reported-by: Lu Jialin <lujialin4@huawei.com>
Closes: https://lore.kernel.org/all/20230613062306.101831-1-lujialin4@huawei.com/
Signed-off-by: Suren Baghdasaryan <surenb@google.com>
---
 include/linux/cgroup-defs.h |  1 +
 include/linux/psi.h         |  6 +++++-
 kernel/cgroup/cgroup.c      | 29 ++++++++++++++++++++++++++++-
 kernel/sched/psi.c          | 13 ++++++-------
 4 files changed, 40 insertions(+), 9 deletions(-)

diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index 8a0d5466c7be..6f5230a8821f 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -598,6 +598,7 @@ struct cftype {
 
 	int (*open)(struct kernfs_open_file *of);
 	void (*release)(struct kernfs_open_file *of);
+	void (*free)(struct kernfs_open_file *of);
 
 	/*
 	 * read_u64() is a shortcut for the common case of returning a
diff --git a/include/linux/psi.h b/include/linux/psi.h
index ab26200c2803..ebb4c7efba84 100644
--- a/include/linux/psi.h
+++ b/include/linux/psi.h
@@ -25,7 +25,11 @@ void psi_memstall_leave(unsigned long *flags);
 int psi_show(struct seq_file *s, struct psi_group *group, enum psi_res res);
 struct psi_trigger *psi_trigger_create(struct psi_group *group,
 			char *buf, enum psi_res res, struct file *file);
-void psi_trigger_destroy(struct psi_trigger *t);
+void psi_trigger_disable(struct psi_trigger *t);
+static inline void psi_trigger_destroy(struct psi_trigger *t)
+{
+	kfree(t);
+}
 
 __poll_t psi_trigger_poll(void **trigger_ptr, struct file *file,
 			poll_table *wait);
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 4d42f0cbc11e..62e91ce6ca20 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -3895,6 +3895,13 @@ static void cgroup_pressure_release(struct kernfs_open_file *of)
 {
 	struct cgroup_file_ctx *ctx = of->priv;
 
+	psi_trigger_disable(ctx->psi.trigger);
+}
+
+static void cgroup_pressure_free(struct kernfs_open_file *of)
+{
+	struct cgroup_file_ctx *ctx = of->priv;
+
 	psi_trigger_destroy(ctx->psi.trigger);
 }
 
@@ -4055,7 +4062,21 @@ static void cgroup_file_release(struct kernfs_open_file *of)
 	if (cft->release)
 		cft->release(of);
 	put_cgroup_ns(ctx->ns);
-	kfree(ctx);
+	/* Keep the context alive until cft->free is called */
+	if (!cft->free)
+		kfree(ctx);
+}
+
+static void cgroup_file_free(struct kernfs_open_file *of)
+{
+	struct cftype *cft = of_cft(of);
+
+	if (cft->free) {
+		struct cgroup_file_ctx *ctx = of->priv;
+
+		cft->free(of);
+		kfree(ctx);
+	}
 }
 
 static ssize_t cgroup_file_write(struct kernfs_open_file *of, char *buf,
@@ -4158,6 +4179,7 @@ static struct kernfs_ops cgroup_kf_single_ops = {
 	.atomic_write_len	= PAGE_SIZE,
 	.open			= cgroup_file_open,
 	.release		= cgroup_file_release,
+	.free			= cgroup_file_free,
 	.write			= cgroup_file_write,
 	.poll			= cgroup_file_poll,
 	.seq_show		= cgroup_seqfile_show,
@@ -4167,6 +4189,7 @@ static struct kernfs_ops cgroup_kf_ops = {
 	.atomic_write_len	= PAGE_SIZE,
 	.open			= cgroup_file_open,
 	.release		= cgroup_file_release,
+	.free			= cgroup_file_free,
 	.write			= cgroup_file_write,
 	.poll			= cgroup_file_poll,
 	.seq_start		= cgroup_seqfile_start,
@@ -5294,6 +5317,7 @@ static struct cftype cgroup_psi_files[] = {
 		.write = cgroup_io_pressure_write,
 		.poll = cgroup_pressure_poll,
 		.release = cgroup_pressure_release,
+		.free = cgroup_pressure_free,
 	},
 	{
 		.name = "memory.pressure",
@@ -5302,6 +5326,7 @@ static struct cftype cgroup_psi_files[] = {
 		.write = cgroup_memory_pressure_write,
 		.poll = cgroup_pressure_poll,
 		.release = cgroup_pressure_release,
+		.free = cgroup_pressure_free,
 	},
 	{
 		.name = "cpu.pressure",
@@ -5310,6 +5335,7 @@ static struct cftype cgroup_psi_files[] = {
 		.write = cgroup_cpu_pressure_write,
 		.poll = cgroup_pressure_poll,
 		.release = cgroup_pressure_release,
+		.free = cgroup_pressure_free,
 	},
 #ifdef CONFIG_IRQ_TIME_ACCOUNTING
 	{
@@ -5319,6 +5345,7 @@ static struct cftype cgroup_psi_files[] = {
 		.write = cgroup_irq_pressure_write,
 		.poll = cgroup_pressure_poll,
 		.release = cgroup_pressure_release,
+		.free = cgroup_pressure_free,
 	},
 #endif
 	{
diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c
index e072f6b31bf3..b4ad50805e08 100644
--- a/kernel/sched/psi.c
+++ b/kernel/sched/psi.c
@@ -622,7 +622,7 @@ static void psi_schedule_rtpoll_work(struct psi_group *group, unsigned long dela
 
 	task = rcu_dereference(group->rtpoll_task);
 	/*
-	 * kworker might be NULL in case psi_trigger_destroy races with
+	 * kworker might be NULL in case psi_trigger_disable races with
 	 * psi_task_change (hotpath) which can't use locks
 	 */
 	if (likely(task))
@@ -1372,7 +1372,7 @@ struct psi_trigger *psi_trigger_create(struct psi_group *group,
 	return t;
 }
 
-void psi_trigger_destroy(struct psi_trigger *t)
+void psi_trigger_disable(struct psi_trigger *t)
 {
 	struct psi_group *group;
 	struct task_struct *task_to_destroy = NULL;
@@ -1386,11 +1386,10 @@ void psi_trigger_destroy(struct psi_trigger *t)
 
 	group = t->group;
 	/*
-	 * Wakeup waiters to stop polling and clear the queue to prevent it from
-	 * being accessed later. Can happen if cgroup is deleted from under a
-	 * polling process.
+	 * Wakeup waiters to stop polling. Can happen if cgroup is deleted
+	 * from under a polling process.
 	 */
-	wake_up_pollfree(&t->event_wait);
+	wake_up_interruptible(&t->event_wait);
 
 	if (t->aggregator == PSI_AVGS) {
 		mutex_lock(&group->avgs_lock);
@@ -1446,7 +1445,6 @@ void psi_trigger_destroy(struct psi_trigger *t)
 		kthread_stop(task_to_destroy);
 		atomic_set(&group->rtpoll_scheduled, 0);
 	}
-	kfree(t);
 }
 
 __poll_t psi_trigger_poll(void **trigger_ptr,
@@ -1573,6 +1571,7 @@ static int psi_fop_release(struct inode *inode, struct file *file)
 {
 	struct seq_file *seq = file->private_data;
 
+	psi_trigger_disable(seq->private);
 	psi_trigger_destroy(seq->private);
 	return single_release(inode, file);
 }
-- 
2.41.0.162.gfafddb0af9-goog


  reply	other threads:[~2023-06-26 20:17 UTC|newest]

Thread overview: 34+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-06-26 20:17 [PATCH 1/2] kernfs: add kernfs_ops.free operation to free resources tied to the file Suren Baghdasaryan
2023-06-26 20:17 ` Suren Baghdasaryan [this message]
2023-06-26 20:21 ` Suren Baghdasaryan
2023-06-26 20:31 ` Tejun Heo
2023-06-26 20:39   ` Suren Baghdasaryan
2023-06-27  8:24   ` Christian Brauner
2023-06-27 17:09     ` Suren Baghdasaryan
2023-06-27 17:30       ` Christian Brauner
2023-06-27 17:36         ` Suren Baghdasaryan
2023-06-27 18:42         ` Tejun Heo
2023-06-27 20:09           ` Suren Baghdasaryan
2023-06-27 21:43             ` Suren Baghdasaryan
2023-06-27 21:58               ` Suren Baghdasaryan
2023-06-28  1:54                 ` Tejun Heo
2023-06-28  3:09                   ` Suren Baghdasaryan
2023-06-28  7:26                     ` Christian Brauner
2023-06-28  7:46                       ` Suren Baghdasaryan
2023-06-28  8:41                         ` Christian Brauner
2023-06-28 16:28                           ` Suren Baghdasaryan
2023-06-28 17:35                             ` Christian Brauner
2023-06-28 18:02                               ` Tejun Heo
2023-06-28 18:18                                 ` Suren Baghdasaryan
2023-06-28 18:42                                   ` Greg KH
2023-06-28 20:12                                     ` Suren Baghdasaryan
2023-06-28 20:34                                       ` Tejun Heo
2023-06-28 21:50                                         ` Suren Baghdasaryan
2023-06-30  0:59                                           ` Suren Baghdasaryan
2023-06-30  8:21                                             ` Christian Brauner
2023-07-10 20:38                                               ` Tejun Heo
2023-06-28 17:58                       ` Tejun Heo
2023-06-27  6:25 ` Greg KH
2023-06-27 17:03   ` Suren Baghdasaryan
2023-06-27 17:23     ` Christian Brauner
2023-06-27 17:36     ` Matthew Wilcox

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230626201713.1204982-2-surenb@google.com \
    --to=surenb@google.com \
    --cc=akpm@linux-foundation.org \
    --cc=brauner@kernel.org \
    --cc=bristot@redhat.com \
    --cc=bsegall@google.com \
    --cc=cgroups@vger.kernel.org \
    --cc=dietmar.eggemann@arm.com \
    --cc=ebiggers@kernel.org \
    --cc=gregkh@linuxfoundation.org \
    --cc=hannes@cmpxchg.org \
    --cc=juri.lelli@redhat.com \
    --cc=kernel-team@android.com \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=lizefan.x@bytedance.com \
    --cc=lujialin4@huawei.com \
    --cc=mgorman@suse.de \
    --cc=mingo@redhat.com \
    --cc=oleg@redhat.com \
    --cc=peterz@infradead.org \
    --cc=rostedt@goodmis.org \
    --cc=tj@kernel.org \
    --cc=vincent.guittot@linaro.org \
    --cc=viro@zeniv.linux.org.uk \
    --cc=vschneid@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).