linux-fsdevel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Oleg Nesterov <oleg@redhat.com>
To: "Kirill A. Shutemov" <kirill@shutemov.name>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>,
	linux-fsdevel@vger.kernel.org, linux-kernel@vger.kernel.org,
	David Howells <dhowells@redhat.com>,
	Peter Zijlstra <peterz@infradead.org>,
	Sasha Levin <levinsasha928@gmail.com>,
	Cyrill Gorcunov <gorcunov@openvz.org>,
	"David S. Miller" <davem@davemloft.net>,
	"Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Subject: Re: [PATCH, RESEND] procfs: silence lockdep warning about read vs. exec seq_file
Date: Sun, 3 Aug 2014 18:44:52 +0200	[thread overview]
Message-ID: <20140803164452.GA14626@redhat.com> (raw)
In-Reply-To: <1407010227-2269-1-git-send-email-kirill@shutemov.name>

Sorry for the delay,

On 08/02, Kirill A. Shutemov wrote:
>
> +/*
> + * proc_pid_personality() and proc_pid_stack() take cred_guard_mutex via
> + * lock_trace()

And at first glance that lock_trace() can die. But let's temporarily ignore
that; m_start() is trickier.

> +static struct lock_class_key pid_maps_seq_file_lock;
> +
>  void task_mem(struct seq_file *m, struct mm_struct *mm)
>  {
>  	unsigned long data, text, lib, swap;
> @@ -242,6 +254,7 @@ static int do_maps_open(struct inode *inode, struct file *file,
>  		ret = seq_open(file, ops);
>  		if (!ret) {
>  			struct seq_file *m = file->private_data;
> +			lockdep_set_class(&m->lock, &pid_maps_seq_file_lock);

Perhaps lockdep_set_subclass() would be better... But this doesn't matter.

The question is why m_start() calls mm_access() at all. This is not even
strictly correct if the task execs between m_stop() and m_start().

Can't we do something like below? The patch is obviously horrible and
incomplete, just to explain what I meant. Basically this is what
proc_mem_operations does.

Oleg.


diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 3ab6d14..c16b70e 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -267,6 +267,7 @@ extern int proc_remount(struct super_block *, int *, char *);
 struct proc_maps_private {
 	struct pid *pid;
 	struct task_struct *task;
+	struct mm_struct *mm;
 #ifdef CONFIG_MMU
 	struct vm_area_struct *tail_vma;
 #endif
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index cfa63ee..9b88248 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -165,9 +165,9 @@ static void *m_start(struct seq_file *m, loff_t *pos)
 	if (!priv->task)
 		return ERR_PTR(-ESRCH);
 
-	mm = mm_access(priv->task, PTRACE_MODE_READ);
-	if (!mm || IS_ERR(mm))
-		return mm;
+	mm = priv->mm;
+	if (!mm || !atomic_inc_not_zero(&mm->mm_users))
+		return NULL;
 	down_read(&mm->mmap_sem);
 
 	tail_vma = get_gate_vma(priv->task->mm);
@@ -231,6 +231,27 @@ static void m_stop(struct seq_file *m, void *v)
 		put_task_struct(priv->task);
 }
 
+// TODO: change __mem_open() to use this helper
+static struct mm_struct *xxx(struct inode *inode, struct file *file, unsigned int mode)
+{
+	struct task_struct *task = get_proc_task(inode);
+	struct mm_struct *mm = ERR_PTR(-ESRCH);
+
+	if (task) {
+		mm = mm_access(task, mode);
+		put_task_struct(task);
+
+		if (!IS_ERR_OR_NULL(mm)) {
+			/* ensure this mm_struct can't be freed */
+			atomic_inc(&mm->mm_count);
+			/* but do not pin its memory */
+			mmput(mm);
+		}
+	}
+
+	return mm;
+}
+
 static int do_maps_open(struct inode *inode, struct file *file,
 			const struct seq_operations *ops)
 {
@@ -239,17 +260,38 @@ static int do_maps_open(struct inode *inode, struct file *file,
 	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
 	if (priv) {
 		priv->pid = proc_pid(inode);
+		priv->mm = xxx(inode, file, PTRACE_MODE_READ);
+
+		// XXX cleanup me
+		if (IS_ERR(priv->mm)) {
+			ret = -EACCES;
+			goto free;
+		}
+
 		ret = seq_open(file, ops);
 		if (!ret) {
 			struct seq_file *m = file->private_data;
 			m->private = priv;
 		} else {
+			// XXX cleanup me
+			if (priv->mm)
+				mmdrop(priv->mm);
+ free:
 			kfree(priv);
 		}
 	}
 	return ret;
 }
 
+static int xxx_release(struct inode *inode, struct file *file)
+{
+	struct seq_file *seq = file->private_data;
+	struct proc_maps_private *priv = seq->private;
+	if (priv->mm)
+		mmdrop(priv->mm);
+	return 0;
+}
+
 static void
 show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid)
 {
@@ -399,14 +441,14 @@ const struct file_operations proc_pid_maps_operations = {
 	.open		= pid_maps_open,
 	.read		= seq_read,
 	.llseek		= seq_lseek,
-	.release	= seq_release_private,
+	.release	= xxx_release,
 };
 
 const struct file_operations proc_tid_maps_operations = {
 	.open		= tid_maps_open,
 	.read		= seq_read,
 	.llseek		= seq_lseek,
-	.release	= seq_release_private,
+	.release	= xxx_release,
 };
 
 /*
@@ -682,14 +724,14 @@ const struct file_operations proc_pid_smaps_operations = {
 	.open		= pid_smaps_open,
 	.read		= seq_read,
 	.llseek		= seq_lseek,
-	.release	= seq_release_private,
+	.release	= xxx_release,
 };
 
 const struct file_operations proc_tid_smaps_operations = {
 	.open		= tid_smaps_open,
 	.read		= seq_read,
 	.llseek		= seq_lseek,
-	.release	= seq_release_private,
+	.release	= xxx_release,
 };
 
 /*

  reply	other threads:[~2014-08-03 16:44 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-08-02 20:10 [PATCH, RESEND] procfs: silence lockdep warning about read vs. exec seq_file Kirill A. Shutemov
2014-08-03 16:44 ` Oleg Nesterov [this message]
2014-08-03 21:18   ` [PATCH 0/5] (Was: procfs: silence lockdep warning about read vs. exec seq_file) Oleg Nesterov
2014-08-03 21:19     ` [PATCH 1/5] fs/proc/task_mmu.c: don't use task->mm in m_start() and show_*map() Oleg Nesterov
2014-08-03 21:19     ` [PATCH 2/5] fs/proc/task_mmu.c: unify/simplify do_maps_open() and numa_maps_open() Oleg Nesterov
2014-08-03 21:20     ` [PATCH 3/5] proc: introduce proc_mem_open() Oleg Nesterov
2014-08-03 21:20     ` [PATCH 4/5] fs/proc/task_mmu.c: introduce the "stable" proc_maps_private->mm Oleg Nesterov
2014-08-03 21:20     ` [PATCH 5/5] fs/proc/task_mmu.c: change m_start() to rely on priv->mm and avoid mm_access() Oleg Nesterov
2014-08-04  6:59     ` [PATCH 0/5] (Was: procfs: silence lockdep warning about read vs. exec seq_file) Cyrill Gorcunov
2014-08-04  9:20     ` Kirill A. Shutemov
2014-08-04 14:55       ` Oleg Nesterov
2014-08-05  3:42 ` [PATCH, RESEND] procfs: silence lockdep warning about read vs. exec seq_file Eric W. Biederman
2014-08-05  8:46   ` Kirill A. Shutemov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20140803164452.GA14626@redhat.com \
    --to=oleg@redhat.com \
    --cc=davem@davemloft.net \
    --cc=dhowells@redhat.com \
    --cc=gorcunov@openvz.org \
    --cc=kirill.shutemov@linux.intel.com \
    --cc=kirill@shutemov.name \
    --cc=levinsasha928@gmail.com \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=peterz@infradead.org \
    --cc=viro@zeniv.linux.org.uk \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).