All of lore.kernel.org
 help / color / mirror / Atom feed
From: Greg KH <gregkh@suse.de>
To: linux-kernel@vger.kernel.org, stable@kernel.org
Cc: Justin Forbes <jmforbes@linuxtx.org>,
	Zwane Mwaikambo <zwane@arm.linux.org.uk>,
	"Theodore Ts'o" <tytso@mit.edu>,
	Randy Dunlap <rdunlap@xenotime.net>,
	Dave Jones <davej@redhat.com>,
	Chuck Wolber <chuckw@quantumlinux.com>,
	Chris Wedgwood <reviews@ml.cw.f00f.org>,
	Michael Krufky <mkrufky@linuxtv.org>,
	Chuck Ebbert <cebbert@redhat.com>,
	Domenico Andreoli <cavokz@gmail.com>, Willy Tarreau <w@1wt.eu>,
	Rodrigo Rubira Branco <rbranco@la.checkpoint.com>,
	Jake Edge <jake@lwn.net>, Eugene Teo <eteo@redhat.com>,
	torvalds@linux-foundation.org, akpm@linux-foundation.org,
	alan@lxorguk.ukuu.org.uk, Al Viro <viro@zeniv.linux.org.uk>
Subject: [patch 40/58] New locking/refcounting for fs_struct
Date: Wed, 06 May 2009 14:46:08 -0700	[thread overview]
Message-ID: <20090506214800.844554430@mini.kroah.org> (raw)
In-Reply-To: <20090506215017.GA21981@kroah.com>

[-- Attachment #1: new-locking-refcounting-for-fs_struct.patch --]
[-- Type: text/plain, Size: 9847 bytes --]

2.6.29-stable review patch.  If anyone has any objections, please let us know.

------------------

From: Al Viro <viro@zeniv.linux.org.uk>

commit 498052bba55ecaff58db6a1436b0e25bfd75a7ff upstream.

* all changes of current->fs are done under task_lock and write_lock of
  old fs->lock
* refcount is not atomic anymore (same protection)
* its decrements are done when removing reference from current; at the
  same time we decide whether to free it.
* put_fs_struct() is gone
* new field - ->in_exec.  Set by check_unsafe_exec() if we are trying to do
  execve() and only subthreads share fs_struct.  Cleared when finishing exec
  (success and failure alike).  Makes CLONE_FS fail with -EAGAIN if set.
* check_unsafe_exec() may fail with -EAGAIN if another execve() from subthread
  is in progress.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

---
 fs/compat.c               |   16 +++++++++-
 fs/exec.c                 |   31 +++++++++++++++++---
 fs/fs_struct.c            |   69 ++++++++++++++++++++++++++++++++--------------
 fs/internal.h             |    2 -
 fs/proc/task_nommu.c      |    2 -
 include/linux/fs_struct.h |    8 ++---
 kernel/fork.c             |   37 ++++++++++++++++++------
 7 files changed, 121 insertions(+), 44 deletions(-)

--- a/fs/compat.c
+++ b/fs/compat.c
@@ -51,6 +51,7 @@
 #include <linux/poll.h>
 #include <linux/mm.h>
 #include <linux/eventpoll.h>
+#include <linux/fs_struct.h>
 
 #include <asm/uaccess.h>
 #include <asm/mmu_context.h>
@@ -1412,12 +1413,15 @@ int compat_do_execve(char * filename,
 	bprm->cred = prepare_exec_creds();
 	if (!bprm->cred)
 		goto out_unlock;
-	check_unsafe_exec(bprm);
+
+	retval = check_unsafe_exec(bprm);
+	if (retval)
+		goto out_unlock;
 
 	file = open_exec(filename);
 	retval = PTR_ERR(file);
 	if (IS_ERR(file))
-		goto out_unlock;
+		goto out_unmark;
 
 	sched_exec();
 
@@ -1459,6 +1463,9 @@ int compat_do_execve(char * filename,
 		goto out;
 
 	/* execve succeeded */
+	write_lock(&current->fs->lock);
+	current->fs->in_exec = 0;
+	write_unlock(&current->fs->lock);
 	mutex_unlock(&current->cred_exec_mutex);
 	acct_update_integrals(current);
 	free_bprm(bprm);
@@ -1476,6 +1483,11 @@ out_file:
 		fput(bprm->file);
 	}
 
+out_unmark:
+	write_lock(&current->fs->lock);
+	current->fs->in_exec = 0;
+	write_unlock(&current->fs->lock);
+
 out_unlock:
 	mutex_unlock(&current->cred_exec_mutex);
 
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1049,16 +1049,18 @@ EXPORT_SYMBOL(install_exec_creds);
  * - the caller must hold current->cred_exec_mutex to protect against
  *   PTRACE_ATTACH
  */
-void check_unsafe_exec(struct linux_binprm *bprm)
+int check_unsafe_exec(struct linux_binprm *bprm)
 {
 	struct task_struct *p = current, *t;
 	unsigned long flags;
 	unsigned n_fs, n_sighand;
+	int res = 0;
 
 	bprm->unsafe = tracehook_unsafe_exec(p);
 
 	n_fs = 1;
 	n_sighand = 1;
+	write_lock(&p->fs->lock);
 	lock_task_sighand(p, &flags);
 	for (t = next_thread(p); t != p; t = next_thread(t)) {
 		if (t->fs == p->fs)
@@ -1066,11 +1068,19 @@ void check_unsafe_exec(struct linux_binp
 		n_sighand++;
 	}
 
-	if (atomic_read(&p->fs->count) > n_fs ||
-	    atomic_read(&p->sighand->count) > n_sighand)
+	if (p->fs->users > n_fs ||
+	    atomic_read(&p->sighand->count) > n_sighand) {
 		bprm->unsafe |= LSM_UNSAFE_SHARE;
+	} else {
+		if (p->fs->in_exec)
+			res = -EAGAIN;
+		p->fs->in_exec = 1;
+	}
 
 	unlock_task_sighand(p, &flags);
+	write_unlock(&p->fs->lock);
+
+	return res;
 }
 
 /* 
@@ -1285,12 +1295,15 @@ int do_execve(char * filename,
 	bprm->cred = prepare_exec_creds();
 	if (!bprm->cred)
 		goto out_unlock;
-	check_unsafe_exec(bprm);
+
+	retval = check_unsafe_exec(bprm);
+	if (retval)
+		goto out_unlock;
 
 	file = open_exec(filename);
 	retval = PTR_ERR(file);
 	if (IS_ERR(file))
-		goto out_unlock;
+		goto out_unmark;
 
 	sched_exec();
 
@@ -1333,6 +1346,9 @@ int do_execve(char * filename,
 		goto out;
 
 	/* execve succeeded */
+	write_lock(&current->fs->lock);
+	current->fs->in_exec = 0;
+	write_unlock(&current->fs->lock);
 	mutex_unlock(&current->cred_exec_mutex);
 	acct_update_integrals(current);
 	free_bprm(bprm);
@@ -1350,6 +1366,11 @@ out_file:
 		fput(bprm->file);
 	}
 
+out_unmark:
+	write_lock(&current->fs->lock);
+	current->fs->in_exec = 0;
+	write_unlock(&current->fs->lock);
+
 out_unlock:
 	mutex_unlock(&current->cred_exec_mutex);
 
--- a/fs/fs_struct.c
+++ b/fs/fs_struct.c
@@ -72,25 +72,27 @@ void chroot_fs_refs(struct path *old_roo
 		path_put(old_root);
 }
 
-void put_fs_struct(struct fs_struct *fs)
+void free_fs_struct(struct fs_struct *fs)
 {
-	/* No need to hold fs->lock if we are killing it */
-	if (atomic_dec_and_test(&fs->count)) {
-		path_put(&fs->root);
-		path_put(&fs->pwd);
-		kmem_cache_free(fs_cachep, fs);
-	}
+	path_put(&fs->root);
+	path_put(&fs->pwd);
+	kmem_cache_free(fs_cachep, fs);
 }
 
 void exit_fs(struct task_struct *tsk)
 {
-	struct fs_struct * fs = tsk->fs;
+	struct fs_struct *fs = tsk->fs;
 
 	if (fs) {
+		int kill;
 		task_lock(tsk);
+		write_lock(&fs->lock);
 		tsk->fs = NULL;
+		kill = !--fs->users;
+		write_unlock(&fs->lock);
 		task_unlock(tsk);
-		put_fs_struct(fs);
+		if (kill)
+			free_fs_struct(fs);
 	}
 }
 
@@ -99,7 +101,8 @@ struct fs_struct *copy_fs_struct(struct 
 	struct fs_struct *fs = kmem_cache_alloc(fs_cachep, GFP_KERNEL);
 	/* We don't need to lock fs - think why ;-) */
 	if (fs) {
-		atomic_set(&fs->count, 1);
+		fs->users = 1;
+		fs->in_exec = 0;
 		rwlock_init(&fs->lock);
 		fs->umask = old->umask;
 		read_lock(&old->lock);
@@ -114,28 +117,54 @@ struct fs_struct *copy_fs_struct(struct 
 
 int unshare_fs_struct(void)
 {
-	struct fs_struct *fsp = copy_fs_struct(current->fs);
-	if (!fsp)
+	struct fs_struct *fs = current->fs;
+	struct fs_struct *new_fs = copy_fs_struct(fs);
+	int kill;
+
+	if (!new_fs)
 		return -ENOMEM;
-	exit_fs(current);
-	current->fs = fsp;
+
+	task_lock(current);
+	write_lock(&fs->lock);
+	kill = !--fs->users;
+	current->fs = new_fs;
+	write_unlock(&fs->lock);
+	task_unlock(current);
+
+	if (kill)
+		free_fs_struct(fs);
+
 	return 0;
 }
 EXPORT_SYMBOL_GPL(unshare_fs_struct);
 
 /* to be mentioned only in INIT_TASK */
 struct fs_struct init_fs = {
-	.count		= ATOMIC_INIT(1),
+	.users		= 1,
 	.lock		= __RW_LOCK_UNLOCKED(init_fs.lock),
 	.umask		= 0022,
 };
 
 void daemonize_fs_struct(void)
 {
-	struct fs_struct *fs;
+	struct fs_struct *fs = current->fs;
 
-	exit_fs(current);	/* current->fs->count--; */
-	fs = &init_fs;
-	current->fs = fs;
-	atomic_inc(&fs->count);
+	if (fs) {
+		int kill;
+
+		task_lock(current);
+
+		write_lock(&init_fs.lock);
+		init_fs.users++;
+		write_unlock(&init_fs.lock);
+
+		write_lock(&fs->lock);
+		current->fs = &init_fs;
+		kill = !--fs->users;
+		write_unlock(&fs->lock);
+
+		task_unlock(current);
+		if (kill)
+			free_fs_struct(fs);
+	}
 }
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -44,7 +44,7 @@ extern void __init chrdev_init(void);
 /*
  * exec.c
  */
-extern void check_unsafe_exec(struct linux_binprm *);
+extern int check_unsafe_exec(struct linux_binprm *);
 
 /*
  * namespace.c
--- a/fs/proc/task_nommu.c
+++ b/fs/proc/task_nommu.c
@@ -49,7 +49,7 @@ void task_mem(struct seq_file *m, struct
 	else
 		bytes += kobjsize(mm);
 	
-	if (current->fs && atomic_read(&current->fs->count) > 1)
+	if (current->fs && current->fs->users > 1)
 		sbytes += kobjsize(current->fs);
 	else
 		bytes += kobjsize(current->fs);
--- a/include/linux/fs_struct.h
+++ b/include/linux/fs_struct.h
@@ -4,12 +4,10 @@
 #include <linux/path.h>
 
 struct fs_struct {
-	atomic_t count;	/* This usage count is used by check_unsafe_exec() for
-			 * security checking purposes - therefore it may not be
-			 * incremented, except by clone(CLONE_FS).
-			 */
+	int users;
 	rwlock_t lock;
 	int umask;
+	int in_exec;
 	struct path root, pwd;
 };
 
@@ -19,7 +17,7 @@ extern void exit_fs(struct task_struct *
 extern void set_fs_root(struct fs_struct *, struct path *);
 extern void set_fs_pwd(struct fs_struct *, struct path *);
 extern struct fs_struct *copy_fs_struct(struct fs_struct *);
-extern void put_fs_struct(struct fs_struct *);
+extern void free_fs_struct(struct fs_struct *);
 extern void daemonize_fs_struct(void);
 extern int unshare_fs_struct(void);
 
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -678,11 +678,19 @@ fail_nomem:
 
 static int copy_fs(unsigned long clone_flags, struct task_struct *tsk)
 {
+	struct fs_struct *fs = current->fs;
 	if (clone_flags & CLONE_FS) {
-		atomic_inc(&current->fs->count);
+		/* tsk->fs is already what we want */
+		write_lock(&fs->lock);
+		if (fs->in_exec) {
+			write_unlock(&fs->lock);
+			return -EAGAIN;
+		}
+		fs->users++;
+		write_unlock(&fs->lock);
 		return 0;
 	}
-	tsk->fs = copy_fs_struct(current->fs);
+	tsk->fs = copy_fs_struct(fs);
 	if (!tsk->fs)
 		return -ENOMEM;
 	return 0;
@@ -1518,12 +1526,16 @@ static int unshare_fs(unsigned long unsh
 {
 	struct fs_struct *fs = current->fs;
 
-	if ((unshare_flags & CLONE_FS) &&
-	    (fs && atomic_read(&fs->count) > 1)) {
-		*new_fsp = copy_fs_struct(current->fs);
-		if (!*new_fsp)
-			return -ENOMEM;
-	}
+	if (!(unshare_flags & CLONE_FS) || !fs)
+		return 0;
+
+	/* don't need lock here; in the worst case we'll do useless copy */
+	if (fs->users == 1)
+		return 0;
+
+	*new_fsp = copy_fs_struct(fs);
+	if (!*new_fsp)
+		return -ENOMEM;
 
 	return 0;
 }
@@ -1639,8 +1651,13 @@ SYSCALL_DEFINE1(unshare, unsigned long, 
 
 		if (new_fs) {
 			fs = current->fs;
+			write_lock(&fs->lock);
 			current->fs = new_fs;
-			new_fs = fs;
+			if (--fs->users)
+				new_fs = NULL;
+			else
+				new_fs = fs;
+			write_unlock(&fs->lock);
 		}
 
 		if (new_mm) {
@@ -1679,7 +1696,7 @@ bad_unshare_cleanup_sigh:
 
 bad_unshare_cleanup_fs:
 	if (new_fs)
-		put_fs_struct(new_fs);
+		free_fs_struct(new_fs);
 
 bad_unshare_cleanup_thread:
 bad_unshare_out:



  parent reply	other threads:[~2009-05-06 22:11 UTC|newest]

Thread overview: 68+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <20090506214528.660389067@mini.kroah.org>
2009-05-06 21:50 ` [patch 00/58] 2.6.29.3-stable review Greg KH
2009-05-06 21:45   ` [patch 01/58] forcedeth: Fix resume from hibernation regression Greg KH
2009-05-06 21:45   ` [patch 02/58] mac80211: Fix bug in getting rx status for frames pending in reorder buffer Greg KH
2009-05-06 21:45   ` [patch 03/58] b43: Poison RX buffers Greg KH
2009-05-06 21:45   ` [patch 04/58] b43: Refresh RX poison on buffer recycling Greg KH
2009-05-06 21:45   ` [patch 05/58] thinkpad-acpi: fix LED blinking through timer trigger Greg KH
2009-05-06 21:45   ` [patch 06/58] ALSA: us122l: add snd_us122l_free() Greg KH
2009-05-06 21:45   ` [patch 07/58] mac80211: fix basic rate bitmap calculation Greg KH
2009-05-06 21:45   ` [patch 08/58] KVM: MMU: Fix off-by-one calculating large page count Greg KH
2009-05-06 21:45   ` [patch 09/58] KVM: MMU: disable global page optimization Greg KH
2009-05-06 21:45   ` [patch 10/58] KVM: Fix overlapping check for memory slots Greg KH
2009-05-06 21:45   ` [patch 11/58] KVM: x86: release time_page on vcpu destruction Greg KH
2009-05-06 21:45   ` [patch 12/58] USB: Unusual Device support for Gold MP3 Player Energy Greg KH
2009-05-06 21:45   ` [patch 13/58] virtio-rng: Remove false BUG for spurious callbacks Greg KH
2009-05-06 21:45   ` [patch 14/58] b44: Use kernel DMA addresses for the kernel DMA API Greg KH
2009-05-06 21:45   ` [patch 15/58] block: include empty disks in /proc/diskstats Greg KH
2009-05-06 21:45   ` [patch 16/58] crypto: ixp4xx - Fix handling of chained sg buffers Greg KH
2009-05-06 21:45   ` [patch 17/58] exit_notify: kill the wrong capable(CAP_KILL) check (CVE-2009-1337) Greg KH
2009-05-06 21:45   ` [patch 18/58] PCI: fix incorrect mask of PM No_Soft_Reset bit Greg KH
2009-05-06 21:45   ` [patch 19/58] unreached code in selinux_ip_postroute_iptables_compat() (CVE-2009-1184) Greg KH
2009-05-06 21:45   ` [patch 20/58] drm/i915: add support for G41 chipset Greg KH
2009-05-06 21:45   ` [patch 21/58] x86-64: fix FPU corruption with signals and preemption Greg KH
2009-05-06 21:45   ` [patch 22/58] x86/PCI: dont call e820_all_mapped with -1 in the mmconfig case Greg KH
2009-05-06 21:45   ` [patch 23/58] ASoC: Fix offset of freqmode in WM8580 PLL configuration Greg KH
2009-05-06 21:45   ` [patch 24/58] PCI quirk: disable MSI on VIA VT3364 chipsets Greg KH
2009-05-06 21:45   ` [patch 25/58] bio: fix memcpy corruption in bio_copy_user_iov() Greg KH
2009-05-06 21:45   ` [patch 26/58] drm/i915: allow tiled front buffers on 965+ Greg KH
2009-05-06 21:45   ` [patch 27/58] pagemap: require aligned-length, non-null reads of /proc/pid/pagemap Greg KH
2009-05-06 21:45   ` [patch 28/58] kbuild: fix Module.markers permission error under cygwin Greg KH
2009-05-06 21:45   ` [patch 29/58] ptrace: ptrace_attach: fix the usage of ->cred_exec_mutex Greg KH
2009-05-06 21:45   ` [patch 30/58] USB: serial: fix lifetime and locking problems Greg KH
2009-05-06 21:45   ` [patch 31/58] ACPI: Revert conflicting workaround for BIOS w/ mangled PRT entries Greg KH
2009-05-06 21:46   ` [patch 32/58] powerpc: Sanitize stack pointer in signal handling code Greg KH
2009-05-06 21:46   ` [patch 33/58] compat_do_execve should unshare_files Greg KH
2009-05-06 21:46   ` [patch 34/58] fix setuid sometimes doesnt Greg KH
2009-05-06 21:46   ` [patch 35/58] fix setuid sometimes wouldnt Greg KH
2009-05-06 21:46   ` [patch 36/58] Annotate struct fs_structs usage count restriction Greg KH
2009-05-06 21:46   ` [patch 37/58] Kill unsharing fs_struct in __set_personality() Greg KH
2009-05-06 21:46   ` [patch 38/58] Get rid of bumping fs_struct refcount in pivot_root(2) Greg KH
2009-05-06 21:46   ` [patch 39/58] Take fs_struct handling to new file (fs/fs_struct.c) Greg KH
2009-05-06 21:46   ` Greg KH [this message]
2009-05-06 21:46   ` [patch 41/58] check_unsafe_exec() doesnt care about signal handlers sharing Greg KH
2009-05-06 21:46   ` [patch 42/58] do_execve() must not clear fs->in_exec if it was set by another thread Greg KH
2009-05-06 21:46   ` [patch 43/58] check_unsafe_exec: s/lock_task_sighand/rcu_read_lock/ Greg KH
2009-05-06 21:46   ` [patch 44/58] mv643xx_eth: 64bit mib counter read fix Greg KH
2009-05-06 21:46   ` [patch 45/58] mv643xx_eth: OOM handling fixes Greg KH
2009-05-06 21:46   ` [patch 46/58] ath5k: fix buffer overrun in rate debug code Greg KH
2009-05-06 21:46   ` [patch 47/58] proc: avoid information leaks to non-privileged processes Greg KH
2009-05-06 21:46   ` [patch 48/58] cs5536: define dma_sff_read_status() method Greg KH
2009-05-06 21:46   ` [patch 49/58] intel-iommu: Fix device-to-iommu mapping for PCI-PCI bridges Greg KH
2009-05-06 21:46   ` [patch 50/58] intel-iommu: Fix oops in device_to_iommu() when devices not found Greg KH
2009-05-06 21:46   ` [patch 51/58] intel-iommu: Avoid panic() for DRHD at address zero Greg KH
2009-05-06 21:46   ` [patch 52/58] clockevents: prevent endless loop in tick_handle_periodic() Greg KH
2009-05-06 21:46   ` [patch 53/58] Ignore madvise(MADV_WILLNEED) for hugetlbfs-backed regions Greg KH
2009-05-06 21:46   ` [patch 54/58] mm: fix Committed_AS underflow on large NR_CPUS environment Greg KH
2009-05-06 21:46   ` [patch 55/58] rndis_wlan: fix initialization order for workqueue&workers Greg KH
2009-05-06 21:46   ` [patch 56/58] sched: account system time properly Greg KH
2009-05-06 21:46   ` [patch 57/58] tracing: x86, mmiotrace: fix range test Greg KH
2009-05-06 22:12     ` Steven Rostedt
2009-05-06 21:46   ` [patch 58/58] ath9k: Fix FIF_BCN_PRBRESP_PROMISC handling Greg KH
2009-05-06 21:46     ` Greg KH
2009-05-07  0:58   ` [patch 00/58] 2.6.29.3-stable review Stefan Lippers-Hollmann
2009-05-07  1:26     ` Greg KH
2009-05-07 17:23   ` Chris Frey
2009-05-07 17:49     ` Steve French
2009-05-07 22:13     ` Greg KH
2009-05-08  4:33       ` Suresh Jayaraman
2009-05-08  5:13         ` Greg KH

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20090506214800.844554430@mini.kroah.org \
    --to=gregkh@suse.de \
    --cc=akpm@linux-foundation.org \
    --cc=alan@lxorguk.ukuu.org.uk \
    --cc=cavokz@gmail.com \
    --cc=cebbert@redhat.com \
    --cc=chuckw@quantumlinux.com \
    --cc=davej@redhat.com \
    --cc=eteo@redhat.com \
    --cc=jake@lwn.net \
    --cc=jmforbes@linuxtx.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mkrufky@linuxtv.org \
    --cc=rbranco@la.checkpoint.com \
    --cc=rdunlap@xenotime.net \
    --cc=reviews@ml.cw.f00f.org \
    --cc=stable@kernel.org \
    --cc=torvalds@linux-foundation.org \
    --cc=tytso@mit.edu \
    --cc=viro@zeniv.linux.org.uk \
    --cc=w@1wt.eu \
    --cc=zwane@arm.linux.org.uk \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.